diff --git a/cmd/massdl/massdl.go b/cmd/massdl/massdl.go index cb1c961..3c90be9 100644 --- a/cmd/massdl/massdl.go +++ b/cmd/massdl/massdl.go @@ -36,6 +36,7 @@ func main() { c.Init() c.Logger.Info().Str("версия", version).Str("коммит", commit).Str("дата сборки", date). Msg("Загрузчик торрентов из разделов NNM-Club начинает свою работу") + c.Logger.Info().Msg("Copyright (c) 2012-2022 Vladimir «fat0troll» Hodakov") c.InitConfig() loginer.New(c) diff --git a/internal/services/fetcher/fetcher.go b/internal/services/fetcher/fetcher.go index 7c27685..929cf15 100644 --- a/internal/services/fetcher/fetcher.go +++ b/internal/services/fetcher/fetcher.go @@ -5,6 +5,7 @@ package fetcher import ( + "fmt" "os" "path/filepath" "strconv" @@ -89,7 +90,7 @@ func download(topic *forumTopic) { } func fetch(forumID int) { - startPage := "https://" + c.Config.URL + "/forum/viewforum.php?f=" + strconv.Itoa(forumID) + startPage := fmt.Sprintf("https://%s/forum/viewforum.php?f=%d", c.Config.URL, forumID) time.Sleep(5 * time.Second) @@ -109,9 +110,7 @@ func fetch(forumID int) { dlog.Info().Str("название форума", getForumName(forumPages[1])).Msg("Определён форум для загрузки") - getNavigation(forumPages[1]) - - downloadAdditionalPages() + downloadAdditionalPages(forumID) for _, forumPage := range forumPages { getTopics(forumPage) diff --git a/internal/services/fetcher/parser.go b/internal/services/fetcher/parser.go index 1bc4710..51fe4ba 100644 --- a/internal/services/fetcher/parser.go +++ b/internal/services/fetcher/parser.go @@ -91,17 +91,6 @@ func getLastModeratedDate(querier *goquery.Document) string { return dateValue.Format("2006-01-02") } -func getNavigation(querier *goquery.Document) { - querier.Find("td[align=right] .nav a").Each(func(i int, sel *goquery.Selection) { - if !strings.Contains(sel.Text(), "След") { - href, _ := sel.Attr("href") - forumPagesLinks[sel.Text()] = href - } - }) - - dlog.Info().Int("количество страниц", len(forumPagesLinks)+1).Msg("Определено количество страниц") -} - func getTopics(querier *goquery.Document) { querier.Find(".forumline tr").Each(func(i int, sel *goquery.Selection) { if strings.Contains(sel.Text(), "DL:") { @@ -119,3 +108,15 @@ func getTopics(querier *goquery.Document) { } }) } + + +func isForumPageEmpty(querier *goquery.Document) bool { + isEmpty := false + querier.Find(".forumline tr").Each(func(i int, sel *goquery.Selection) { + if strings.Contains(sel.Text(), "В этом форуме пока нет сообщений") { + isEmpty = true + } + }) + + return isEmpty +} diff --git a/internal/services/fetcher/querier.go b/internal/services/fetcher/querier.go index 7673999..3405826 100644 --- a/internal/services/fetcher/querier.go +++ b/internal/services/fetcher/querier.go @@ -38,6 +38,10 @@ func setQuerier(pageFile string, pageType string, page int) error { return errors.New("получена анонимная страница") } + if isForumPageEmpty(querier) { + return errors.New("получена пустая страница") + } + forumPages[page] = querier default: diff --git a/internal/services/fetcher/request.go b/internal/services/fetcher/request.go index f59b638..5e634c9 100644 --- a/internal/services/fetcher/request.go +++ b/internal/services/fetcher/request.go @@ -5,27 +5,49 @@ package fetcher import ( + "fmt" "io" "io/ioutil" "net/http" - "strconv" "time" "golang.org/x/text/encoding/charmap" "golang.org/x/text/transform" ) -func downloadAdditionalPages() { - for i := range forumPagesLinks { - forumPage, _ := strconv.Atoi(i) +const ( + forumPageOffset = 50 +) + +func downloadAdditionalPages(forumID int) { + forumPage := 2 + + for { dlog.Info().Int("номер страницы", forumPage).Msg("Скачивается ещё одна страница форума") - pageFile, err := downloadFile("https://"+c.Config.URL+"/forum/"+forumPagesLinks[i], true) + pageURL := fmt.Sprintf( + "https://%s/forum/viewforum.php?f=%d&start=%d#pagestart", + c.Config.URL, forumID, (forumPage-1)*forumPageOffset, + ) + + if uberDebug { + dlog.Debug().Str("URL страницы", pageURL).Msg("Сформирован URL для скачивания") + } + + pageFile, err := downloadFile(pageURL, true) if err != nil { dlog.Fatal().Err(err).Msg("Не удалось загрузить страницу форума") } - _ = setQuerier(pageFile, "forumPage", forumPage) + err = setQuerier(pageFile, "forumPage", forumPage) + if err != nil { + dlog.Info().Str("причина завершения", err.Error()). + Msg("Завершено скачивание страниц форума") + // Found empty page, leaving cycle + break + } + + forumPage++ } }