From a5042991509162a33208ec74be22a6f735364e6c Mon Sep 17 00:00:00 2001 From: Vladimir Hodakov Date: Sat, 28 Sep 2019 19:59:02 +0400 Subject: [PATCH] Add fetcher, support for downloading forum pages --- cmd/massdl.go | 3 ++ domains/fetcher/v1/exported.go | 62 ++++++++++++++++++++++++++++++++++ domains/fetcher/v1/fetcher.go | 33 ++++++++++++++++++ domains/fetcher/v1/parser.go | 39 +++++++++++++++++++++ domains/fetcher/v1/querier.go | 37 ++++++++++++++++++++ domains/fetcher/v1/request.go | 58 +++++++++++++++++++++++++++++++ domains/loginer/v1/exported.go | 9 ++--- go.mod | 2 ++ go.sum | 9 +++++ internal/context/context.go | 20 +++++------ internal/context/exported.go | 3 +- 11 files changed, 260 insertions(+), 15 deletions(-) create mode 100644 domains/fetcher/v1/exported.go create mode 100644 domains/fetcher/v1/fetcher.go create mode 100644 domains/fetcher/v1/parser.go create mode 100644 domains/fetcher/v1/querier.go create mode 100644 domains/fetcher/v1/request.go diff --git a/cmd/massdl.go b/cmd/massdl.go index ffd89f0..90d62dc 100644 --- a/cmd/massdl.go +++ b/cmd/massdl.go @@ -5,6 +5,7 @@ package main import ( + fetcherv1 "gitlab.com/fat0troll/uploader_tools/domains/fetcher/v1" loginerv1 "gitlab.com/fat0troll/uploader_tools/domains/loginer/v1" "gitlab.com/fat0troll/uploader_tools/internal/context" ) @@ -16,8 +17,10 @@ func main() { c.InitConfig() loginerv1.New(c) + fetcherv1.New(c) c.Flagger.Parse() loginerv1.Process() + fetcherv1.Process() } diff --git a/domains/fetcher/v1/exported.go b/domains/fetcher/v1/exported.go new file mode 100644 index 0000000..c36bb7c --- /dev/null +++ b/domains/fetcher/v1/exported.go @@ -0,0 +1,62 @@ +// NNM-Club torrent filess mass downloader +// Created for Uploaders group +// Copyright (c) 2012-2019 Vladimir "fat0troll" Hodakov + +package fetcherv1 + +import ( + "net/http" + + "github.com/PuerkitoBio/goquery" + "github.com/rs/zerolog" + "gitlab.com/pztrn/flagger" + + "gitlab.com/fat0troll/uploader_tools/internal/context" +) + +var ( 
+ c *context.Context + dclient http.Client + dlog zerolog.Logger + forumPages map[int]*goquery.Document + forumPagesLinks map[string]string + uberDebug bool +) + +// New initializes package +func New(cc *context.Context) { + c = cc + dlog = c.Logger.With().Str("модуль", "fetcher").Int("версия", 1).Logger() + + _ = c.Flagger.AddFlag(&flagger.Flag{ + Name: "forum", + Description: "Номер форума, торренты с которого нужно скачать", + Type: "int", + DefaultValue: 0, + }) + + _ = c.Flagger.AddFlag(&flagger.Flag{ + Name: "fetcherDebug", + Description: "Запустить модуль fetcher в дебаг-режиме", + Type: "bool", + DefaultValue: false, + }) + + forumPages = make(map[int]*goquery.Document) + forumPagesLinks = make(map[string]string) + + dlog.Info().Msg("Модуль инициализирован") +} + +// Process reads the fetcher flags and starts downloading the selected forum +func Process() { + uberDebug, _ = c.Flagger.GetBoolValue("fetcherDebug") + forumID, _ := c.Flagger.GetIntValue("forum") + if forumID == 0 { + dlog.Fatal().Msg("Номер форума не указан. Используйте ключ -forum XXX, чтобы указать номер форума") + } + + dlog.Info().Int("forum ID", forumID).Msg("Получен ID форума, начинаем работу...") + + fetch(forumID) +} diff --git a/domains/fetcher/v1/fetcher.go b/domains/fetcher/v1/fetcher.go new file mode 100644 index 0000000..44b2ef0 --- /dev/null +++ b/domains/fetcher/v1/fetcher.go @@ -0,0 +1,33 @@ +// NNM-Club torrent files mass downloader +// Created for Uploaders group +// Copyright (c) 2012-2019 Vladimir "fat0troll" Hodakov + +package fetcherv1 + +import ( + "strconv" +) + +func fetch(forumID int) { + startPage := "https://" + c.Config.URL + "/forum/viewforum.php?f=" + strconv.Itoa(forumID) + + startPageFile, err := dumpForumPage(startPage) + if err != nil { + dlog.Error().Err(err).Msg("Не удалось получить данные с форума") + } + + if uberDebug { + dlog.Info().Str("имя файла", startPageFile).Msg("Получена стартовая страница форума") + } + + err = setQuerier(startPageFile, "forumPage", 1) + if err != nil { + 
dlog.Fatal().Err(err).Msg("Не удалось создать обработчик для страницы") + } + + dlog.Info().Str("название форума", getForumName(forumPages[1])).Msg("Определён форум для загрузки") + + getNavigation(forumPages[1]) + + downloadAdditionalPages() +} diff --git a/domains/fetcher/v1/parser.go b/domains/fetcher/v1/parser.go new file mode 100644 index 0000000..3873f85 --- /dev/null +++ b/domains/fetcher/v1/parser.go @@ -0,0 +1,39 @@ +// NNM-Club torrent files mass downloader +// Created for Uploaders group +// Copyright (c) 2012-2019 Vladimir "fat0troll" Hodakov + +package fetcherv1 + +import ( + "strings" + + "github.com/PuerkitoBio/goquery" +) + +// checkLoginness reports whether the downloaded page was served to the configured logged-in user +func checkLoginness(querier *goquery.Document) bool { + authorized := false + querier.Find(".mainmenu").Each(func(i int, sel *goquery.Selection) { + if strings.Contains(sel.Text(), "Выход") { + if strings.Contains(sel.Text(), c.Config.Username) { + authorized = true + } + } + }) + return authorized +} + +func getForumName(querier *goquery.Document) string { + return querier.Find("h1").First().Text() +} + +func getNavigation(querier *goquery.Document) { + querier.Find("td[align=right] .nav a").Each(func(i int, sel *goquery.Selection) { + if !strings.Contains(sel.Text(), "След") { + href, _ := sel.Attr("href") + forumPagesLinks[sel.Text()] = href + } + }) + + dlog.Info().Int("количество страниц", len(forumPagesLinks)+1).Msg("Определено количество страниц") +} diff --git a/domains/fetcher/v1/querier.go b/domains/fetcher/v1/querier.go new file mode 100644 index 0000000..231a573 --- /dev/null +++ b/domains/fetcher/v1/querier.go @@ -0,0 +1,37 @@ +// NNM-Club torrent files mass downloader +// Created for Uploaders group +// Copyright (c) 2012-2019 Vladimir "fat0troll" Hodakov + +package fetcherv1 + +import ( + "errors" + "os" + + "github.com/PuerkitoBio/goquery" +) + +func setQuerier(pageFile string, pageType string, page int) error { + f, err := os.Open(pageFile) + if err 
!= nil { + return err + } + defer f.Close() + + querier, err := goquery.NewDocumentFromReader(f) + if err != nil { + return err + } + + switch pageType { + case "forumPage": + if !checkLoginness(querier) { + return errors.New("получена анонимная страница") + } + forumPages[page] = querier + default: + return errors.New("неизвестный тип страницы") + } + + return nil +} diff --git a/domains/fetcher/v1/request.go b/domains/fetcher/v1/request.go new file mode 100644 index 0000000..bc5e04f --- /dev/null +++ b/domains/fetcher/v1/request.go @@ -0,0 +1,58 @@ +// NNM-Club torrent files mass downloader +// Created for Uploaders group +// Copyright (c) 2012-2019 Vladimir "fat0troll" Hodakov + +package fetcherv1 + +import ( + "io" + "io/ioutil" + "net/http" + "strconv" + + "golang.org/x/text/encoding/charmap" + "golang.org/x/text/transform" +) + +func downloadAdditionalPages() { + for i := range forumPagesLinks { + forumPage, _ := strconv.Atoi(i) + dlog.Info().Int("номер страницы", forumPage).Msg("Скачивается ещё одна страница форума") + pageFile, err := dumpForumPage("https://" + c.Config.URL + "/forum/" + forumPagesLinks[i]) + if err != nil { + dlog.Fatal().Err(err).Msg("Не удалось загрузить страницу форума") + } + + _ = setQuerier(pageFile, "forumPage", forumPage) + } +} + +func dumpForumPage(url string) (string, error) { + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return "", err + } + for i := range c.Cookies { + req.AddCookie(c.Cookies[i]) + } + + resp, err := dclient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + tempF, err := ioutil.TempFile("", "massdl-*") + if err != nil { + return "", err + } + defer tempF.Close() + + respInUTF8 := transform.NewReader(resp.Body, charmap.Windows1251.NewDecoder()) + _, err = io.Copy(tempF, respInUTF8) + if err != nil { + return "", err + } + + return tempF.Name(), nil +} diff --git a/domains/loginer/v1/exported.go b/domains/loginer/v1/exported.go index c12cd5d..48e7b5b 
100644 --- a/domains/loginer/v1/exported.go +++ b/domains/loginer/v1/exported.go @@ -6,8 +6,9 @@ package loginerv1 import ( "github.com/rs/zerolog" - "gitlab.com/fat0troll/uploader_tools/internal/context" "gitlab.com/pztrn/flagger" + + "gitlab.com/fat0troll/uploader_tools/internal/context" ) var ( @@ -18,16 +19,16 @@ var ( // New initializes package func New(cc *context.Context) { c = cc - dlog = c.Logger.With().Str("domain", "loginer").Int("version", 1).Logger() + dlog = c.Logger.With().Str("модуль", "loginer").Int("версия", 1).Logger() _ = c.Flagger.AddFlag(&flagger.Flag{ Name: "auth", - Description: "Start authentication to NNM-Club website and create application config.", + Description: "Запустить аутентификацию на сайте NNM-Club с начала.", Type: "bool", DefaultValue: false, }) - dlog.Info().Msg("Domain initialized") + dlog.Info().Msg("Модуль инициализирован") } // Process handles authorization diff --git a/go.mod b/go.mod index 28b973f..d299521 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,10 @@ module gitlab.com/fat0troll/uploader_tools go 1.13 require ( + github.com/PuerkitoBio/goquery v1.5.0 github.com/kirsle/configdir v0.0.0-20170128060238-e45d2f54772f github.com/rs/zerolog v1.15.0 github.com/stretchr/testify v1.4.0 // indirect gitlab.com/pztrn/flagger v0.0.0-20190122123836-d429d7149cc9 + golang.org/x/text v0.3.0 ) diff --git a/go.sum b/go.sum index 43d9ce2..3d0d094 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= +github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/davecgh/go-spew v1.1.0 
h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -16,9 +20,14 @@ github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxt gitlab.com/pztrn/flagger v0.0.0-20190122123836-d429d7149cc9 h1:qLlIZybSEr3MSCaE+cJZpT6O/1q3IsBPV7GcZJc05vs= gitlab.com/pztrn/flagger v0.0.0-20190122123836-d429d7149cc9/go.mod h1:2cxOO1JFwNeKmT/icFNkUOPzQnG+qeMXURKeVFsuQA8= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= diff --git a/internal/context/context.go b/internal/context/context.go index 7abd666..3f5e101 100644 --- a/internal/context/context.go +++ b/internal/context/context.go @@ -30,14 +30,14 @@ func (c *Context) getMemoryUsage(e *zerolog.Event, level zerolog.Level, message // initFlagger initializes flags parser func (c 
*Context) initFlagger() { - c.Flagger = flagger.New("NNM-Club mass torrent files downloader", flagger.LoggerInterface(log.New(os.Stdout, "", log.Lshortfile))) + c.Flagger = flagger.New("NNM-Club Uploader Tools", flagger.LoggerInterface(log.New(os.Stdout, "", log.Lshortfile))) c.Flagger.Initialize() } func (c *Context) readConfig() { fh, err := os.Open(c.configFilePath) if err != nil { - dlog.Fatal().Err(err).Msg("Failed to read config file") + dlog.Fatal().Err(err).Msg("Ошибка чтения конфигурации") } defer fh.Close() @@ -45,7 +45,7 @@ func (c *Context) readConfig() { decoder := json.NewDecoder(fh) err = decoder.Decode(&c.Config) if err != nil { - dlog.Fatal().Err(err).Msg("Failed to decode config") + dlog.Fatal().Err(err).Msg("Ошибка декодирования конфигурации") } } @@ -57,28 +57,28 @@ func (c *Context) Init() { c.Logger = zerolog.New(zerolog.ConsoleWriter{Out: os.Stdout}).With().Timestamp().Logger() c.Logger = c.Logger.Hook(zerolog.HookFunc(c.getMemoryUsage)) - dlog = c.Logger.With().Str("domain", "context").Logger() + dlog = c.Logger.With().Str("модуль", "context").Logger() } func (c *Context) InitConfig() { configPath := configdir.LocalConfig("uploader_tools") err := configdir.MakePath(configPath) if err != nil { - dlog.Fatal().Err(err).Str("config directory", configPath).Msg("Failed to obtain config path") + dlog.Fatal().Err(err).Str("директория с конфигурационным файлом", configPath).Msg("Не получилось создать или обнаружить директорию для конфигурационных файлов") } - dlog.Debug().Str("config directory", configPath).Msg("Got config directory") + dlog.Debug().Str("директория с конфигурационным файлом", configPath).Msg("Найдена директория с конфигурационным файлом") configFile := filepath.Join(configPath, "settings.json") c.configFilePath = configFile if _, err = os.Stat(configFile); os.IsNotExist(err) { // Generating new config on first run - dlog.Debug().Msg("Generating new config") + dlog.Debug().Msg("Генерируется новый конфигурационный файл") c.SaveConfig() 
} else { - dlog.Debug().Msg("Using existing config") + dlog.Debug().Msg("Используется существующий конфигурационный файл") c.readConfig() } @@ -87,14 +87,14 @@ func (c *Context) InitConfig() { func (c *Context) SaveConfig() { fh, err := os.Create(c.configFilePath) if err != nil { - dlog.Fatal().Err(err).Msg("Failed to create config file") + dlog.Fatal().Err(err).Msg("Не удалось создать файл конфигурации") } defer fh.Close() encoder := json.NewEncoder(fh) err = encoder.Encode(&c.Config) if err != nil { - dlog.Fatal().Err(err).Msg("Failed to encode config") + dlog.Fatal().Err(err).Msg("Не удалось записать файл конфигурации") } } diff --git a/internal/context/exported.go b/internal/context/exported.go index aa46c6a..bdbe88e 100644 --- a/internal/context/exported.go +++ b/internal/context/exported.go @@ -8,8 +8,9 @@ import ( "net/http" "github.com/rs/zerolog" - "gitlab.com/fat0troll/uploader_tools/internal/config" "gitlab.com/pztrn/flagger" + + "gitlab.com/fat0troll/uploader_tools/internal/config" ) var (