1

Start rewriting the massdl script in Go

Due to the Python 2 EOL I need to pick a new language for
the uploader tools. Instead of Python 3 I chose Go,
because I have been working with this language for almost
three and a half years.

The Python 2 CLI app has been moved to the legacy/python2 branch and
will be supported until 1 January 2020.
The old shell scripts are still in the legacy/shell branch and
don't work at the moment. I will not fix them; they are kept in the
repository for historical reasons only.
This commit is contained in:
Vladimir Hodakov 2019-09-14 18:46:01 +04:00
parent 151f9a24db
commit bce5cef340
Signed by: Vladimir Hodakov
GPG Key ID: 673980B6882F82C6
8 changed files with 17 additions and 313 deletions

5
.gitignore vendored
View File

@ -1,4 +1 @@
output/*
lib/config.py
*.pyc
*.log
.DS_Store

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module gitlab.com/fat0troll/uploader_tools
go 1.13

View File

View File

@ -1,9 +0,0 @@
# User-editable values.
# Forum account credentials sent with the login request.
username = 'username'
password = 'password'
# Absolute path of the directory that receives the run log and the
# per-forum sub-directories with downloaded torrents.
workdir = '/absolute/path/to/dest/workdir'
# System values, edit at your own risk.
# Base URL of the tracker; paths such as "/forum/..." are appended to it,
# so it must not end with a slash.
domain = 'https://nnmclub.ro'

View File

@ -1,52 +0,0 @@
# *-* encoding: utf-8 *-*
"""
Message catalogue for the massdl script.

Every log message must be formatted as:
[status] #status_number: message
where:
status: "I" - information, "W" - warning, "E" - error.
status_number - message number:
0xx -- script environment messages
1xx -- authentication messages
2xx -- grabbing and parsing messages
3xx -- torrents downloading messages
4xx -- torrents check messages
The last message in the log must end with the "Stopped" word.
Information numbers start from x00, success messages from x20, warnings
from x40 and errors from x66. Console messages use the same numbers with
the prefix "c"; log messages use the prefix "m".

Each log message is a complete line and therefore ends with a newline.
"""
c000 = "massdl.py — скрипт для пакетной загрузки торрент-файлов из разделов NNM-club.ru.\nИспользование:\n\n\tpython massdl.py [номер_раздела]\n\nгде [номер_раздела] — номер раздела на NNM-club.ru (например, для форума '*Nix Игры' это 316).\nПримеры:\n\n\tpython massdl.py 316 — скачивание форума '*Nix Игры'\n\tpython massdl.py 332 — скачивание форума 'Русский рок'\n\nАвтор: Владимир «fat0troll» Ходаков.\nСкрипт использует Python версии 2.7. При запуске убедитесь, что используете правильный интерпретатор Python!\nКонфигурация скрипта производится в конфигурационном файле lib/config.py, имеющем опции с очевидными названиями.\nВерсия %s\n"
c010 = "Выполняется скрипт %s.\nВерсия скрипта %s."
c020 = "Параметры командной строки: %s"
c021 = "Скрипт завершен успешно."
c066 = "Ваша версия Python не поддерживается. Используйте Python 2.7!"
c067 = "У вас не установлен PycURL. Обратитесь к справке вашего дистрибутива для подробностей, как его установить."
c120 = "Вы залогинились успешно как пользователь %s."
c166 = "Не удалось залогиниться как пользователь %s. Проверьте ваши данные в файле lib/config.py!"
c199 = "Строка curl для логина: %s."
c200 = "Начинается загрузка форума..."
c210 = "Скачиваем страницу %s..."
c220 = "Скачиваем форум %s"
c221 = "Скачивание страниц завершено. Найдено топиков: %s."
c300 = "Начинаем загрузку торрентов..."
c310 = "Скачиваю топик %s/%s: %s (обновлён %s)."
c340 = "Топик %s/%s: %s не проверен модератором. Пропускается."
c350 = "Топик %s/%s: %s — не найдена ссылка для скачивания. Пропускается."
m000 = '================================================================================\n= Скрипт скачки раздела NNM-Club для Linux и Unix-like ОС. =\n= Версия: %s =\n================================================================================\n'
m020 = "[I] #020: окружение скрипта в порядке.\n"
m021 = "[I] #021: работа завершена успешно. Завершено.\n"
m025 = "[I] #100: пытаемся войти на %s...\n"
m066 = "[E] #066: версия Python не поддерживается. Завершено.\n"
m067 = "[E] #067: не установлен cURL. Завершено\n"
m120 = "[I] #120: выполнен успешный вход на сайт. Cookie лежит по адресу %s.\n"
m166 = "[E] #166: неправильное имя пользователя или пароль. Завершено.\n"
m210 = "[I] #200: загружаем страницу %s...\n"
m220 = "[I] #220: успешно загружена стартовая страница форума '%s'.\n"
m221 = "[I] #221: успешно загружены все страницы форума. Найдено топиков: %s.\n"
m310 = "[I] #310: загружаем торрент %s из %s по ссылке %s.\n"
m340 = "[W] #340: топик %s из %s [%s, %s] не проверен модераторами. Пропуск.\n"
m350 = "[W] #350: топик %s из %s [%s, %s] не содержит ссылки на скачивание. Пропуск.\n"

13
main.go Normal file
View File

@ -0,0 +1,13 @@
// NNM-Club torrent files mass downloader
// Created for Uploaders group
// (c) Vladimir Hodakov, 2012-2019
package main
import (
	"fmt"
)
// main is a placeholder entry point: for now it only prints a stub message.
func main() {
	fmt.Println("TO BE FILLED")
}

248
massdl.py
View File

@ -1,248 +0,0 @@
#!/usr/bin/env python2.7
# *-* encoding: utf-8 *-*
# For NNM-Club Uploaders
# Copyright (c) 2012-2019 Vladimir Hodakov <vladimir@hodakov.me>
#
# Usage:
#
# python2 ./massdl.py [forum_number]
#
# For getting help, execute "python2 ./massdl.py" without params
import time, tempfile, os, sys, commands
import lib.config as uconfig
import lib.messages as umessages
import dateutil
from dateutil.parser import *
# Script version; interpolated into the help text and the log banner.
version = 0.92
# Start time in Unix epoch seconds; part of the log file name.
unixtime = int(time.time())
# mkstemp() returns (fd, path); the path is handed to curl as its cookie file.
cookie = tempfile.mkstemp()
# Values taken from the user configuration.
curdir = uconfig.workdir
domain = uconfig.domain
# One log file per run, placed directly in the working directory.
log_file = "%s/%i-massdl.log" % (curdir, unixtime)
def write_to_log(imessage):
    """Append *imessage* verbatim to the log file of the current run.

    The file named by the module-level ``log_file`` is opened in append mode
    on every call. No newline is added here, so callers pass complete lines.
    """
    # ``with`` guarantees the handle is closed even when the write fails.
    with open(log_file, 'a') as log:
        log.write(imessage)
def cnsl_message(imessage, *args):
    """Report the event *imessage* on the console and in the log file.

    *imessage* selects the message pair from ``umessages``; *args* carries
    the values interpolated into it:

    * ``login_string``: (curl command line,)
    * ``forum``: (forum title,)
    * ``pages``: (page number,)
    * ``found_topics``: (topic count,)
    * ``downloading_torrent``: (index, total, title, topic URL, date)
    * ``skip_non_approved`` / ``skip_no_dl_link``: (index, total, title,
      topic URL)

    The events ``invpyver``, ``nocurl`` and ``login_failed`` terminate the
    process with ``sys.exit(666)``; ``help`` and ``done`` terminate it with
    status 0. Unknown event names are ignored.
    """
    # NOTE(review): exit statuses are 8 bits wide on POSIX, so 666 is
    # presumably reported to the shell as 154 -- confirm before relying on it.
    if imessage == "invpyver":
        print(umessages.c066)
        write_to_log(umessages.m066)
        sys.exit(666)
    elif imessage == "nocurl":
        print(umessages.c067)
        write_to_log(umessages.m067)
        sys.exit(666)
    elif imessage == "help":
        print(umessages.c000 % version)
        sys.exit(0)
    elif imessage == "start":
        print(umessages.c010 % (sys.argv[0], version))
        print(umessages.c020 % sys.argv[1])
        write_to_log(umessages.m020)
    elif imessage == "login_ok":
        print(umessages.c120 % set_username()[0])
        write_to_log(umessages.m120 % cookie[1])
    elif imessage == "login_failed":
        print(umessages.c166 % set_username()[0])
        write_to_log(umessages.m166)
        sys.exit(666)
    elif imessage == "login_string":
        # NOTE(review): the command line contains the account password, so
        # it ends up in clear text on the console and in the log file.
        login_line = umessages.c199 % args[0]
        print(login_line)
        # There is no dedicated log message for this event; the console text
        # is reused. It has no trailing newline, so one is added to keep the
        # next log entry on its own line.
        write_to_log(login_line + "\n")
    elif imessage == "parsing_started":
        print(umessages.c200)
    elif imessage == "forum":
        print(umessages.c220 % args[0])
        write_to_log(umessages.m220 % args[0])
    elif imessage == "pages":
        print(umessages.c210 % args[0])
        write_to_log(umessages.m210 % args[0])
    elif imessage == "found_topics":
        print(umessages.c221 % args[0])
        write_to_log(umessages.m221 % args[0])
    elif imessage == "download_started":
        print(umessages.c300)
    elif imessage == "downloading_torrent":
        # Console shows the title and date; the log records the topic URL.
        print(umessages.c310 % (args[0], args[1], args[2], args[4]))
        write_to_log(umessages.m310 % (args[0], args[1], args[3]))
    elif imessage == "skip_non_approved":
        print(umessages.c340 % (args[0], args[1], args[2]))
        write_to_log(umessages.m340 % (args[0], args[1], args[2], args[3]))
    elif imessage == "skip_no_dl_link":
        print(umessages.c350 % (args[0], args[1], args[2]))
        write_to_log(umessages.m350 % (args[0], args[1], args[2], args[3]))
    elif imessage == "done":
        print(umessages.c021)
        write_to_log(umessages.m021)
        sys.exit(0)
def check_requirements():
    """Verify the interpreter version and the presence of the curl bindings.

    The script supports Python 2.7 only: anything older than 2.7 or from 3.0
    upwards is reported through ``cnsl_message("invpyver")``. A failed import
    of ``curl`` or ``pycurl`` is reported through ``cnsl_message("nocurl")``.
    """
    req_py_version = (2, 7)
    unsupported_py_version = (3, 0)
    if not req_py_version <= sys.version_info < unsupported_py_version:
        cnsl_message("invpyver")
    try:
        # Imported only to prove that the modules are installed.
        import curl
        import pycurl
    except ImportError:
        cnsl_message("nocurl")
def check_params(params):
    """Validate the command line and show the help text when it is wrong.

    *params* is the full argument vector (``sys.argv``): the script name
    followed by exactly one argument, the forum number, which must consist
    of digits only. Any other shape triggers ``cnsl_message("help")``.
    """
    # The length test short-circuits, so params[1] is only read when present.
    if len(params) != 2 or not str(params[1]).isdigit():
        cnsl_message("help")
def start_log():
    """Create (or truncate) the log file and write the version banner to it."""
    # ``with`` guarantees the handle is closed even when the write fails.
    with open(log_file, 'w') as log:
        log.write(umessages.m000 % version)
def set_username():
    """Return the configured credentials as a ``(username, password)`` tuple.

    Despite its name the function sets nothing; it only reads ``uconfig``.
    """
    credentials = (uconfig.username, uconfig.password)
    return credentials
def connect():
    """Log in to the forum with curl and store the resulting cookies.

    The login form is posted with the configured credentials. curl prints
    the cookie jar to stdout (``-c -``); that output is saved to the
    temporary cookie file whose path is ``cookie[1]``, which later curl
    calls pass with ``-b``. When the output does not contain the word
    "Cookie" the login is treated as failed and the script terminates.
    """
    username, password = set_username()
    write_to_log(umessages.m025 % domain)
    # NOTE(review): the credentials are placed into a shell command without
    # escaping or URL-encoding; special characters in them will break the
    # request or the command line.
    login_command = 'curl -c - -d "username=' + username + '&password=' + password + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php&code=58161005a04f0ee5" "' + domain + '/forum/login.php" 2>>' + log_file
    cnsl_message("login_string", login_command)
    cookie_txt = commands.getoutput(login_command)
    if "Cookie" not in cookie_txt:
        cnsl_message("login_failed")
        # cnsl_message() already exits for this event; kept as a safeguard.
        sys.exit(666)
    # Write and close the cookie file before anything else reads it. The
    # former read-back right after the write started at end-of-file and
    # printed nothing useful, so it is dropped.
    with open(cookie[1], 'w') as cookiefile:
        cookiefile.write(cookie_txt)
    cnsl_message("login_ok")
def iteratorium(links_array, offset):
    """Collect the links of all topics with torrents, page by page.

    links_array -- list that receives the topic links; it is extended in
                   place, so duplicates may remain in it.
    offset      -- ``start`` parameter of the first page to load; pages are
                   50 topics apart.

    Returns a new list with the collected links in order of first
    appearance and without duplicates.
    """
    # Pages are walked in a loop rather than by recursion, so the number of
    # pages is not limited by the interpreter's recursion depth.
    while True:
        cnsl_message("pages", str((offset // 50) + 1))
        forum_page = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '&start=' + str(offset) + '" | iconv -f cp1251 -t utf-8')
        for line in forum_page.splitlines():
            # Only rows that carry a download marker point to torrent topics.
            if 'viewtopic.php' in line and 'DL:' in line:
                for substring in line.split('"'):
                    if 'viewtopic.php' in substring:
                        # Keep the link up to its first parameter only.
                        links_array.append(substring.split('&')[0])
        # A "next" link on the page means there is at least one more page.
        if "След." not in forum_page:
            break
        offset += 50
    # Order-preserving de-duplication, done once with O(1) membership tests.
    seen = set()
    clean_array = []
    for item in links_array:
        if item not in seen:
            seen.add(item)
            clean_array.append(item)
    return clean_array
def download():
    """Download every torrent of the forum given on the command line.

    Steps:
    * load the forum start page and read the forum title from it;
    * walk all pages of the forum and collect the topic links;
    * create ``<workdir>/<forum number>`` if necessary;
    * fetch the torrent of every collected topic.
    """
    cnsl_message("parsing_started")
    # The URL is quoted like in every other curl call, so the shell never
    # treats "?" as a glob character.
    startpage = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '" | iconv -f cp1251 -t utf-8')
    # The title is taken from the last line mentioning "maintitle". When the
    # page has no parsable title the forum number is used instead, so a
    # layout change does not abort the run with a NameError.
    forumname = sys.argv[1]
    for line in startpage.splitlines():
        if 'maintitle' in line:
            try:
                forumname = line.split('>')[3].split('<')[0]
            except IndexError:
                # Unexpected markup on this line; keep the previous value.
                pass
    cnsl_message("forum", forumname)
    # Collect the topic links from all pages, starting at offset 0.
    topics = iteratorium([], 0)
    cnsl_message("found_topics", str(len(topics)))
    # Torrents of a forum go into a sub-directory named after its number.
    torrentsdir = "%s/%s" % (curdir, sys.argv[1])
    if not os.path.exists(torrentsdir):
        os.makedirs(torrentsdir)
    get_torrent(topics, torrentsdir, forumname)
def parse_topic(topic, forumname, topics, torrents_count, torrentsdir):
    """Load one topic page and download its torrent if it is approved.

    topic          -- topic link relative to ``<domain>/forum/``.
    forumname      -- forum title (accepted for the caller, not used here).
    topics         -- list of all topic links; only its length is used.
    torrents_count -- 1-based position of *topic*, for progress messages.
    torrentsdir    -- directory the torrent file is saved into.

    Returns 340 when the topic is not approved by a moderator, 350 when it
    has no download link, and None after the download has been started.
    """
    topic_url = domain + '/forum/' + topic
    topicpage = commands.getoutput('curl -b ' + cookie[1] + ' "' + topic_url + '" | iconv -f cp1251 -t utf-8')
    position = str(torrents_count)
    total = str(len(topics))

    # A single pass over the page collects everything that is needed; for
    # each item the last matching line wins.
    topicname = topic  # fallback when the page has no parsable title
    not_approved = False
    tdate_raw = None
    dl_raw = ""
    for line in topicpage.splitlines():
        if 'maintitle' in line:
            try:
                topicname = line.split('>')[3].split('<')[0]
            except IndexError:
                # Unexpected markup on this line; keep the previous value.
                pass
        if 'не проверено модератором!' in line:
            not_approved = True
        if 'проверено модератором' in line:
            tdate_raw = line
        if 'download.php' in line:
            for lvl2 in line.split('>'):
                if 'download.php' in lvl2:
                    dl_raw = lvl2

    # Explicitly rejected, or carrying no approval mark at all.
    if not_approved or tdate_raw is None:
        cnsl_message("skip_non_approved", position, total, topicname, topic_url)
        return 340

    # dateutil does not parse Russian month abbreviations, so they are
    # replaced by month numbers first.
    months = (
        ('Янв', '01'), ('Фев', '02'), ('Мар', '03'), ('Апр', '04'),
        ('Май', '05'), ('Июн', '06'), ('Июл', '07'), ('Авг', '08'),
        ('Сен', '09'), ('Окт', '10'), ('Ноя', '11'), ('Дек', '12'),
    )
    for month_name, month_number in months:
        tdate_raw = tdate_raw.replace(month_name, month_number)
    # The date is the text of the first tag on the line, after its first
    # three space-separated words.
    date_text = tdate_raw.split('>')[1].split('<')[0].split(' ', 3)[3]
    topicdate = dateutil.parser.parse(date_text).strftime("%Y-%m-%d")

    if dl_raw == "":
        cnsl_message("skip_no_dl_link", position, total, topicname, topic_url)
        return 350
    # The link is the first double-quoted value of the matching fragment.
    downlink = dl_raw.split('"')[1]
    cnsl_message("downloading_torrent", position, total, topicname, topic_url, topicdate)
    os.chdir(torrentsdir)
    download_torrent(downlink, topicdate)
def download_torrent(downlink, topicdate):
    """Download one torrent file into the current directory.

    downlink  -- download link relative to ``<domain>/forum/``.
    topicdate -- date string that prefixes the saved file name.

    The server-side file name is read from the ``filename=`` part of the
    response headers; the file is saved as ``"<topicdate> <filename>"``.
    curl's diagnostics are appended to the log file.
    """
    # Local import: pipes.quote is the Python 2.7 shell-escaping helper.
    from pipes import quote

    url = domain + '/forum/' + downlink
    # Both the link and the file name come from the server, so every value
    # is shell-quoted before it is placed on a command line.
    basefilename = commands.getoutput('curl -sI -b ' + quote(cookie[1]) + ' -J -L ' + quote(url) + " | grep -o -E 'filename=.*$' | tr -d '\r\n' | sed -e 's/filename=//' | sed 's/\"//g'")
    # Drop any directory part so the file cannot land outside the current
    # directory.
    target = "%s %s" % (topicdate, os.path.basename(basefilename))
    os.system('curl -b %s -L %s -o %s 2>> %s' % (quote(cookie[1]), quote(url), quote(target), quote(log_file)))
def get_torrent(topics, torrentsdir, forumname):
    """Process every topic in *topics* in order.

    topics      -- list of topic links.
    torrentsdir -- directory the torrent files are saved into.
    forumname   -- forum title, passed through to ``parse_topic``.
    """
    cnsl_message("download_started")
    # Positions are 1-based because they are shown to the user as "n/total".
    for torrents_count, topic in enumerate(topics, 1):
        parse_topic(topic, forumname, topics, torrents_count, torrentsdir)
def cleanup():
    """Remove the temporary cookie file and finish the run.

    ``cnsl_message("done")`` reports success and exits the process.
    """
    # mkstemp() hands back an open descriptor as cookie[0]; release it
    # before deleting the file it refers to.
    os.close(cookie[0])
    os.remove(cookie[1])
    cnsl_message("done")
# Script entry point. The order matters: the arguments are validated first
# (a wrong command line shows the help and exits), then the log is created,
# and the environment check writes its errors to that log.
check_params(sys.argv)
start_log()
check_requirements()
# check_requirements() has already reported a failed import of these modules.
import curl, pycurl
cnsl_message("start")
# Log in, download the whole forum, then remove the cookie file and exit.
connect()
download()
cleanup()

View File