Start rewriting the massdl script in Go
Due to the Python 2 EOL I need to pick a new language for the uploader tools. Instead of Python 3 I chose Go, because I have been working with this language for almost three and a half years. The Python 2 CLI app has been moved to the legacy/python2 branch and will be supported until 01 January 2020. The old shell scripts are still in the legacy/shell branch and don't work at the moment. I will not fix them; they are kept in the repository for historical reasons only.
This commit is contained in:
parent
151f9a24db
commit
bce5cef340
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,4 +1 @@
|
||||
output/*
|
||||
lib/config.py
|
||||
*.pyc
|
||||
*.log
|
||||
.DS_Store
|
@ -1,9 +0,0 @@
|
||||
# User-editable values: replace the placeholders below before running.

# Account name and password used for the login request.
username = 'username'
password = 'password'
# Absolute path of the working directory (no trailing slash in this template).
workdir = '/absolute/path/to/dest/workdir'

# System values, edit at your own risk.

# Base URL of the site: scheme and host, without a trailing slash.
domain = 'https://nnmclub.ro'
|
@ -1,52 +0,0 @@
|
||||
# *-* encoding: utf-8 *-*
|
||||
"""
|
||||
Note, all script messages must be formatted as:
|
||||
|
||||
[status] #status_number: message
|
||||
|
||||
where:
|
||||
|
||||
status: "I" = information, "W" - warning, "E" - error.
|
||||
status_number - error number:
|
||||
|
||||
0xx -- script environment messages
|
||||
1xx -- authentication messages
|
||||
2xx -- grabbing and parsing messages
|
||||
3xx -- torrents downloading messages
|
||||
4xx -- torrents check messages
|
||||
|
||||
Last message in log must be ended with "Stopped" word.
|
||||
|
||||
Information code numbers start from x00, success messages from x20, warnings from x40 and errors from x66. Console messages use the same numbers with the prefix 'c'; log messages use the prefix 'm'.
|
||||
"""
|
||||
# Console messages (prefix "c"). The number after the prefix follows the
# scheme from the module docstring: 0xx environment, 1xx authentication,
# 2xx grabbing and parsing, 3xx torrent downloading.
# Every "%s" is a placeholder filled in by the caller with the "%" operator.

# --- 0xx: script environment ---
# Help text; takes the script version.
# NOTE(review): the text names "massdl.sh" and mixes "[номер_раздела]" with
# "[номер_форума]" -- confirm whether that wording is intended.
c000 = "massdl.sh — скрипт для пакетной загрузки торрент-файлов из разделов NNM-club.ru.\nИспользование:\n\n\tpython massdl.py [номер_раздела]\n\nгде [номер_форума] — номер раздела на NNM-club.ru (например, для форума '*Nix Игры' это 316).\nПримеры:\n\n\tpython massdl.py 316 — скачивание форума '*Nix Игры'\n\tpython massdl.py 332 — скачивание форума 'Русский рок'\n\nАвтор: Владимир «fat0troll» Ходаков.\nСкрипт использует Python версии 2.7. При запуске убедитесь, что используете правильный интерпретатор Python!\nКонфигурация скрипта производится в конфигурационном файле lib/config.py, имеющем опции с очевидными названиями.\nВерсия %s\n"
# Start banner; takes the script name and the version.
c010 = "Выполняется скрипт %s.\nВерсия скрипта %s."
# Takes the command-line parameter.
c020 = "Параметры командной строки: %s"
# Successful end of the run.
c021 = "Скрипт завершен успешно."
# Unsupported Python version.
c066 = "Ваша версия Python не поддерживается. Используйте Python 2.7!"
# PycURL is not installed.
c067 = "У вас не установлен PycURL. Обратитесь к справке вашего дистрибутива для подробностей, как его установить."

# --- 1xx: authentication ---
# Login succeeded / failed; both take the user name.
c120 = "Вы залогинились успешно как пользователь %s."
c166 = "Не удалось залогиниться как пользователь %s. Проверьте ваши данные в файле lib/config.py!"
# Takes the curl command line used for the login.
c199 = "Строка curl для логина: %s."

# --- 2xx: grabbing and parsing ---
c200 = "Начинается загрузка форума..."
# Takes the page number.
c210 = "Скачиваем страницу %s..."
# Takes the forum name.
c220 = "Скачиваем форум %s"
# Takes the number of topics found.
c221 = "Скачивание страниц завершено. Найдено топиков: %s."

# --- 3xx: torrent downloading ---
c300 = "Начинаем загрузку торрентов..."
# Takes: current index, total, topic name, update date.
c310 = "Скачиваю топик %s/%s: %s (обновлён %s)."
# Both skip messages take: current index, total, topic name.
c340 = "Топик %s/%s: %s не проверен модератором. Пропускается."
c350 = "Топик %s/%s: %s — не найдена ссылка для скачивания. Пропускается."
|
||||
# Log-file messages (prefix "m"). Each entry is written to the log verbatim,
# so every entry must carry its own trailing "\n"; otherwise the next entry
# is glued onto the same line.
# Every "%s" is a placeholder filled in by the caller with the "%" operator.

# Log header; takes the script version.
m000 = '================================================================================\n= Скрипт скачки раздела NNM-Club для Linux и Unix-like ОС. =\n= Версия: %s =\n================================================================================\n'

# --- 0xx: script environment ---
m020 = "[I] #020: окружение скрипта в порядке.\n"
m021 = "[I] #021: работа завершена успешно. Завершено.\n"
# NOTE(review): the constant is named m025 but the text carries #100 --
# confirm which number is intended before renaming anything.
m025 = "[I] #100: пытаемся войти на %s...\n"
m066 = "[E] #066: версия Python не поддерживается. Завершено.\n"
m067 = "[E] #067: не установлен cURL. Завершено\n"

# --- 1xx: authentication ---
# Takes the path of the cookie file.
m120 = "[I] #120: выполнен успешный вход на сайт. Cookie лежит по адресу %s.\n"
m166 = "[E] #166: неправильное имя пользователя или пароль. Завершено.\n"

# --- 2xx: grabbing and parsing ---
# Takes the page number. The trailing newline was missing here, which made
# consecutive page entries run together on a single log line.
m210 = "[I] #200: загружаем страницу %s...\n"
# Takes the forum name.
m220 = "[I] #220: успешно загружена стартовая страница форума '%s'.\n"
# Takes the number of topics found.
m221 = "[I] #221: успешно загружены все страницы форума. Найдено топиков: %s.\n"

# --- 3xx: torrent downloading ---
# Takes: current index, total, topic link.
m310 = "[I] #310 загружаем торрент %s из %s по ссылке %s.\n"
# Both skip messages take: current index, total, topic name, topic link.
m340 = "[W] #340 топик %s из %s [%s, %s] не проверен модераторами. Пропуск.\n"
m350 = "[W] #350 топик %s из %s [%s, %s] не содержит ссылки на скачивание. Пропуск.\n"
|
13
main.go
Normal file
13
main.go
Normal file
@ -0,0 +1,13 @@
|
||||
// NNM-Club torrent files mass downloader
|
||||
// Created for Uploaders group
|
||||
// (c) Vladimir Hodakov, 2012-2019
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// main is the program entry point. The Go rewrite has no logic yet, so it
// only prints a placeholder line to standard output.
func main() {
	const placeholder = "TO BE FILLED"

	fmt.Println(placeholder)
}
|
248
massdl.py
248
massdl.py
@ -1,248 +0,0 @@
|
||||
#!/usr/bin/env python2.7
|
||||
# *-* encoding: utf-8 *-*
|
||||
# For NNM-Club Uploaders
|
||||
# Copyright (c) 2012-2019 Vladimir Hodakov <vladimir@hodakov.me>
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# python2 ./massdl.py [forum_number]
|
||||
#
|
||||
# For getting help, execute "python2 ./massdl.py" without params
|
||||
|
||||
import time, tempfile, os, sys, commands
|
||||
import lib.config as uconfig
|
||||
import lib.messages as umessages
|
||||
import dateutil
|
||||
from dateutil.parser import *
|
||||
# Script version; interpolated into the help text, the start banner and the
# log header.
version = 0.92
# Start time in whole seconds since the epoch; used to name the log file.
unixtime = int(time.time())
# tempfile.mkstemp() creates the file and returns an (open descriptor, path)
# pair. The rest of this file only uses cookie[1], the path, as the curl
# cookie file.
# NOTE(review): the descriptor in cookie[0] is never closed in the code
# visible here.
cookie = tempfile.mkstemp()
# Working directory taken from the user configuration.
curdir = uconfig.workdir
# "%" binds tighter than "+", so only the file-name part is formatted.
log_file = curdir + "/%i-massdl.log" % unixtime
# Base URL of the site taken from the configuration.
domain = uconfig.domain
|
||||
|
||||
|
||||
def write_to_log(imessage):
    """Append *imessage* to the log file of the current run.

    The text is written verbatim: no newline or timestamp is added, so the
    caller passes a fully formatted entry.
    """
    # The context manager closes the handle even when the write raises.
    with open(log_file, 'a') as log:
        log.write(imessage)
|
||||
|
||||
def cnsl_message(imessage, *args):
    """Report the event named *imessage* on the console and/or in the log.

    *imessage* selects the event; *args* carries the values interpolated
    into the message templates:

    * "invpyver", "nocurl", "login_failed" -- print, log, then exit with
      status 666.
    * "help" -- print the help text and exit with status 0.
    * "start" -- print the start banner and the command-line parameter.
    * "login_ok" -- report the successful login and the cookie file path.
    * "login_string" -- args[0] is the curl login command line.
    * "parsing_started", "download_started" -- console only.
    * "forum" -- args[0] is the forum name.
    * "pages" -- args[0] is the page number.
    * "found_topics" -- args[0] is the number of topics.
    * "downloading_torrent" -- args are (index, total, topic name,
      topic link, topic date).
    * "skip_non_approved", "skip_no_dl_link" -- args are (index, total,
      topic name, topic link).
    * "done" -- print, log, then exit with status 0.

    Any other value of *imessage* is silently ignored.
    """
    if imessage == "invpyver":
        print(umessages.c066)
        write_to_log(umessages.m066)
        sys.exit(666)
    elif imessage == "nocurl":
        print(umessages.c067)
        write_to_log(umessages.m067)
        sys.exit(666)
    elif imessage == "help":
        print(umessages.c000 % version)
        sys.exit(0)
    elif imessage == "start":
        print(umessages.c010 % (sys.argv[0], version))
        print(umessages.c020 % sys.argv[1])
        write_to_log(umessages.m020)
    elif imessage == "login_ok":
        print(umessages.c120 % set_username()[0])
        write_to_log(umessages.m120 % cookie[1])
    elif imessage == "login_failed":
        print(umessages.c166 % set_username()[0])
        write_to_log(umessages.m166)
        sys.exit(666)
    elif imessage == "login_string":
        # The login command embeds the account password in its POST data.
        # Mask it so the secret reaches neither the terminal nor the log.
        shown_command = args[0]
        secret = set_username()[1]
        if secret:
            shown_command = shown_command.replace(
                '&password=' + secret + '&', '&password=********&')
        print(umessages.c199 % shown_command)
        # c199 is a console template without a trailing newline; add one so
        # the next log entry starts on its own line.
        write_to_log(umessages.c199 % shown_command + "\n")
    elif imessage == "parsing_started":
        print(umessages.c200)
    elif imessage == "forum":
        print(umessages.c220 % args[0])
        write_to_log(umessages.m220 % args[0])
    elif imessage == "pages":
        print(umessages.c210 % args[0])
        write_to_log(umessages.m210 % args[0])
    elif imessage == "found_topics":
        print(umessages.c221 % args[0])
        write_to_log(umessages.m221 % args[0])
    elif imessage == "download_started":
        print(umessages.c300)
    elif imessage == "downloading_torrent":
        # The console shows the topic date (args[4]); the log records the
        # topic link (args[3]) instead.
        print(umessages.c310 % (args[0], args[1], args[2], args[4]))
        write_to_log(umessages.m310 % (args[0], args[1], args[3]))
    elif imessage == "skip_non_approved":
        print(umessages.c340 % (args[0], args[1], args[2]))
        write_to_log(umessages.m340 % (args[0], args[1], args[2], args[3]))
    elif imessage == "skip_no_dl_link":
        print(umessages.c350 % (args[0], args[1], args[2]))
        write_to_log(umessages.m350 % (args[0], args[1], args[2], args[3]))
    elif imessage == "done":
        print(umessages.c021)
        write_to_log(umessages.m021)
        sys.exit(0)
|
||||
|
||||
def check_requirements():
    """Verify the interpreter version and the presence of the curl bindings.

    Reports "invpyver" through cnsl_message() unless the running interpreter
    is at least 2.7 and older than 3.0, and reports "nocurl" when the
    ``curl`` or ``pycurl`` module cannot be imported.
    """
    oldest_supported = (2, 7)
    first_unsupported = (3, 0)
    running = sys.version_info

    # Supported range is the half-open interval [2.7, 3.0).
    if not (oldest_supported <= running < first_unsupported):
        cnsl_message("invpyver")

    try:
        import curl
        import pycurl
    except ImportError:
        cnsl_message("nocurl")
|
||||
|
||||
def check_params(params):
    """Validate the command line given as an argv-style sequence.

    Exactly one argument besides the script name is expected, and it must
    consist of digits only. Anything else is reported as "help" through
    cnsl_message(). Returns None when the parameters are acceptable.
    """
    # The length test comes first, so params[1] is only read when it exists.
    if len(params) != 2 or not str(params[1]).isdigit():
        cnsl_message("help")
|
||||
|
||||
def start_log():
    """Create (or truncate) the log file and write the header banner.

    The banner template is filled in with the script version.
    """
    # The context manager closes the handle even when the write raises.
    with open(log_file, 'w') as log:
        log.write(umessages.m000 % version)
|
||||
|
||||
def set_username():
    """Return the configured credentials as a (username, password) tuple."""
    credentials = (uconfig.username, uconfig.password)
    return credentials
|
||||
|
||||
def connect():
    """Log in to the site with curl and store the session cookie on disk.

    Runs ``curl -c -`` against ``<domain>/forum/login.php`` with the
    configured credentials; curl's stderr is appended to the log file. When
    the output contains the word "Cookie" it is saved to the cookie file
    (cookie[1]) and "login_ok" is reported. Otherwise "login_failed" is
    reported and the script exits with status 666.
    """
    username, password = set_username()
    write_to_log(umessages.m025 % domain)
    # NOTE(review): the credentials are interpolated into a shell command
    # without quoting or URL-encoding; a password containing '"', '$', '&'
    # or similar characters breaks the request. Confirm before relying on it.
    login_command = 'curl -c - -d "username=' + username + '&password=' + password + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php&code=58161005a04f0ee5" "' + domain + '/forum/login.php" 2>>' + log_file
    cnsl_message("login_string", login_command)
    cookie_txt = commands.getoutput(login_command)

    # find() returns -1 when curl printed no cookie jar at all.
    if cookie_txt.find("Cookie") == -1:
        cnsl_message("login_failed")
        # cnsl_message("login_failed") exits on its own; kept as a safeguard.
        sys.exit(666)

    # Write and close the cookie file before any later curl call reads it.
    # The previous code opened it with the invalid mode 'r+w', never closed
    # it, and read from it straight after writing without a seek.
    with open(cookie[1], 'w') as cookiefile:
        cookiefile.write(cookie_txt)
    cnsl_message("login_ok")
|
||||
|
||||
def iteratorium(links_array, offset):
    """Collect topic links from the forum pages, starting at *offset*.

    links_array -- list that receives every link found; it is extended in
                   place and may already contain links.
    offset      -- value of the ``start`` query parameter of the first page
                   to load; it advances by 50 per page.

    A line contributes links only when it contains both 'viewtopic.php' and
    'DL:'. Pages are fetched until one no longer contains the "След." marker.
    Returns a new list with the links of *links_array* in first-seen order
    and without duplicates.
    """
    # Pages are walked in a loop rather than by recursion, so a very long
    # forum cannot hit the interpreter's recursion limit.
    while True:
        # "//" keeps the page number an integer on every Python version.
        cnsl_message("pages", str((offset // 50) + 1))
        forum_page = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '&start=' + str(offset) + '" | iconv -f cp1251 -t utf-8')
        for line in forum_page.splitlines():
            if 'viewtopic.php' in line and 'DL:' in line:
                for substring in line.split('"'):
                    if 'viewtopic.php' in substring:
                        # Keep only the part before the first '&'.
                        links_array.append(substring.split('&')[0])
        # Check for the next page.
        if forum_page.find("След.") == -1:
            break
        offset += 50

    # De-duplicate once, at the end, with a set for constant-time lookups.
    seen = set()
    clean_array = []
    for item in links_array:
        if item not in seen:
            seen.add(item)
            clean_array.append(item)
    return clean_array
|
||||
|
||||
def download():
    """Download every torrent of the forum named on the command line.

    Steps:

    * fetch the forum start page and extract the forum name from the last
      line containing 'maintitle';
    * collect the topic links of all forum pages via iteratorium();
    * create ``<workdir>/<forum number>`` when it does not exist;
    * hand the topics over to get_torrent().

    A NameError is raised when the start page has no 'maintitle' line.
    """
    cnsl_message("parsing_started")

    # Firstly, fetch the forum's start page, converted from cp1251 to UTF-8.
    forum_id = sys.argv[1]
    fetch_cmd = 'curl -b ' + cookie[1] + ' ' + domain + '/forum/viewforum.php?f=' + forum_id + ' | iconv -f cp1251 -t utf-8'
    startpage = commands.getoutput(fetch_cmd)

    # The forum name sits in the last line that mentions 'maintitle'.
    for row in startpage.splitlines():
        if 'maintitle' in row:
            title_row = row
    forumname = title_row.split('>')[3].split('<')[0]
    cnsl_message("forum", forumname)

    # Walk all forum pages and gather the topic links.
    topics = iteratorium([], 0)
    cnsl_message("found_topics", str(len(topics)))

    # Torrents go into a per-forum directory below the working directory.
    torrentsdir = "%s/%s" % (curdir, forum_id)
    if not os.path.exists(torrentsdir):
        os.makedirs(torrentsdir)
    get_torrent(topics, torrentsdir, forumname)
|
||||
|
||||
def parse_topic(topic, forumname, topics, torrents_count, torrentsdir):
    """Inspect one topic page and download its torrent when it is eligible.

    topic          -- topic link relative to ``<domain>/forum/``.
    forumname      -- forum name (not used by this function).
    topics         -- list of all topics; only its length is reported.
    torrents_count -- 1-based position of this topic, used in messages.
    torrentsdir    -- directory the torrent is downloaded into.

    Returns 340 when the topic is not approved by a moderator, 350 when the
    page has no download link, and None after a download was started.
    A NameError is raised when the page has no 'maintitle' line.
    """
    fetch_cmd = 'curl -b ' + cookie[1] + ' "' + domain + '/forum/' + topic + '" | iconv -f cp1251 -t utf-8'
    page_lines = commands.getoutput(fetch_cmd).splitlines()

    # The topic title sits in the last line that mentions 'maintitle'.
    for row in page_lines:
        if 'maintitle' in row:
            title_row = row
    topicname = title_row.split('>')[3].split('<')[0]

    # Positional arguments shared by every progress message below.
    progress = (str(torrents_count), str(len(topics)), topicname, (domain + "/" + topic))

    # An explicit "not checked by a moderator" marker skips the topic.
    if any('не проверено модератором!' in row for row in page_lines):
        cnsl_message("skip_non_approved", *progress)
        return 340

    # Otherwise the last line carrying the approval marker holds the date;
    # a page without any such line is treated as not approved as well.
    approval_row = None
    for row in page_lines:
        if 'проверено модератором' in row:
            approval_row = row
    if approval_row is None:
        cnsl_message("skip_non_approved", *progress)
        return 340

    # dateutil does not parse Russian month abbreviations, so they are
    # replaced by month numbers first (same order as the original chain).
    month_numbers = (
        ('Янв', '01'), ('Фев', '02'), ('Мар', '03'), ('Апр', '04'),
        ('Май', '05'), ('Июн', '06'), ('Июл', '07'), ('Авг', '08'),
        ('Сен', '09'), ('Окт', '10'), ('Ноя', '11'), ('Дек', '12'),
    )
    for month_name, month_number in month_numbers:
        approval_row = approval_row.replace(month_name, month_number)
    # NOTE(review): parse() is called without dayfirst; if the page prints
    # the day before the month, days up to 12 may be read as months --
    # verify against a real page.
    date_text = approval_row.split('>')[1].split('<')[0].split(' ', 3)[3]
    topicdate = dateutil.parser.parse(date_text).strftime("%Y-%m-%d")

    # The last '>'-separated fragment mentioning 'download.php' wins.
    dl_raw = ""
    for row in page_lines:
        for fragment in row.split('>'):
            if 'download.php' in fragment:
                dl_raw = fragment
    if not dl_raw:
        cnsl_message("skip_no_dl_link", *progress)
        return 350

    # The link is the first double-quoted value inside that fragment.
    downlink = dl_raw.split('"')[1]
    cnsl_message("downloading_torrent", *(progress + (topicdate,)))
    # curl writes to a relative file name, so switch to the target directory.
    os.chdir(torrentsdir)
    download_torrent(downlink, topicdate)
|
||||
|
||||
def download_torrent(downlink, topicdate):
    """Download one torrent file into the current working directory.

    downlink  -- download link relative to ``<domain>/forum/``.
    topicdate -- date string used as a prefix of the saved file name.

    A header-only request first extracts the ``filename=`` value from the
    response headers. The file is then fetched and saved as
    "<topicdate> <filename>", with curl's stderr appended to the log file.
    """
    torrent_url = domain + '/forum/' + downlink
    # Headers only (-I): keep the filename= value, strip CR/LF and quotes.
    header_cmd = 'curl -sI -b ' + cookie[1] + ' -J -L "' + torrent_url + '"' + "| grep -o -E 'filename=.*$' | tr -d '\r\n' | sed -e 's/filename=//' | sed 's/\"//g'"
    basefilename = commands.getoutput(header_cmd)

    fetch_cmd = 'curl -b %s -L "%s/forum/%s" -o "%s %s" 2>> %s' % (cookie[1], domain, downlink, topicdate, basefilename, log_file)
    os.system(fetch_cmd)
|
||||
|
||||
def get_torrent(topics, torrentsdir, forumname):
    """Process every topic in *topics* in order.

    Reports "download_started" once, then calls parse_topic() for each topic
    together with its 1-based position in the list.
    """
    cnsl_message("download_started")
    for position, topic in enumerate(topics, 1):
        parse_topic(topic, forumname, topics, position, torrentsdir)
|
||||
|
||||
def cleanup():
    """Delete the temporary cookie file and report the "done" event."""
    cookie_path = cookie[1]
    os.unlink(cookie_path)
    cnsl_message("done")
|
||||
|
||||
|
||||
# Script entry sequence: the calls below run in this order every time the
# file is executed, and several of them may terminate the process.

# Show the help text and exit unless exactly one numeric argument was given.
check_params(sys.argv)
# Create or truncate the log file and write the header banner.
start_log()
# Abort on an unsupported Python version or missing curl bindings.
check_requirements()

# Imported only after check_requirements() has reported a missing module in
# a readable way.
import curl, pycurl

# Print the start banner, log in, download the forum, then remove the cookie
# file and exit.
cnsl_message("start")
connect()
download()
cleanup()
|
Loading…
Reference in New Issue
Block a user