From 151f9a24dba688fb5700f5ba076e493c4ef0a875 Mon Sep 17 00:00:00 2001 From: Vladimir Hodakov Date: Sat, 14 Sep 2019 18:35:19 +0400 Subject: [PATCH] Fix some parsing issues by logging them --- .gitignore | 3 ++- lib/config.py.example | 4 ++-- lib/messages.py | 3 +++ massdl.py | 54 ++++++++++++++++++++++++++----------------- output/.gitkeep | 0 5 files changed, 40 insertions(+), 24 deletions(-) create mode 100644 output/.gitkeep diff --git a/.gitignore b/.gitignore index befc0dc..41600ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +output/* lib/config.py *.pyc -*.log \ No newline at end of file +*.log diff --git a/lib/config.py.example b/lib/config.py.example index f0431df..5243f93 100755 --- a/lib/config.py.example +++ b/lib/config.py.example @@ -2,8 +2,8 @@ username = 'username' password = 'password' +workdir = '/absolute/path/to/dest/workdir' # system values, edit at your own risk -domain = 'http://nnmclub.ro' -workdir = "./" \ No newline at end of file +domain = 'https://nnmclub.ro' \ No newline at end of file diff --git a/lib/messages.py b/lib/messages.py index 7077795..8c7ba47 100755 --- a/lib/messages.py +++ b/lib/messages.py @@ -27,6 +27,7 @@ c066 = "Ваша версия Python не поддерживается. Испо c067 = "У вас не установлен PycURL. Обратитесь к справке вашего дистрибутива для подробностей, как его установить." c120 = "Вы залогинились успешно как пользователь %s." c166 = "Не удалось залогиниться как пользователь %s. Проверьте ваши данные в файле lib/config.py!" +c199 = "Строка curl для логина: %s." c200 = "Начинается загрузка форума..." c210 = "Скачиваем страницу %s..." c220 = "Скачиваем форум %s" @@ -34,6 +35,7 @@ c221 = "Скачивание страниц завершено. Найдено c300 = "Начинаем загрузку торрентов..." c310 = "Скачиваю топик %s/%s: %s (обновлён %s)." c340 = "Топик %s/%s: %s не проверен модератором. Пропускается." +c350 = "Топик %s/%s: %s — не найдена ссылка для скачивания. Пропускается." m000 = '================================================================================\n= Скрипт скачки раздела NNM-Club для Linux и Unix-like ОС. =\n= Версия: %s =\n================================================================================\n' m020 = "[I] #020: окружение скрипта в порядке.\n" m021 = "[I] #021: работа завершена успешно. Завершено.\n" @@ -47,3 +49,4 @@ m220 = "[I] #220: успешно загружена стартовая стра m221 = "[I] #221: успешно загружены все страницы форума. Найдено топиков: %s.\n" m310 = "[I] #310 загружаем торрент %s из %s по ссылке %s.\n" m340 = "[W] #340 топик %s из %s [%s, %s] не проверен модераторами. Пропуск.\n" +m350 = "[W] #350 топик %s из %s [%s, %s] не содержит ссылки на скачивание. Пропуск.\n" diff --git a/massdl.py b/massdl.py index 1bf5c48..5c25366 100755 --- a/massdl.py +++ b/massdl.py @@ -18,59 +18,65 @@ version = 0.92 unixtime = int(time.time()) cookie = tempfile.mkstemp() curdir = uconfig.workdir -log_file = "/%i-massdl.log" % unixtime +log_file = curdir + "/%i-massdl.log" % unixtime domain = uconfig.domain def write_to_log(imessage): - log = open(curdir + log_file, 'a') + log = open(log_file, 'a') log.write(imessage) log.close() def cnsl_message(imessage, *args): if imessage == "invpyver": - print umessages.c066 + print(umessages.c066) write_to_log(umessages.m066) sys.exit(666) elif imessage == "nocurl": - print umessages.c067 + print(umessages.c067) write_to_log(umessages.m067) sys.exit(666) elif imessage == "help": - print umessages.c000 % version + print(umessages.c000 % version) sys.exit(0) elif imessage == "start": - print umessages.c010 % (sys.argv[0], version) - print umessages.c020 % sys.argv[1] + print(umessages.c010 % (sys.argv[0], version)) + print(umessages.c020 % sys.argv[1]) write_to_log(umessages.m020) elif imessage == "login_ok": - print umessages.c120 % set_username()[0] + print(umessages.c120 % set_username()[0]) write_to_log(umessages.m120 % cookie[1]) elif imessage == "login_failed": - print umessages.c166 % set_username()[0] + print(umessages.c166 % set_username()[0]) write_to_log(umessages.m166) sys.exit(666) + elif imessage == "login_string": + print(umessages.c199 % args[0]) + write_to_log(umessages.c199 % args[0]) elif imessage == "parsing_started": - print umessages.c200 + print(umessages.c200) elif imessage == "forum": - print umessages.c220 % args[0] + print(umessages.c220 % args[0]) write_to_log(umessages.m220 % args[0]) elif imessage == "pages": - print umessages.c210 % args[0] + print(umessages.c210 % args[0]) write_to_log(umessages.m210 % args[0]) elif imessage == "found_topics": - print umessages.c221 % args[0] + print(umessages.c221 % args[0]) write_to_log(umessages.m221 % args[0]) elif imessage == "download_started": - print umessages.c300 + print(umessages.c300) elif imessage == "downloading_torrent": - print umessages.c310 % (args[0], args[1], args[2], args[4]) + print(umessages.c310 % (args[0], args[1], args[2], args[4])) write_to_log(umessages.m310 % (args[0], args[1], args[3])) elif imessage == "skip_non_approved": - print umessages.c340 % (args[0], args[1], args[2]) + print(umessages.c340 % (args[0], args[1], args[2])) write_to_log(umessages.m340 % (args[0], args[1], args[2], args[3])) + elif imessage == "skip_no_dl_link": + print(umessages.c350 % (args[0], args[1], args[2])) + write_to_log(umessages.m350 % (args[0], args[1], args[2], args[3])) elif imessage == "done": - print umessages.c021 + print(umessages.c021) write_to_log(umessages.m021) sys.exit(0) @@ -106,7 +112,7 @@ def check_params(params): cnsl_message("help") def start_log(): - log = open(curdir + log_file, 'w') + log = open(log_file, 'w') log.write(umessages.m000 % version) log.close() @@ -116,11 +122,13 @@ def set_username(): def connect(): cookiefile = open(cookie[1], 'r+w') write_to_log(umessages.m025 % domain) - cookie_txt = commands.getoutput('curl -c - -d "username=' + set_username()[0] + '&password=' + set_username()[1] + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php&code=58161005a04f0ee5" "' + domain + '/forum/login.php" 2>>' + curdir + log_file ) + login_command = 'curl -c - -d "username=' + set_username()[0] + '&password=' + set_username()[1] + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php&code=58161005a04f0ee5" "' + domain + '/forum/login.php" 2>>' + log_file + cnsl_message("login_string", login_command) + cookie_txt = commands.getoutput(login_command) if cookie_txt.find("Cookie") != -1: # If it's -1, then there is no cookie! cookiefile.write(cookie_txt) - print cookiefile.read() + print(cookiefile.read()) cnsl_message("login_ok") else: cnsl_message("login_failed") @@ -196,11 +204,15 @@ def parse_topic(topic, forumname, topics, torrents_count, torrentsdir): # fix for russian months names (dateutil doesn't parse it, damnnt) tdate_raw = tdate_raw.replace('Янв', '01').replace('Фев', '02').replace('Мар', '03').replace('Апр', '04').replace('Май', '05').replace('Июн', '06').replace('Июл', '07').replace('Авг', '08').replace('Сен', '09').replace('Окт', '10').replace('Ноя', '11').replace('Дек', '12') topicdate = dateutil.parser.parse(tdate_raw.split('>')[1].split('<')[0].split(' ',3)[3]).strftime("%Y-%m-%d") + dl_raw = "" for line in topicpage.splitlines(): if 'download.php' in line: for lvl2 in line.split('>'): if 'download.php' in lvl2: dl_raw = lvl2 + if dl_raw == "": + cnsl_message("skip_no_dl_link", str(torrents_count), str(len(topics)), topicname, (domain + "/" + topic)) + return 350 downlink = dl_raw.split('"')[1] cnsl_message("downloading_torrent", str(torrents_count), str(len(topics)), topicname, (domain + "/" + topic), topicdate) os.chdir(torrentsdir) @@ -208,7 +220,7 @@ def parse_topic(topic, forumname, topics, torrents_count, torrentsdir): def download_torrent(downlink, topicdate): basefilename = commands.getoutput('curl -sI -b ' + cookie[1] + ' -J -L "' + domain + '/forum/' + downlink + '"' + "| grep -o -E 'filename=.*$' | tr -d '\r\n' | sed -e 's/filename=//' | sed 's/\"//g'") - os.system('curl -b %s -L "%s/forum/%s" -o "%s %s" 2>> %s%s' % (cookie[1], domain, downlink, topicdate, basefilename, curdir, log_file)) + os.system('curl -b %s -L "%s/forum/%s" -o "%s %s" 2>> %s' % (cookie[1], domain, downlink, topicdate, basefilename, log_file)) def get_torrent(topics, torrentsdir, forumname): cnsl_message("download_started") diff --git a/output/.gitkeep b/output/.gitkeep new file mode 100644 index 0000000..e69de29