Fix some parsing issues by logging them
This commit is contained in:
parent
b1cf0bf32d
commit
151f9a24db
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
output/*
|
||||
lib/config.py
|
||||
*.pyc
|
||||
*.log
|
@ -2,8 +2,8 @@
|
||||
|
||||
username = 'username'
|
||||
password = 'password'
|
||||
workdir = '/absolute/path/to/dest/workdir'
|
||||
|
||||
# system values, edit at your own risk
|
||||
|
||||
domain = 'http://nnmclub.ro'
|
||||
workdir = "./"
|
||||
domain = 'https://nnmclub.ro'
|
@ -27,6 +27,7 @@ c066 = "Ваша версия Python не поддерживается. Испо
|
||||
c067 = "У вас не установлен PycURL. Обратитесь к справке вашего дистрибутива для подробностей, как его установить."
|
||||
c120 = "Вы залогинились успешно как пользователь %s."
|
||||
c166 = "Не удалось залогиниться как пользователь %s. Проверьте ваши данные в файле lib/config.py!"
|
||||
c199 = "Строка curl для логина: %s."
|
||||
c200 = "Начинается загрузка форума..."
|
||||
c210 = "Скачиваем страницу %s..."
|
||||
c220 = "Скачиваем форум %s"
|
||||
@ -34,6 +35,7 @@ c221 = "Скачивание страниц завершено. Найдено
|
||||
c300 = "Начинаем загрузку торрентов..."
|
||||
c310 = "Скачиваю топик %s/%s: %s (обновлён %s)."
|
||||
c340 = "Топик %s/%s: %s не проверен модератором. Пропускается."
|
||||
c350 = "Топик %s/%s: %s — не найдена ссылка для скачивания. Пропускается."
|
||||
m000 = '================================================================================\n= Скрипт скачки раздела NNM-Club для Linux и Unix-like ОС. =\n= Версия: %s =\n================================================================================\n'
|
||||
m020 = "[I] #020: окружение скрипта в порядке.\n"
|
||||
m021 = "[I] #021: работа завершена успешно. Завершено.\n"
|
||||
@ -47,3 +49,4 @@ m220 = "[I] #220: успешно загружена стартовая стра
|
||||
m221 = "[I] #221: успешно загружены все страницы форума. Найдено топиков: %s.\n"
|
||||
m310 = "[I] #310 загружаем торрент %s из %s по ссылке %s.\n"
|
||||
m340 = "[W] #340 топик %s из %s [%s, %s] не проверен модераторами. Пропуск.\n"
|
||||
m350 = "[W] #350 топик %s из %s [%s, %s] не содержит ссылки на скачивание. Пропуск.\n"
|
||||
|
54
massdl.py
54
massdl.py
@ -18,59 +18,65 @@ version = 0.92
|
||||
unixtime = int(time.time())
|
||||
cookie = tempfile.mkstemp()
|
||||
curdir = uconfig.workdir
|
||||
log_file = "/%i-massdl.log" % unixtime
|
||||
log_file = curdir + "/%i-massdl.log" % unixtime
|
||||
domain = uconfig.domain
|
||||
|
||||
|
||||
def write_to_log(imessage):
|
||||
log = open(curdir + log_file, 'a')
|
||||
log = open(log_file, 'a')
|
||||
log.write(imessage)
|
||||
log.close()
|
||||
|
||||
def cnsl_message(imessage, *args):
|
||||
if imessage == "invpyver":
|
||||
print umessages.c066
|
||||
print(umessages.c066)
|
||||
write_to_log(umessages.m066)
|
||||
sys.exit(666)
|
||||
elif imessage == "nocurl":
|
||||
print umessages.c067
|
||||
print(umessages.c067)
|
||||
write_to_log(umessages.m067)
|
||||
sys.exit(666)
|
||||
elif imessage == "help":
|
||||
print umessages.c000 % version
|
||||
print(umessages.c000 % version)
|
||||
sys.exit(0)
|
||||
elif imessage == "start":
|
||||
print umessages.c010 % (sys.argv[0], version)
|
||||
print umessages.c020 % sys.argv[1]
|
||||
print(umessages.c010 % (sys.argv[0], version))
|
||||
print(umessages.c020 % sys.argv[1])
|
||||
write_to_log(umessages.m020)
|
||||
elif imessage == "login_ok":
|
||||
print umessages.c120 % set_username()[0]
|
||||
print(umessages.c120 % set_username()[0])
|
||||
write_to_log(umessages.m120 % cookie[1])
|
||||
elif imessage == "login_failed":
|
||||
print umessages.c166 % set_username()[0]
|
||||
print(umessages.c166 % set_username()[0])
|
||||
write_to_log(umessages.m166)
|
||||
sys.exit(666)
|
||||
elif imessage == "login_string":
|
||||
print(umessages.c199 % args[0])
|
||||
write_to_log(umessages.c199 % args[0])
|
||||
elif imessage == "parsing_started":
|
||||
print umessages.c200
|
||||
print(umessages.c200)
|
||||
elif imessage == "forum":
|
||||
print umessages.c220 % args[0]
|
||||
print(umessages.c220 % args[0])
|
||||
write_to_log(umessages.m220 % args[0])
|
||||
elif imessage == "pages":
|
||||
print umessages.c210 % args[0]
|
||||
print(umessages.c210 % args[0])
|
||||
write_to_log(umessages.m210 % args[0])
|
||||
elif imessage == "found_topics":
|
||||
print umessages.c221 % args[0]
|
||||
print(umessages.c221 % args[0])
|
||||
write_to_log(umessages.m221 % args[0])
|
||||
elif imessage == "download_started":
|
||||
print umessages.c300
|
||||
print(umessages.c300)
|
||||
elif imessage == "downloading_torrent":
|
||||
print umessages.c310 % (args[0], args[1], args[2], args[4])
|
||||
print(umessages.c310 % (args[0], args[1], args[2], args[4]))
|
||||
write_to_log(umessages.m310 % (args[0], args[1], args[3]))
|
||||
elif imessage == "skip_non_approved":
|
||||
print umessages.c340 % (args[0], args[1], args[2])
|
||||
print(umessages.c340 % (args[0], args[1], args[2]))
|
||||
write_to_log(umessages.m340 % (args[0], args[1], args[2], args[3]))
|
||||
elif imessage == "skip_no_dl_link":
|
||||
print(umessages.c350 % (args[0], args[1], args[2]))
|
||||
write_to_log(umessages.m350 % (args[0], args[1], args[2], args[3]))
|
||||
elif imessage == "done":
|
||||
print umessages.c021
|
||||
print(umessages.c021)
|
||||
write_to_log(umessages.m021)
|
||||
sys.exit(0)
|
||||
|
||||
@ -106,7 +112,7 @@ def check_params(params):
|
||||
cnsl_message("help")
|
||||
|
||||
def start_log():
|
||||
log = open(curdir + log_file, 'w')
|
||||
log = open(log_file, 'w')
|
||||
log.write(umessages.m000 % version)
|
||||
log.close()
|
||||
|
||||
@ -116,11 +122,13 @@ def set_username():
|
||||
def connect():
|
||||
cookiefile = open(cookie[1], 'r+w')
|
||||
write_to_log(umessages.m025 % domain)
|
||||
cookie_txt = commands.getoutput('curl -c - -d "username=' + set_username()[0] + '&password=' + set_username()[1] + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php&code=58161005a04f0ee5" "' + domain + '/forum/login.php" 2>>' + curdir + log_file )
|
||||
login_command = 'curl -c - -d "username=' + set_username()[0] + '&password=' + set_username()[1] + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php&code=58161005a04f0ee5" "' + domain + '/forum/login.php" 2>>' + log_file
|
||||
cnsl_message("login_string", login_command)
|
||||
cookie_txt = commands.getoutput(login_command)
|
||||
if cookie_txt.find("Cookie") != -1:
|
||||
# If it's -1, then there is no cookie!
|
||||
cookiefile.write(cookie_txt)
|
||||
print cookiefile.read()
|
||||
print(cookiefile.read())
|
||||
cnsl_message("login_ok")
|
||||
else:
|
||||
cnsl_message("login_failed")
|
||||
@ -196,11 +204,15 @@ def parse_topic(topic, forumname, topics, torrents_count, torrentsdir):
|
||||
# fix for Russian month names (dateutil doesn't parse them, dammit)
|
||||
tdate_raw = tdate_raw.replace('Янв', '01').replace('Фев', '02').replace('Мар', '03').replace('Апр', '04').replace('Май', '05').replace('Июн', '06').replace('Июл', '07').replace('Авг', '08').replace('Сен', '09').replace('Окт', '10').replace('Ноя', '11').replace('Дек', '12')
|
||||
topicdate = dateutil.parser.parse(tdate_raw.split('>')[1].split('<')[0].split(' ',3)[3]).strftime("%Y-%m-%d")
|
||||
dl_raw = ""
|
||||
for line in topicpage.splitlines():
|
||||
if 'download.php' in line:
|
||||
for lvl2 in line.split('>'):
|
||||
if 'download.php' in lvl2:
|
||||
dl_raw = lvl2
|
||||
if dl_raw == "":
|
||||
cnsl_message("skip_no_dl_link", str(torrents_count), str(len(topics)), topicname, (domain + "/" + topic))
|
||||
return 350
|
||||
downlink = dl_raw.split('"')[1]
|
||||
cnsl_message("downloading_torrent", str(torrents_count), str(len(topics)), topicname, (domain + "/" + topic), topicdate)
|
||||
os.chdir(torrentsdir)
|
||||
@ -208,7 +220,7 @@ def parse_topic(topic, forumname, topics, torrents_count, torrentsdir):
|
||||
|
||||
def download_torrent(downlink, topicdate):
|
||||
basefilename = commands.getoutput('curl -sI -b ' + cookie[1] + ' -J -L "' + domain + '/forum/' + downlink + '"' + "| grep -o -E 'filename=.*$' | tr -d '\r\n' | sed -e 's/filename=//' | sed 's/\"//g'")
|
||||
os.system('curl -b %s -L "%s/forum/%s" -o "%s %s" 2>> %s%s' % (cookie[1], domain, downlink, topicdate, basefilename, curdir, log_file))
|
||||
os.system('curl -b %s -L "%s/forum/%s" -o "%s %s" 2>> %s' % (cookie[1], domain, downlink, topicdate, basefilename, log_file))
|
||||
|
||||
def get_torrent(topics, torrentsdir, forumname):
|
||||
cnsl_message("download_started")
|
||||
|
0
output/.gitkeep
Normal file
0
output/.gitkeep
Normal file
Loading…
Reference in New Issue
Block a user