2016-04-04 13:13:09 +04:00
|
|
|
|
#!/usr/bin/env python2.7
|
2013-08-24 22:04:28 +04:00
|
|
|
|
# *-* encoding: utf-8 *-*
|
|
|
|
|
# For NNM-Club Uploaders
|
2016-04-04 13:13:09 +04:00
|
|
|
|
# Copyright (c) 2012-2016 Vladimir Kozlov <iam@toofat.ru>
|
2013-08-24 22:04:28 +04:00
|
|
|
|
#
|
|
|
|
|
# Usage:
|
|
|
|
|
#
|
2014-12-26 23:08:52 +04:00
|
|
|
|
# python2 ./massdl.py [forum_number]
|
2016-04-04 13:13:09 +04:00
|
|
|
|
#
|
2013-08-24 22:04:28 +04:00
|
|
|
|
# To get help, run "python2 ./massdl.py" without parameters
|
|
|
|
|
|
|
|
|
|
import time, tempfile, os, sys, commands
|
2014-12-26 23:08:52 +04:00
|
|
|
|
import lib.config as uconfig
|
|
|
|
|
import lib.messages as umessages
|
|
|
|
|
import dateutil
|
|
|
|
|
from dateutil.parser import *
|
2016-04-04 13:13:09 +04:00
|
|
|
|
# Script version; interpolated into the help, start-up and log header messages.
version = 0.92
# Launch time as a Unix timestamp; gives every run its own log file name.
unixtime = int(time.time())
# tempfile.mkstemp() returns (fd, path); cookie[1] is the path handed to curl.
cookie = tempfile.mkstemp()
# Working directory from lib.config; the log and the torrents are stored under it.
curdir = uconfig.workdir
# Log file name, appended directly to curdir (hence the leading slash).
log_file = "/%i-massdl.log" % unixtime
# Base URL of the site, from lib.config; every request URL is built from it.
domain = uconfig.domain
|
2013-08-24 22:04:28 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write_to_log(imessage):
    """
    Append a message to this run's log file (curdir + log_file).

    imessage -- text to append; it is written verbatim, no newline is added.

    The file is opened for every call and closed again even when the
    write itself fails.
    """
    with open(curdir + log_file, 'a') as log:
        log.write(imessage)
|
|
|
|
|
|
|
|
|
|
def cnsl_message(imessage, *args):
    """
    Report an event: print its console text and, for most events, log it.

    imessage -- event key, e.g. "start", "forum", "downloading_torrent"
    args     -- values interpolated into the message templates of that event

    Templates come from lib.messages (c### for the console, m### for the log).
    "invpyver", "nocurl" and "login_failed" end the script via sys.exit(666),
    "help" and "done" via sys.exit(0). Unknown keys do nothing.
    """
    # Set by the branches that must terminate the script once reporting is done.
    exit_code = None

    if imessage == "invpyver":
        print(umessages.c066)
        write_to_log(umessages.m066)
        exit_code = 666
    elif imessage == "nocurl":
        print(umessages.c067)
        write_to_log(umessages.m067)
        exit_code = 666
    elif imessage == "help":
        print(umessages.c000 % version)
        exit_code = 0
    elif imessage == "start":
        print(umessages.c010 % (sys.argv[0], version))
        print(umessages.c020 % sys.argv[1])
        write_to_log(umessages.m020)
    elif imessage == "login_ok":
        print(umessages.c120 % set_username()[0])
        write_to_log(umessages.m120 % cookie[1])
    elif imessage == "login_failed":
        print(umessages.c166 % set_username()[0])
        write_to_log(umessages.m166)
        exit_code = 666
    elif imessage == "parsing_started":
        print(umessages.c200)
    elif imessage == "forum":
        print(umessages.c220 % args[0])
        write_to_log(umessages.m220 % args[0])
    elif imessage == "pages":
        print(umessages.c210 % args[0])
        write_to_log(umessages.m210 % args[0])
    elif imessage == "found_topics":
        print(umessages.c221 % args[0])
        write_to_log(umessages.m221 % args[0])
    elif imessage == "download_started":
        print(umessages.c300)
    elif imessage == "downloading_torrent":
        # The console template takes args[4], the log template takes args[3].
        print(umessages.c310 % (args[0], args[1], args[2], args[4]))
        write_to_log(umessages.m310 % (args[0], args[1], args[3]))
    elif imessage == "skip_non_approved":
        print(umessages.c340 % (args[0], args[1], args[2]))
        write_to_log(umessages.m340 % (args[0], args[1], args[2], args[3]))
    elif imessage == "done":
        print(umessages.c021)
        write_to_log(umessages.m021)
        exit_code = 0

    if exit_code is not None:
        sys.exit(exit_code)
|
|
|
|
|
|
|
|
|
|
def check_requirements():
    """
    Verify the interpreter version and the availability of the curl modules.

    Reports "invpyver" unless the running Python is at least 2.7 and older
    than 3.0, then reports "nocurl" if `curl` or `pycurl` cannot be imported.
    """
    oldest_supported = (2, 7)
    first_unsupported = (3, 0)
    running = sys.version_info

    if not (oldest_supported <= running < first_unsupported):
        cnsl_message("invpyver")

    try:
        import curl
        import pycurl
    except ImportError:
        cnsl_message("nocurl")
|
|
|
|
|
|
|
|
|
|
def check_params(params):
    """
    Validate the command line (normally sys.argv).

    Exactly one parameter is expected after the script name and it must
    consist of digits only; anything else triggers the "help" message.
    """
    looks_valid = len(params) == 2 and str(params[1]).isdigit()
    if not looks_valid:
        cnsl_message("help")
|
|
|
|
|
|
|
|
|
|
def start_log():
    """
    Create (or truncate) this run's log file and write the header line.

    The header is umessages.m000 with the script version interpolated.
    The file is closed even when writing the header fails.
    """
    with open(curdir + log_file, 'w') as log:
        log.write(umessages.m000 % version)
|
|
|
|
|
|
|
|
|
|
def set_username():
    """
    Return the (username, password) pair configured in lib.config.
    """
    login = uconfig.username
    secret = uconfig.password
    return (login, secret)
|
2013-08-24 22:04:28 +04:00
|
|
|
|
|
|
|
|
|
def connect():
    """
    Log in to the site with curl and store the session cookies.

    curl posts the credentials from set_username() to <domain>/forum/login.php
    and prints the cookie jar to stdout ("-c -"); curl's stderr is appended to
    the log file. When the output contains "Cookie" it is saved to the
    temporary cookie file (cookie[1]) and "login_ok" is reported; otherwise
    "login_failed" is reported and the script exits.
    """
    # shlex.quote exists on Python 3.3+ only; Python 2.7 offers pipes.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    username, password = set_username()
    write_to_log(umessages.m025 % domain)

    # Every shell argument is quoted, so characters such as '"', '$' or '`'
    # in the credentials can neither break the command nor be run by the shell.
    post_data = ('username=' + username + '&password=' + password +
                 '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php')
    cookie_txt = commands.getoutput(
        'curl -c - -d ' + quote(post_data) +
        ' ' + quote(domain + '/forum/login.php') +
        ' 2>>' + quote(curdir + log_file))

    if "Cookie" not in cookie_txt:
        # No cookie in the output means the login was rejected.
        cnsl_message("login_failed")
        # cnsl_message("login_failed") already exits; kept as a safety net.
        sys.exit(666)

    # Close (and thereby flush) the file before any later curl call reads it.
    with open(cookie[1], 'w') as cookiefile:
        cookiefile.write(cookie_txt)
    cnsl_message("login_ok")
|
|
|
|
|
|
|
|
|
|
def iteratorium(links_array, offset):
    """
    Collect links to the topics of the forum that carry a torrent.

    links_array -- list the found links are appended to (modified in place)
    offset      -- "start" value of the first page to load; pages are 50 apart

    Pages are loaded one after another for as long as the page contains the
    next-page marker. Returns a new list holding the collected links without
    duplicates, in the order they were first seen.
    """
    # A loop instead of one recursive call per page: large forums no longer
    # run into the interpreter's recursion limit.
    while True:
        cnsl_message("pages", str((offset // 50) + 1))
        forum_page = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '&start=' + str(offset) + '" | iconv -f cp1251 -t utf-8')

        for line in forum_page.splitlines():
            # Only lines that link to a topic and contain "DL:" are of interest.
            if 'viewtopic.php' not in line or 'DL:' not in line:
                continue
            for substring in line.split('"'):
                if 'viewtopic.php' in substring:
                    # Keep the part before the first '&' only.
                    links_array.append(substring.split('&')[0])

        # Check for the next page
        if "След." not in forum_page:
            break
        offset += 50

    # De-duplicate once, at the end, keeping the first occurrence of each link.
    seen = set()
    clean_array = []
    for item in links_array:
        if item not in seen:
            seen.add(item)
            clean_array.append(item)
    return clean_array
|
|
|
|
|
|
|
|
|
|
def download():
    """
    Main part of the script:

    * load the first page of the forum given on the command line
    * read the forum name from it
    * collect the torrent topics of all forum pages (iteratorium)
    * make sure <curdir>/<forum number> exists
    * hand the topics over to get_torrent
    """
    # Firstly, we need to download forum's startpage
    cnsl_message("parsing_started")
    # The URL is quoted because it contains '?', which the shell treats as a
    # wildcard; the other curl calls of this script quote it the same way.
    startpage = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '" | iconv -f cp1251 -t utf-8')

    # Finding forum name: the last line mentioning 'maintitle' holds it.
    title_line = None
    for line in startpage.splitlines():
        if 'maintitle' in line:
            title_line = line

    # Fall back to the forum number when the page has no usable title line
    # (for example when the request failed and the page is empty).
    forumname = sys.argv[1]
    if title_line is not None:
        pieces = title_line.split('>')
        if len(pieces) > 3:
            forumname = pieces[3].split('<')[0]
    cnsl_message("forum", forumname)

    # And now we start our "good iteratorium"
    topics = iteratorium([], 0)
    cnsl_message("found_topics", str(len(topics)))

    # The torrents of a forum go into a directory named after its number.
    torrentsdir = "%s/%s" % (curdir, sys.argv[1])
    if not os.path.exists(torrentsdir):
        os.makedirs(torrentsdir)
    get_torrent(topics, torrentsdir, forumname)
|
|
|
|
|
|
|
|
|
|
def parse_topic(topic, forumname, topics, torrents_count, torrentsdir):
    """
    Load one topic page and download its torrent if a moderator approved it.

    topic          -- topic link relative to <domain>/forum/
    forumname      -- forum name (not used here, kept for the callers)
    topics         -- all collected topic links; only the length is reported
    torrents_count -- 1-based position of this topic, used in the messages
    torrentsdir    -- directory the torrent file is saved into

    Returns 340 when the topic is skipped (not approved, no approval line,
    or no download link on the page), otherwise None.
    """
    topicpage = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/' + topic + '" | iconv -f cp1251 -t utf-8')

    position = str(torrents_count)
    total = str(len(topics))
    topic_url = domain + "/" + topic

    # One pass over the page; for every marker the last matching line wins.
    title_line = None
    tdate_raw = None
    dl_raw = None
    rejected = False
    for line in topicpage.splitlines():
        if 'maintitle' in line:
            title_line = line
        if 'не проверено модератором!' in line:
            rejected = True
        if 'проверено модератором' in line:
            tdate_raw = line
        if 'download.php' in line:
            for lvl2 in line.split('>'):
                if 'download.php' in lvl2:
                    dl_raw = lvl2

    # Use the link itself as the name when the page has no usable title line.
    topicname = topic
    if title_line is not None:
        title_parts = title_line.split('>')
        if len(title_parts) > 3:
            topicname = title_parts[3].split('<')[0]

    # Explicitly rejected, or no approval line at all: skip the topic.
    if rejected or tdate_raw is None:
        cnsl_message("skip_non_approved", position, total, topicname, topic_url)
        return 340

    # fix for russian months names (dateutil doesn't parse them)
    months = (
        ('Янв', '01'), ('Фев', '02'), ('Мар', '03'), ('Апр', '04'),
        ('Май', '05'), ('Июн', '06'), ('Июл', '07'), ('Авг', '08'),
        ('Сен', '09'), ('Окт', '10'), ('Ноя', '11'), ('Дек', '12'),
    )
    for month_name, month_number in months:
        tdate_raw = tdate_raw.replace(month_name, month_number)
    # The date is what follows the third space inside the first tag's text.
    topicdate = dateutil.parser.parse(tdate_raw.split('>')[1].split('<')[0].split(' ', 3)[3]).strftime("%Y-%m-%d")

    # The link is the first double-quoted value of the download.php fragment.
    link_parts = dl_raw.split('"') if dl_raw is not None else []
    if len(link_parts) < 2:
        # Without a link there is nothing to fetch; note it and move on
        # instead of aborting the whole run.
        write_to_log("No download link found in %s, topic skipped\n" % topic_url)
        return 340
    downlink = link_parts[1]

    cnsl_message("downloading_torrent", position, total, topicname, topic_url, topicdate)
    # download_torrent saves into the current directory.
    os.chdir(torrentsdir)
    download_torrent(downlink, topicdate)
|
2013-08-24 22:04:28 +04:00
|
|
|
|
|
2016-04-04 13:13:09 +04:00
|
|
|
|
def download_torrent(downlink, topicdate):
    """
    Download one torrent file into the current directory.

    downlink  -- download link relative to <domain>/forum/
    topicdate -- date string put in front of the file name

    The file name is taken from the "filename=" part of the response headers
    and the file is saved as "<topicdate> <filename>". curl's stderr is
    appended to the log file.
    """
    # shlex.quote exists on Python 3.3+ only; Python 2.7 offers pipes.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    url = '%s/forum/%s' % (domain, downlink)

    # Headers only (-I): cut out the file name, drop CR/LF and double quotes.
    basefilename = commands.getoutput(
        'curl -sI -b ' + quote(cookie[1]) + ' -J -L ' + quote(url) +
        " | grep -o -E 'filename=.*$' | tr -d '\r\n' | sed -e 's/filename=//' | sed 's/\"//g'")

    # The name is chosen by the server: keep it inside the current directory
    # and pass it to the shell as one quoted word, so it cannot inject commands.
    basefilename = basefilename.replace('/', '_')
    target = '%s %s' % (topicdate, basefilename)

    os.system('curl -b %s -L %s -o %s 2>> %s' % (
        quote(cookie[1]), quote(url), quote(target), quote(curdir + log_file)))
|
2013-08-24 22:04:28 +04:00
|
|
|
|
|
|
|
|
|
def get_torrent(topics, torrentsdir, forumname):
    """
    Announce the download phase and process every topic in order.

    Each topic is passed to parse_topic together with its 1-based position.
    """
    cnsl_message("download_started")
    for position, link in enumerate(topics, 1):
        parse_topic(link, forumname, topics, position, torrentsdir)
|
|
|
|
|
|
|
|
|
|
def cleanup():
    """
    Release the temporary cookie file and finish the run.

    Closes the descriptor returned by tempfile.mkstemp(), removes the cookie
    file and reports "done", which ends the script with exit status 0.
    """
    try:
        # mkstemp() hands back an open descriptor that is the caller's to close.
        os.close(cookie[0])
    except OSError:
        # Already closed: nothing left to release.
        pass
    os.remove(cookie[1])
    cnsl_message("done")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Entry point: the steps below run in this order whenever the file is executed.

# Validate the command line; anything but one numeric parameter shows the help.
check_params(sys.argv)
# Create this run's log file and write its header line.
start_log()
# Require Python 2.7.x and importable curl/pycurl modules.
check_requirements()

import curl, pycurl

# Announce the run, log in, fetch the torrents, then remove the cookie file.
cnsl_message("start")
connect()
download()
cleanup()
|