1
uploader_tools/massdl.py
Vladimir Hodakov efc529c145
Add forgotten stuff like gitignore, update copyrights
This repository tracks changes in my life and my self-identification,
resulting in three names of copyright holder within seven years,
including one nickname and two realnames.

Adding .gitignore, __init__.py and trying to fix script
in 2019 before abandoning forever due to abandoned Python 2.
2019-09-14 16:56:09 +04:00

237 lines
8.6 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python2.7
# *-* encoding: utf-8 *-*
# For NNM-Club Uploaders
# Copyright (c) 2012-2019 Vladimir Hodakov <vladimir@hodakov.me>
#
# Usage:
#
# python2 ./massdl.py [forum_number]
#
# For getting help, execute "python2 ./massdl.py" without params
import time, tempfile, os, sys, commands
import lib.config as uconfig
import lib.messages as umessages
import dateutil
from dateutil.parser import *
# Script version; formatted into the help, start and log-header messages.
version = 0.92
# Start time in whole seconds since the epoch; used in the log file name.
unixtime = int(time.time())
# mkstemp() returns (fd, path). The rest of the script only uses the path
# (cookie[1]), so the descriptor is released right away instead of staying
# open for the whole run.
cookie = tempfile.mkstemp()
os.close(cookie[0])
# Working directory, taken from the project configuration.
curdir = uconfig.workdir
# Log file name; it starts with "/" because it is concatenated onto curdir.
log_file = "/%i-massdl.log" % unixtime
# Site base URL, taken from the project configuration.
domain = uconfig.domain
def write_to_log(imessage):
    """
    Append imessage to the log file of this run (curdir + log_file).

    The text is written exactly as given; no newline is added.
    """
    # The with-block closes the handle even when write() raises.
    with open(curdir + log_file, 'a') as log:
        log.write(imessage)
def cnsl_message(imessage, *args):
    """
    Report the event named by imessage on the console and/or in the log.

    Templates come from lib.messages (c* names are printed, m* names are
    written to the log). Positional args fill the templates of the events
    that need them. "invpyver", "nocurl" and "login_failed" finish with
    sys.exit(666); "help" and "done" finish with sys.exit(0). An event name
    that is not listed below does nothing.
    """
    # print(...) with a single argument is equivalent to the print statement
    # on Python 2, which is what this script targets.
    if imessage == "invpyver":
        print(umessages.c066)
        write_to_log(umessages.m066)
        sys.exit(666)

    if imessage == "nocurl":
        print(umessages.c067)
        write_to_log(umessages.m067)
        sys.exit(666)

    if imessage == "help":
        print(umessages.c000 % version)
        sys.exit(0)

    if imessage == "start":
        script_name = sys.argv[0]
        print(umessages.c010 % (script_name, version))
        print(umessages.c020 % sys.argv[1])
        write_to_log(umessages.m020)
        return

    if imessage == "login_ok":
        login = set_username()[0]
        print(umessages.c120 % login)
        write_to_log(umessages.m120 % cookie[1])
        return

    if imessage == "login_failed":
        login = set_username()[0]
        print(umessages.c166 % login)
        write_to_log(umessages.m166)
        sys.exit(666)

    if imessage == "parsing_started":
        print(umessages.c200)
        return

    if imessage == "forum":
        print(umessages.c220 % args[0])
        write_to_log(umessages.m220 % args[0])
        return

    if imessage == "pages":
        print(umessages.c210 % args[0])
        write_to_log(umessages.m210 % args[0])
        return

    if imessage == "found_topics":
        print(umessages.c221 % args[0])
        write_to_log(umessages.m221 % args[0])
        return

    if imessage == "download_started":
        print(umessages.c300)
        return

    if imessage == "downloading_torrent":
        # The console line uses args 0, 1, 2 and 4; the log line 0, 1 and 3.
        console_fields = (args[0], args[1], args[2], args[4])
        print(umessages.c310 % console_fields)
        log_fields = (args[0], args[1], args[3])
        write_to_log(umessages.m310 % log_fields)
        return

    if imessage == "skip_non_approved":
        console_fields = (args[0], args[1], args[2])
        print(umessages.c340 % console_fields)
        log_fields = (args[0], args[1], args[2], args[3])
        write_to_log(umessages.m340 % log_fields)
        return

    if imessage == "done":
        print(umessages.c021)
        write_to_log(umessages.m021)
        sys.exit(0)
def check_requirements():
    """
    Check the interpreter version and the curl bindings.

    "invpyver" is reported unless the running interpreter is at least 2.7
    and older than 3.0; "nocurl" is reported when the curl or pycurl module
    cannot be imported.
    """
    oldest_supported = (2, 7)
    first_unsupported = (3, 0)
    running = sys.version_info
    if running < oldest_supported or running >= first_unsupported:
        cnsl_message("invpyver")
    try:
        import curl
        import pycurl
    except ImportError:
        cnsl_message("nocurl")
def check_params(params):
    """
    Validate the command line.

    Exactly one argument must follow the script name and it must consist of
    digits only; in every other case the "help" message is requested.
    """
    wrong_count = len(params) != 2
    # The second test is only evaluated when params[1] is known to exist.
    if wrong_count or not str(params[1]).isdigit():
        cnsl_message("help")
def start_log():
    """
    Create (or truncate) the log file of this run and write its header.

    The header is the m000 template from lib.messages filled with the
    script version.
    """
    # The with-block closes the handle even when write() raises.
    with open(curdir + log_file, 'w') as log:
        log.write(umessages.m000 % version)
def set_username():
    """
    Return the configured credentials as a (username, password) tuple.

    Despite the name nothing is set; both values are read from lib.config.
    """
    login = uconfig.username
    secret = uconfig.password
    return (login, secret)
def connect():
    """
    Log in to the forum with curl and store the session cookies.

    curl posts the configured credentials to <domain>/forum/login.php with
    "-c -", so the cookie jar comes back on stdout; curl's stderr is appended
    to the log file. When the output contains "Cookie" it is saved to the
    temporary cookie file (cookie[1]) and "login_ok" is reported; otherwise
    "login_failed" is reported and the script exits.

    The cookie file is written inside a with-block so that it is flushed and
    closed before later "curl -b" calls read it. The previous version kept
    the handle open in the invalid mode 'r+w' and printed the result of a
    read() issued right after the write, which was a debugging leftover.
    """
    write_to_log(umessages.m025 % domain)
    username, password = set_username()
    # NOTE(review): the credentials are pasted into a shell command line
    # unescaped; a password containing '"', '$' or '`' will break the command.
    cookie_txt = commands.getoutput('curl -c - -d "username=' + username + '&password=' + password + '&autologin=on&login=%C2%F5%EE%E4&redirect=index.php" "' + domain + '/forum/login.php" 2>>' + curdir + log_file)
    if cookie_txt.find("Cookie") == -1:
        # No "Cookie" in the output means no cookie jar was returned.
        cnsl_message("login_failed")
        # cnsl_message() already exits for this event; kept as a safeguard.
        sys.exit(666)
    with open(cookie[1], 'w') as cookiefile:
        cookiefile.write(cookie_txt)
    cnsl_message("login_ok")
def iteratorium(links_array, offset):
    """
    Collect links to the topics of forum sys.argv[1] that carry a torrent.

    links_array -- list the found links are appended to (modified in place)
    offset -- value of the "start" page parameter to begin with; pages are
              walked in steps of 50

    A link is taken from every page line that contains both "viewtopic.php"
    and "DL:". Paging continues while the fetched page contains the
    next-page marker. Returns a new list holding the links of links_array
    without duplicates, in first-seen order.

    Pages are walked in a loop rather than by recursion, so the number of
    forum pages is not bounded by the interpreter's recursion limit and the
    de-duplication runs only once.
    """
    while True:
        # "//" keeps the page number an integer, exactly like "/" on ints
        # in Python 2.
        cnsl_message("pages", str((offset // 50) + 1))
        forum_page = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '&start=' + str(offset) + '" | iconv -f cp1251 -t utf-8')
        for line in forum_page.splitlines():
            if 'viewtopic.php' in line and 'DL:' in line:
                for substring in line.split('"'):
                    if 'viewtopic.php' in substring:
                        # Keep only the part of the link before the first "&".
                        links_array.append(substring.split('&')[0])
        # Stop when the page offers no link to a next page.
        if forum_page.find("След.") == -1:
            break
        offset += 50
    seen = set()
    clean_array = []
    for item in links_array:
        if item not in seen:
            seen.add(item)
            clean_array.append(item)
    return clean_array
def download():
    """
    Drive the whole download for the forum given in sys.argv[1].

    Steps:
    * fetch the first forum page and report the forum title
    * collect the topic links from every forum page (iteratorium)
    * create <curdir>/<forum number> if it does not exist yet
    * hand the topics over to get_torrent()

    When no usable title line is found on the first page, the forum number
    itself is reported as the forum name instead of failing with an
    unbound variable.
    """
    cnsl_message("parsing_started")
    # The URL is quoted like in the other curl calls of this script, so the
    # shell does not treat "?" as a glob character.
    startpage = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/viewforum.php?f=' + sys.argv[1] + '" | iconv -f cp1251 -t utf-8')
    forumname = sys.argv[1]
    for line in startpage.splitlines():
        if 'maintitle' in line:
            pieces = line.split('>')
            # The title is the text right after the third ">" on the line.
            if len(pieces) > 3:
                forumname = pieces[3].split('<')[0]
    cnsl_message("forum", forumname)
    topics = iteratorium([], 0)
    cnsl_message("found_topics", str(len(topics)))
    torrentsdir = "%s/%s" % (curdir, sys.argv[1])
    if not os.path.exists(torrentsdir):
        os.makedirs(torrentsdir)
    get_torrent(topics, torrentsdir, forumname)
def parse_topic(topic, forumname, topics, torrents_count, torrentsdir):
    """
    Fetch one topic page and download its torrent if a moderator approved it.

    topic -- link to the topic, relative to <domain>/forum/
    forumname -- not used in this function; kept for the call signature
    topics -- all collected topic links; only its length is used (progress)
    torrents_count -- number of this topic within the run (progress)
    torrentsdir -- directory the process changes into before downloading

    Returns 340 when the topic is skipped and None after a download.

    A topic is skipped (reported as "skip_non_approved") when the page says
    it was not checked by a moderator, when no approval line is found, or
    when no download link can be extracted. When the title cannot be
    extracted, the topic link is used as its name. Missing pieces used to
    surface as NameError / IndexError and abort the whole run.
    """
    topicpage = commands.getoutput('curl -b ' + cookie[1] + ' "' + domain + '/forum/' + topic + '" | iconv -f cp1251 -t utf-8')
    page_lines = topicpage.splitlines()
    topic_url = domain + "/" + topic

    # Title: text after the third ">" of the last line mentioning "maintitle".
    topicname = topic
    for line in page_lines:
        if 'maintitle' in line:
            pieces = line.split('>')
            if len(pieces) > 3:
                topicname = pieces[3].split('<')[0]

    def skip():
        # Single reporting path for every reason a topic is left out.
        cnsl_message("skip_non_approved", str(torrents_count), str(len(topics)), topicname, topic_url)
        return 340

    for line in page_lines:
        if 'не проверено модератором!' in line:
            return skip()

    # Approval line: the last one wins, as before.
    tdate_raw = None
    for line in page_lines:
        if 'проверено модератором' in line:
            tdate_raw = line
    if tdate_raw is None:
        return skip()

    # dateutil does not parse Russian month abbreviations, so they are
    # replaced by month numbers first.
    months = (
        ('Янв', '01'), ('Фев', '02'), ('Мар', '03'), ('Апр', '04'),
        ('Май', '05'), ('Июн', '06'), ('Июл', '07'), ('Авг', '08'),
        ('Сен', '09'), ('Окт', '10'), ('Ноя', '11'), ('Дек', '12'),
    )
    for abbreviation, number in months:
        tdate_raw = tdate_raw.replace(abbreviation, number)
    # The date is what follows the third space of the first tag's text.
    date_text = tdate_raw.split('>')[1].split('<')[0].split(' ', 3)[3]
    topicdate = dateutil.parser.parse(date_text).strftime("%Y-%m-%d")

    # Download link: first quoted value of the last tag mentioning
    # "download.php".
    downlink = None
    for line in page_lines:
        if 'download.php' in line:
            for chunk in line.split('>'):
                if 'download.php' in chunk:
                    quoted = chunk.split('"')
                    if len(quoted) > 1:
                        downlink = quoted[1]
    if downlink is None:
        return skip()

    cnsl_message("downloading_torrent", str(torrents_count), str(len(topics)), topicname, topic_url, topicdate)
    os.chdir(torrentsdir)
    download_torrent(downlink, topicdate)
def download_torrent(downlink, topicdate):
    """
    Download one torrent file into the current directory.

    downlink -- download link relative to <domain>/forum/
    topicdate -- date string that prefixes the saved file name

    A header-only request is made first to learn the file name announced by
    the server ("filename=..." in the response headers); the file is then
    saved as "<topicdate> <filename>". curl's stderr for the download is
    appended to the log file.

    Both the link (taken from a web page) and the file name (taken from
    server headers) are remote data, so curl is started with an argument
    list instead of a shell command line: nothing in them is interpreted by
    a shell.
    """
    import re
    import subprocess

    url = '%s/forum/%s' % (domain, downlink)
    probe = subprocess.Popen(
        ['curl', '-sI', '-b', cookie[1], '-J', '-L', url],
        stdout=subprocess.PIPE)
    headers = probe.communicate()[0]
    # Same result as the former grep/tr/sed pipeline: take "filename=..." up
    # to the end of each line, join the matches, drop CR/LF, remove the first
    # "filename=" and every double quote.
    joined = ''.join(re.findall(r'filename=.*', headers))
    joined = joined.replace('\r', '').replace('\n', '')
    basefilename = joined.replace('filename=', '', 1).replace('"', '')
    target = '%s %s' % (topicdate, basefilename)
    with open(curdir + log_file, 'a') as log:
        subprocess.call(['curl', '-b', cookie[1], '-L', url, '-o', target],
                        stderr=log)
def get_torrent(topics, torrentsdir, forumname):
    """
    Report the start of the download phase and process every topic in order.

    Each topic is passed to parse_topic() together with its number in the
    run, counted from 1.
    """
    cnsl_message("download_started")
    for position, link in enumerate(topics, 1):
        parse_topic(link, forumname, topics, position, torrentsdir)
def cleanup():
    """
    Delete the temporary cookie file and report "done".
    """
    cookie_path = cookie[1]
    os.unlink(cookie_path)
    cnsl_message("done")
# Script entry sequence. The order matters: start_log() creates/truncates the
# log file, so it runs before every step that appends to it.
check_params(sys.argv)
start_log()
check_requirements()
# NOTE(review): no use of curl / pycurl is visible in this file (curl is run
# as an external command); confirm whether this import is still needed.
import curl, pycurl
cnsl_message("start")
connect()
download()
cleanup()