NNTP to IMAP duplicator

Annotation For nntpdup.py
anonymous

Annotation For nntpdup.py

Origin for each line in nntpdup.py from check-in 7513432477:

#!/usr/bin/env python3

import configparser
import email.utils
import getpass
import imaplib
import nntplib
import re
import sqlite3
import sys

# Raise both libraries' private line-length caps to 4 MiB (presumably so that
# very long server lines are not rejected -- confirm against the servers used).
imaplib._MAXLINE = 1024 * 1024 * 4
nntplib._MAXLINE = 1024 * 1024 * 4

# All settings come from nntpdup.conf in the current working directory.
config = configparser.ConfigParser(allow_no_value=True)
config.read('nntpdup.conf')

try:
	# Plain-text NNTP; use nntplib.NNTP_SSL(...) here instead for a TLS server.
	server = nntplib.NNTP(config['connection']['newsserver'])
except nntplib.NNTPTemporaryError as err:
	# A "400 load at ..." refusal is not treated as a failure: report it and
	# leave with status 0.  Any other temporary error propagates.
	if err.response.startswith('400 load at '):
		print(err.response)
		sys.exit(0)
	raise

mserver = imaplib.IMAP4_SSL(config['connection']['mailserver'])

# Picks the Message-ID (group 1) out of the tail of a decoded ENVELOPE fetch
# response.  Raw string: the pattern is full of backslashes that are not valid
# Python string escapes.
reMessageId = re.compile(r'(<[-\][a-zA-Z0-9@.%/=_\$+!&~#\?}]+>)"?\)\)(\d+ \(FLAGS\(\)\))?$')

mserver.login(config['connection']['mail_user'], config['connection']['mail_password'])

# Optional per-run budgets, with the defaults used when the key is absent.
mailLimit = config['connection'].getint('mail_limit', fallback=100)
headerLimit = config['connection'].getint('header_limit', fallback=1000)
3205f8a9ae 2015-10-12   30: 
# Bookkeeping schema: for every table name, the statements that create it.
tables = {
	'list': ["create table list (id integer primary key, name text, last integer default 0);"],
	'ids': ["create table ids (id integer, name text, mask integer, date integer);", "create unique index ids__id_name on ids(id, name);"],
}

class Folder:
	"""SQLite-backed index of message ids, kept per folder name.

	Each row of ``ids`` carries a ``mask``:
	1 = recorded through addmail() only, 2 = recorded through addnews() only,
	3 = recorded through both.

	Attributes:
	db    -- the sqlite3 connection
	id    -- row id of the selected folder in ``list`` (None until select())
	name  -- name of the selected folder
	last  -- the folder's ``last`` counter from the ``list`` table
	count -- number of ids of the selected folder whose mask is 1 or 3
	mask  -- in-memory cache {message id: mask} for the selected folder
	"""

	def __init__(self, filename):
		"""Open (or create) the database *filename* and create missing tables."""
		self.db = sqlite3.connect(filename)
		self.id = None
		self.name = None
		self.last = 0
		self.count = 0
		self.mask = {}
		found = {row[0] for row in self.db.execute("select name from sqlite_master where type = 'table';")}
		for absent in set(tables) - found:
			for query in tables[absent]:
				self.db.execute(query)

	def select(self, folderName):
		"""Make *folderName* the current folder, registering it when unknown.

		Loads ``id`` and ``last``, clears the mask cache and refreshes
		``count``.  Exits with status 1 if no id can be obtained.
		"""
		self.name = folderName
		self.id = None
		lookup = "select id, last from list where name = ?;"
		rows = self.db.execute(lookup, [folderName]).fetchall()
		if not rows:
			self.db.execute("insert into list(name) values (?);", [folderName])
			rows = self.db.execute(lookup, [folderName]).fetchall()
		if rows:
			# When several rows share the name the last one returned wins.
			self.id, self.last = rows[-1]
		if self.id is None:
			print('Id not found.')
			sys.exit(1)
		self.mask = {}
		self.get_count()

	def get_count(self):
		"""Recompute ``count``: ids of this folder with mask 1 or 3."""
		row = self.db.execute("select count(*) from ids where id = ? and mask in (3, 1);", [self.id]).fetchone()
		self.count = row[0] if row else 0

	def get_record_count(self, mask):
		"""Return how many ids of this folder carry exactly *mask*."""
		row = self.db.execute("select count(*) from ids where id = ? and mask = ?;", [self.id, mask]).fetchone()
		return row[0] if row else None

	def check(self, name):
		"""Return the mask stored for message id *name*, or None if unknown.

		Known masks are cached in ``self.mask``; unknown ids are not cached.
		"""
		if name in self.mask:
			return self.mask[name]
		row = self.db.execute("select mask from ids where id = ? and name = ?;", [self.id, name]).fetchone()
		if row is None:
			return None
		self.mask[name] = row[0]
		return row[0]

	def addlast(self, count):
		"""Advance ``last`` by *count* and store it."""
		self.last += count
		self.db.execute("update list set last = ? where id = ?;", [self.last, self.id])

	def droplast(self):
		"""Reset ``last`` to 0 and store it."""
		self.last = 0
		self.db.execute("update list set last = ? where id = ?;", [self.last, self.id])

	def addmail(self, mid):
		"""Record *mid* as seen on the mail side.

		A row already known from news (mask 2 or 3) becomes mask 3; otherwise
		a new mask-1 row is inserted.
		"""
		mask = self.check(mid)
		if mask in (3, 2):
			self.db.execute("update ids set mask = 3 where id = ? and name = ?;", [self.id, mid])
			if mask == 2:
				# 2 -> 3 brings the row into the set get_count() counts.
				self.count += 1
			self.mask[mid] = 3
		else:
			self.db.execute("insert into ids(id, name, mask) values(?, ?, ?);", [self.id, mid, 1])
			self.count += 1
			self.mask[mid] = 1

	def addnews(self, mid, date = None):
		"""Record *mid* (with optional *date*) as seen on the news side.

		A row already known from mail (mask 1 or 3) becomes mask 3; otherwise
		a new mask-2 row is inserted.  ``count`` is left alone in both cases:
		it only covers masks 1 and 3, and neither path changes membership.
		"""
		mask = self.check(mid)
		if mask in (1, 3):
			self.db.execute("update ids set mask = 3, date = ? where id = ? and name = ?;", [date, self.id, mid])
			self.mask[mid] = 3
		else:
			self.db.execute("insert into ids(id, name, mask, date) values(?, ?, ?, ?);", [self.id, mid, 2, date])
			self.mask[mid] = 2

	def zeromail(self):
		"""Forget everything learnt from mail: 3 -> 2, mask-1 rows deleted."""
		self.mask = {}
		self.db.execute("update ids set mask = 2 where id = ? and mask = 3;", [self.id])
		self.db.execute("delete from ids where id = ? and mask = 1;", [self.id])
		self.sync()
		self.get_count()

	def zeronews(self):
		"""Forget everything learnt from news: 3 -> 1, mask-2 rows deleted, ``last`` reset."""
		self.mask = {}
		self.db.execute("update ids set mask = 1 where id = ? and mask = 3;", [self.id])
		self.db.execute("delete from ids where id = ? and mask = 2;", [self.id])
		self.droplast()
		self.sync()

	def sync(self):
		"""Commit pending changes."""
		self.db.commit()

	def get_unfetched(self):
		"""Return a cursor over (name, date) of mask-2 rows, newest date first."""
		return self.db.execute("select name, date from ids where id = ? and mask = 2 order by date desc;", [self.id])

	def forget(self, mid):
		"""Delete *mid* from this folder and drop it from the cache and ``count``."""
		mask = self.check(mid)
		self.db.execute("delete from ids where id = ? and name = ?;", [self.id, mid])
		if mask in (1, 3):
			self.count -= 1
		# Without this the cache would keep answering with the deleted mask.
		self.mask.pop(mid, None)
3205f8a9ae 2015-10-12  131: 
def check_folder(mserver, folder, folderName):
	"""Rebuild the mail side of *folder*'s index from the IMAP mailbox.

	Drops what the index knew about mail, then walks every message of
	*folderName* that is not flagged deleted.  The first message seen for a
	Message-ID is recorded with folder.addmail(); later messages with the
	same id are flagged \\Deleted.  Finally the index is committed and the
	mailbox expunged.

	mserver    -- logged-in imaplib connection
	folder     -- Folder index with the matching folder already selected
	folderName -- mailbox name as the IMAP server knows it

	Exits with status 1 when a message's ENVELOPE yields no Message-ID.
	"""
	folder.zeromail()
	deleted = 0
	count = 0
	mserver.select(folderName)
	typ, found = mserver.search(None, 'NOT DELETED')
	print(' - building imap index', folderName, '[', end='')
	# found[0] may be None rather than b'' when the server sent no untagged
	# SEARCH response; treat that as "no messages".
	for num in (found[0] or b'').split():
		typ, parts = mserver.fetch(num, '(ENVELOPE)')
		# The response is a mix of bytes and tuples of bytes; flatten it into
		# one string so the regex can look at its tail.
		chunks = []
		for part in parts:
			if isinstance(part, tuple):
				chunks.extend(piece.decode('utf-8', 'ignore') for piece in part)
			else:
				chunks.append(part.decode('utf-8', 'ignore'))
		envelope = ''.join(chunks)
		isMid = reMessageId.search(envelope)
		if not isMid:
			print('Message id not found.')
			print(repr(envelope))
			sys.exit(1)
		mid = isMid.group(1)
		if folder.check(mid) in (1, 3):
			# Already indexed during this pass: this message is a duplicate.
			mserver.store(num, '+FLAGS', '\\Deleted')
			deleted += 1
			sys.stdout.write('x')
			sys.stdout.flush()
		else:
			folder.addmail(mid)
			count += 1
			# One progress dot per 1000 indexed messages.
			if count % 1000 == 0:
				sys.stdout.write('.')
				sys.stdout.flush()
	print('], deleted:', deleted)
	folder.sync()
	mserver.expunge()
3205f8a9ae 2015-10-12  172: 
def _article_dates(lines):
	"""Return (date, backup_date) time tuples parsed from an article's headers.

	*lines* are the raw article lines (bytes).  Headers end at the first empty
	line; when there is none, (None, None) is returned.  ``date`` comes from
	the Date header, ``backup_date`` from the part after the last ';' of an
	Original-Received header (only years >= 1970 are accepted).  When Date
	cannot be parsed ``date`` falls back to ``backup_date``; when neither is
	usable the headers are printed and the script exits with status 1.
	"""
	headerLines = []
	for line in lines:
		if len(line) == 0:
			break
		headerLines.append(line.decode('ascii', 'ignore'))
	else:
		return None, None
	date = None
	backup_date = None
	for key, value in email.message_from_string('\n'.join(headerLines)).items():
		if key == 'Date':
			date = email.utils.parsedate(value)
		elif key == 'Original-Received':
			candidate = email.utils.parsedate(value.split(';')[-1])
			if candidate is not None and candidate[0] >= 1970:
				backup_date = candidate
	if date is None and backup_date is None:
		print('Date missed.')
		print(repr(headerLines))
		sys.exit(1)
	if date is None:
		date = backup_date
	return date, backup_date

folder = Folder('nntpdup.sqlite')

# limits[0] / limits[1]: running budgets for headers / articles.  Every group
# adds its share (limitSteps) and subtracts what it actually used, so unused
# budget carries over to the groups processed later in the same run.
limits = [0, 0]
groupCount = max(1, len(config['groups']))  # no groups: nothing to do, but do not divide by zero
limitSteps = [headerLimit / groupCount, mailLimit / groupCount]

# Tab stops needed to line up the per-group statistics.
maxlength = max((len(folderName) for folderName in config['groups'].keys()), default=0)
skew = 1 + maxlength // 8

for folderName in (set(config['groups'].keys())):
	stats = [0, 0]
	folder.select(folderName)
	localFolderName = folderName

	resp = mserver.select(localFolderName)
	print('#--', localFolderName, ':', resp)
	if resp[0] != 'OK':
		# Second attempt with '/' in place of '.' in the mailbox name.
		localFolderName = folderName.replace('.', '/')
		resp = mserver.select(localFolderName)
		if resp[0] != 'OK':
			print("Can't open folder.")
			sys.exit(1)
	# The index disagrees with the mailbox's message count: rebuild it.
	if int(resp[1][0]) != folder.count:
		check_folder(mserver, folder, localFolderName)

	_, count, first, last, _ = server.group(folderName)
	limits[0] += limitSteps[0]
	if last > folder.last:
		count = 0
		# we need to fetch new ids
		# Whole article numbers only, so that an integer is what gets stored
		# in the folder's ``last`` column.
		request = int(min(last, folder.last + limits[0]))
		try:
			for record in server.over((int(folder.last) + 1, request))[1]:
				mid = record[1]['message-id']
				if len(mid) > 0:
					try:
						stamp = email.utils.parsedate_to_datetime(record[1]['date']).timestamp()
					except (OverflowError, TypeError, ValueError):
						# Unusable Date header (ValueError is what Python
						# 3.10+ raises): keep the id, leave the date unset.
						stamp = None
					folder.addnews(mid, stamp)
				count += 1
		except nntplib.NNTPTemporaryError as err:
			# A 423 reply is tolerated; anything else is fatal.
			if not err.response.startswith('423 '):
				raise
		except nntplib.NNTPPermanentError:
			print(folder.last, request)
			raise
		except sqlite3.IntegrityError:
			# Show the offending overview record and the row it collides with.
			print(repr(record))
			print([repr(row) for row in folder.db.execute("select * from ids where id = ? and name = ?;", [folder.id, record[1]['message-id']])])
			raise
		stats[0] = count
		limits[0] -= count
		folder.addlast(request - folder.last)
		folder.sync()
	elif folder.get_record_count(1) > 0:
		folder.droplast()

	limits[1] += limitSteps[1]
	if folder.get_record_count(2) > 0:
		count = 0
		# there are extra articles
		unfetched = [(item, env_date) for item, env_date in folder.get_unfetched() if folder.check(item) == 2]
		for item, env_date in unfetched:
			try:
				_, info = server.article(item)
				message = b'\n'.join(info.lines)
				if env_date is None or env_date < 0:
					# No usable date in the index: take it from the article.
					date, backup_date = _article_dates(info.lines)
					try:
						mserver.append(localFolderName, None, date, message)
					except (AttributeError, OverflowError):
						mserver.append(localFolderName, None, backup_date, message)
				else:
					mserver.append(localFolderName, None, env_date, message)
				folder.addmail(item)
				folder.sync()
				count += 1
				if count >= limits[1]:
					break
			except nntplib.NNTPTemporaryError as err:
				if err.response.startswith('430 No such article'):
					# The server no longer has it: stop asking for it.
					folder.forget(item)
				else:
					print(err.response, item, env_date)
					raise
		stats[1] = count
		limits[1] -= count

	if stats[0] != 0 or stats[1] != 0:
		print('# ', folderName, '\t'*(skew - int((len(folderName) + 2) / 8)), '\t'.join(map(str, stats)), sep = '')
	folder.sync()