NNTP to IMAP duplicator

Annotation For nntpdup.py
anonymous

Annotation For nntpdup.py

Lines of nntpdup.py from check-in 973a1d241e that are changed by the sequence of edits moving toward check-in 295fec7f85:

973a1d241e 2015-10-13    1: #!/usr/bin/env python3.4
                         2: 
                         3: import configparser, email.utils, getpass, imaplib, nntplib, re, sqlite3, sys
                         4: imaplib._MAXLINE = 1024 * 1024
                         5: nntplib._MAXLINE = 1024 * 1024
                         6: 
                         7: config = configparser.ConfigParser(allow_no_value = True)
                         8: config.read('nntpdup.conf')
                         9: 
                        10: try:
                        11: 	server = nntplib.NNTP_SSL(config['connection']['newsserver'])
                        12: except nntplib.NNTPTemporaryError as err:
                        13: 	if err.response.startswith('400 load at '):
                        14: 		print(err.response)
                        15: 		exit(0)
                        16: 	else:
                        17: 		raise(err)
                        18: mserver = imaplib.IMAP4_SSL(config['connection']['mailserver'])
                        19: reMessageId = re.compile('(<[-\][a-zA-Z0-9@.%/=_\$+!&~#\?}]+>)"?\)\)(\d+ \(FLAGS\(\)\))?$')
                        20: mserver.login(config['connection']['mail_user'], config['connection']['mail_password'])
                        21: if 'mail_limit' in config['connection']:
                        22: 	mailLimit = int(config['connection']['mail_limit'])
                        23: else:
                        24: 	mailLimit = 100
                        25: if 'header_limit' in config['connection']:
                        26: 	headerLimit = int(config['connection']['header_limit'])
                        27: else:
                        28: 	headerLimit = 1000
                        29: 
                        30: tables = {
                        31: 	'list': ["create table list (id integer primary key, name text, last integer default 0);"],
                        32: 	'ids': ["create table ids (id integer, name text, mask integer, date integer);", "create unique index ids__id_name on ids(id, name);"],
                        33: }
                        34: 
                        35: class Folder:
                        36: 	def __init__(this, filename):
                        37: 		this.db = sqlite3.connect(filename)
                        38: 		this.id = None
                        39: 		found = set()
                        40: 		for row in this.db.execute("select name from sqlite_master where type = 'table';"):
                        41: 			found.add(row[0])
                        42: 		for absent in set(tables.keys()).difference(found):
                        43: 			for query in tables[absent]:
                        44: 				this.db.execute(query)
                        45: 
                        46: 	def select(this, folderName):
                        47: 		this.name = folderName
                        48: 		this.id = None
                        49: 		while True:
                        50: 			present = False
                        51: 			for row in this.db.execute("select id, last from list where name = ?;", [folderName]):
                        52: 				present = True
                        53: 				this.id = row[0]
                        54: 				this.last = row[1]
                        55: 			if present:
                        56: 				break
                        57: 			this.db.execute("insert into list(name) values (?);", [folderName])
                        58: 		if this.id == None:
                        59: 			print('Id not found.')
                        60: 			exit(1)
                        61: 		this.mask = {}
                        62: 		this.get_count()
                        63: 
                        64: 	def get_count(this):
                        65: 		this.count = 0
                        66: 		for row in this.db.execute("select count(*) from ids where id = ? and mask in (3, 1);", [this.id]):
                        67: 			this.count = row[0]
                        68: 
                        69: 	def get_record_count(this, mask):
                        70: 		for row in this.db.execute("select count(*) from ids where id = ? and mask = ?;", [this.id, mask]):
                        71: 			return(row[0])
                        72: 
                        73: 	def check(this, name):
                        74: 		if name in this.mask:
                        75: 			return(this.mask[name])
                        76: 		for row in this.db.execute("select mask from ids where id = ? and name = ?;", [this.id, name]):
                        77: 			this.mask[name] = row[0]
                        78: 			return(row[0])
                        79: 
                        80: 	def addlast(this, count):
                        81: 		this.last += count
                        82: 		this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])
                        83: 
                        84: 	def droplast(this):
                        85: 		this.last = 0
                        86: 		this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])
                        87: 
                        88: 	def addmail(this, mid):
                        89: 		mask = this.check(mid)
                        90: 		if mask in (3, 2):
                        91: 			this.db.execute("update ids set mask = 3 where id = ? and name = ?;", [this.id, mid])
                        92: 			this.mask[mid] = 3
                        93: 		else:
                        94: 			this.db.execute("insert into ids(id, name, mask) values(?, ?, ?);", [this.id, mid, 1])
                        95: 			this.count += 1
                        96: 			this.mask[mid] = 1
                        97: 
                        98: 	def addnews(this, mid, date = None):
                        99: 		mask = this.check(mid)
                       100: 		if mask in (1, 3):
                       101: 			this.db.execute("update ids set mask = 3, date = ? where id = ? and name = ?;", [date, this.id, mid])
                       102: 			this.mask[mid] = 3
                       103: 		else:
                       104: 			this.db.execute("insert into ids(id, name, mask, date) values(?, ?, ?, ?);", [this.id, mid, 2, date])
                       105: 			this.count += 1
                       106: 			this.mask[mid] = 2
                       107: 
                       108: 	def zeromail(this):
                       109: 		this.mask = {}
                       110: 		this.db.execute("update ids set mask = 2 where id = ? and mask = 3;", [this.id])
                       111: 		this.db.execute("delete from ids where id = ? and mask = 1;", [this.id])
                       112: 		this.sync()
                       113: 		this.get_count()
                       114: 
                       115: 	def zeronews(this):
                       116: 		this.mask = {}
                       117: 		this.db.execute("update ids set mask = 1 where id = ? and mask = 3;", [this.id])
                       118: 		this.db.execute("delete from ids where id = ? and mask = 2;", [this.id])
                       119: 		this.droplast()
                       120: 		this.sync()
                       121: 
                       122: 	def sync(this):
                       123: 		this.db.commit()
                       124: 
                       125: 	def get_unfetched(this):
                       126: 		return(this.db.execute("select name, date from ids where id = ? and mask = 2 order by date desc;", [this.id]))
                       127: 
                       128: 	def forget(this, mid):
                       129: 		this.db.execute("delete from ids where id = ? and name = ?;", [this.id, mid])
                       130: 
                       131: def check_folder(mserver, folder, folderName):
                       132: 	folder.zeromail()
                       133: 	deleted = 0
                       134: 	mserver.select(folderName)
                       135: 	typ, data = mserver.search(None, 'NOT DELETED')
                       136: 	count = 0
                       137: 	print(' - building imap index', folderName, '[', end='')
                       138: 	for num in data[0].split():
                       139: 		found = False
                       140: 		typ, data = mserver.fetch(num, '(ENVELOPE)')
                       141: 		field = 0
                       142: 		for rec in data:
                       143: 			if type(rec) == tuple:
                       144: 				data[field] = ''.join(i.decode('utf-8', 'ignore') for i in rec)
                       145: 			else:
                       146: 				data[field] = rec.decode('utf-8', 'ignore')
                       147: 			field += 1
                       148: 		data = ''.join(data)
                       149: 		isMid = reMessageId.search(data)
                       150: 		if isMid:
                       151: 			mid = isMid.group(1)
                       152: 			mask = folder.check(mid)
                       153: 			if not mask in (1, 3):
                       154: 				folder.addmail(mid)
                       155: 				count += 1
                       156: 			else:
                       157: 				mserver.store(num, '+FLAGS', '\\Deleted')
                       158: 				deleted += 1
                       159: 				sys.stdout.write('x')
                       160: 				sys.stdout.flush()
                       161: 		else:
                       162: 			print('Message id not found.')
                       163: 			print(repr(data))
                       164: 			exit(1)
                       165: 		if (count % 1000) == 0:
                       166: 			sys.stdout.write('.')
                       167: 			sys.stdout.flush()
                       168: 	print('], deleted:', deleted, end = '')
                       169: 	folder.sync()
                       170: 	mserver.expunge()
                       171: 
                       172: folder = Folder('nntpdup.sqlite')
                       173: 
                       174: limits = [0, 0]
                       175: limitSteps = [headerLimit / len(config['groups']), mailLimit / len(config['groups'])]
                       176: 
                       177: maxlength = 0
                       178: for folderName in (config['groups'].keys()):
                       179: 	maxlength = max(maxlength, len(folderName))
                       180: 
                       181: skew = 1 + int(maxlength / 8)
                       182: 
# Main pass: for every configured group, (1) make sure the local index of the
# IMAP folder is current, (2) pull new overview records from the news server,
# (3) copy articles known only from news into the IMAP folder.
# Iterating a set means the group order is not fixed between runs.
for folderName in (set(config['groups'].keys())):
	# stats[0]: overview records processed, stats[1]: articles appended.
	stats = [0, 0]
	folder.select(folderName)

	resp = mserver.select(folderName)
	if resp[0] != 'OK':
		print("Can't open folder.")
		exit(1)
	# Rebuild the mail index when the count reported by the IMAP SELECT
	# differs from the number of ids recorded as present in mail.
	if int(resp[1][0]) != folder.count:
		check_folder(mserver, folder, folderName)

	# nntplib's group() returns (response, count, first, last, name).
	_, count, first, last, _ = server.group(folderName)
	# Add this group's share to the overview budget; whatever is left
	# unused below stays in limits[0] for the following groups.
	limits[0] += limitSteps[0]
	if last > folder.last:
		count = 0
		# we need to fetch new ids
		# Upper article number for this run, capped by the remaining budget.
		request = min(last, folder.last + limits[0])
		try:
			for record in server.over((int(folder.last) + 1, int(request)))[1]:
				# NOTE(review): `mid` is assigned but not read afterwards.
				mid = record[1]['message-id']
				if len(record[1]['message-id']) > 0:
					try:
						folder.addnews(record[1]['message-id'], email.utils.parsedate_to_datetime(record[1]['date']).timestamp())
					except OverflowError as err:
						# Date could not be turned into a timestamp: store the id without one.
						folder.addnews(record[1]['message-id'])
					except TypeError as err:
						folder.addnews(record[1]['message-id'])
				count += 1
		except nntplib.NNTPTemporaryError as err:
			# Replies starting with '423 ' are ignored (in NNTP, 423 means no
			# article in the requested range); anything else propagates.
			if err.response.startswith('423 '):
				pass
			else:
				raise(err)
		except nntplib.NNTPPermanentError as err:
			# Show the requested range before propagating.
			print(folder.last, request)
			raise(err)
		except sqlite3.IntegrityError as err:
			# Duplicate (folder id, message id): dump the overview record and
			# the row already stored, then propagate.
			print(repr(record))
			print([x for x in map(repr, folder.db.execute("select * from ids where id = ? and name = ?;", [folder.id, record[1]['message-id']]))])
			raise(err)
		stats[0] = count
		limits[0] -= count
		# The stored position moves up to `request` even when a 423 was ignored.
		folder.addlast(request - folder.last)
		folder.sync()
	elif folder.get_record_count(1) > 0:
		# Nothing new on the server but some ids are known from mail only:
		# reset the stored position to 0.
		# NOTE(review): presumably so a later run rescans the group — confirm.
		folder.droplast()

	# Same budgeting scheme as above, for articles appended to the mailbox.
	limits[1] += limitSteps[1]
	if folder.get_record_count(2) > 0:
		count = 0
		# there are extra articles
		# NOTE(review): raw_date is only read by the commented-out debug prints.
		raw_date = []
		unfetched = []
		# Materialise the list first; the loop below modifies the same table.
		for item, env_date in folder.get_unfetched():
			mask = folder.check(item)
			if mask == 2:
				unfetched += (item, env_date),
		for item, env_date in unfetched:
			try:
				_, info = server.article(item)
				if env_date == None or env_date < 0:
					# No usable stored date: look for one in the article's own headers.
					date = None
					backup_date = None
					out = []
					for line in info.lines:
						if len(line) == 0:
							# First empty line: the header lines collected so far are parsed.
							mesg = email.message_from_string('\n'.join(out))
							for header in mesg._headers:
								if header[0] == 'Date':
									raw_date += header[1],
									date = email.utils.parsedate(header[1])
								elif header[0] == 'Original-Received':
									raw_date += header[1],
									# The text after the last ';' is parsed as a date.
									tmp_date = email.utils.parsedate(header[1].split(';')[-1])
									if tmp_date != None and tmp_date[0] >= 1970:
										backup_date = tmp_date
							if date == None and backup_date == None:
								print('Date missed.')
								print(repr(out))
								exit(1)
							elif date == None:
								date = backup_date
							break
						try:
							out.append(line.decode('ascii', 'ignore'))
						except UnicodeDecodeError:
							print(repr(line))
							exit(1)
					# NOTE(review): `out` is not read after this point.
					out.append('\n')
					try:
						#print('*', item, date, type(date))
						mserver.append(folderName, None, date, b'\n'.join(info.lines))
					except AttributeError as err:
						# Retry with the date taken from Original-Received (may be None).
						#print('*', item, raw_date, repr(date))
						#raise(err)
						mserver.append(folderName, None, backup_date, b'\n'.join(info.lines))
					except OverflowError as err:
						#print('*', item, raw_date, repr(date))
						#raise(err)
						mserver.append(folderName, None, backup_date, b'\n'.join(info.lines))
				else:
					# The timestamp stored with the overview record is used as the date.
					#print('*', item, env_date, type(env_date))
					mserver.append(folderName, None, env_date, b'\n'.join(info.lines))
				# Mark the id as present in mail and commit after every article.
				folder.addmail(item)
				folder.sync()
				count += 1
				if count >= limits[1]:
					break
			except nntplib.NNTPTemporaryError as err:
				if err.response.startswith('430 No such article'):
					# The article is gone from the news server: drop its id.
					folder.forget(item)
				else:
					print(err.response, item, env_date)
					raise(err)
		stats[1] = count
		limits[1] -= count

	# One line per group that had activity; tabs pad the name to a common column.
	if stats[0] != 0 or stats[1] != 0:
		print('# ', folderName, '\t'*(skew - int((len(folderName) + 2) / 8)), '\t'.join(map(str, stats)), sep = '')
	folder.sync()