3205f8a9ae 2015-10-12 1: #!/usr/bin/env python3.4
3205f8a9ae 2015-10-12 2:
3205f8a9ae 2015-10-12 3: import configparser, email.utils, getpass, imaplib, nntplib, re, sqlite3, sys
# Enlarge the private per-line limits of the stdlib IMAP/NNTP clients so that
# very long server response lines are accepted.
imaplib._MAXLINE = 1024 * 1024
nntplib._MAXLINE = 1024 * 1024

# Settings are read from nntpdup.conf in the current working directory.
# allow_no_value lets a section list bare keys without "= value".
config = configparser.ConfigParser(allow_no_value=True)
config.read('nntpdup.conf')

_connection = config['connection']

# Both connections are opened over SSL at import time and shared by the rest
# of the script.
server = nntplib.NNTP_SSL(_connection['newsserver'])
mserver = imaplib.IMAP4_SSL(_connection['mailserver'])

# Matches a "<message-id>" sitting at the very end of a decoded IMAP ENVELOPE
# response, optionally followed by an empty FLAGS item; group(1) is the id
# including the angle brackets.  Written as a raw string so the backslashes
# reach the regex engine unchanged (the pattern itself is unmodified).
reMessageId = re.compile(r'(<[-\][a-zA-Z0-9@.%/=_\$+!&~#\?}]+>)"?\)\)(\d+ \(FLAGS\(\)\))?$')

mserver.login(_connection['mail_user'], _connection['mail_password'])

# Optional per-run budgets, with the same defaults as before when the keys
# are absent from the [connection] section.
mailLimit = _connection.getint('mail_limit', fallback=100)
headerLimit = _connection.getint('header_limit', fallback=1000)
3205f8a9ae 2015-10-12 22:
# Database schema: table name -> statements to run when that table is missing.
tables = {
    'list': [
        "create table list (id integer primary key, name text, last integer default 0);",
    ],
    'ids': [
        "create table ids (id integer, name text, mask integer, date integer);",
        "create unique index ids__id_name on ids(id, name);",
    ],
}


class Folder:
    """SQLite-backed index of message ids, one namespace per folder name.

    Every row of ``ids`` carries a bit mask telling where the id was seen:
    1 = recorded through addmail(), 2 = recorded through addnews(),
    3 = both.  ``list`` maps folder names to a numeric id and a ``last``
    position that addlast()/droplast() maintain.

    Attributes set by select(): ``name``, ``id``, ``last``, ``mask`` (an
    in-memory cache of name -> mask) and ``count`` (number of ids whose mask
    is 1 or 3).
    """

    def __init__(self, filename):
        """Open (or create) the database and create any missing tables."""
        self.db = sqlite3.connect(filename)
        self.id = None
        found = {row[0] for row in self.db.execute(
            "select name from sqlite_master where type = 'table';")}
        for absent in set(tables) - found:
            for query in tables[absent]:
                self.db.execute(query)

    def select(self, folderName):
        """Make *folderName* current, registering it on first use.

        Resets the mask cache and refreshes ``count``.  Terminates the
        process with status 1 if the folder row cannot be read back.
        """
        self.name = folderName
        self.id = None
        query = "select id, last from list where name = ?;"
        row = self.db.execute(query, [folderName]).fetchone()
        if row is None:
            self.db.execute("insert into list(name) values (?);", [folderName])
            row = self.db.execute(query, [folderName]).fetchone()
        if row is None:
            print('Id not found.')
            sys.exit(1)
        self.id, self.last = row
        self.mask = {}
        self.get_count()

    def get_count(self):
        """Reload ``count``: ids of the current folder with mask 1 or 3."""
        row = self.db.execute(
            "select count(*) from ids where id = ? and mask in (3, 1);",
            [self.id]).fetchone()
        self.count = row[0] if row is not None else 0

    def get_record_count(self, mask):
        """Return how many ids of the current folder have exactly *mask*."""
        row = self.db.execute(
            "select count(*) from ids where id = ? and mask = ?;",
            [self.id, mask]).fetchone()
        return row[0]

    def check(self, name):
        """Return the mask stored for *name*, or None when it is unknown.

        Known masks are cached in ``self.mask``; misses are not cached.
        """
        if name in self.mask:
            return self.mask[name]
        row = self.db.execute(
            "select mask from ids where id = ? and name = ?;",
            [self.id, name]).fetchone()
        if row is None:
            return None
        self.mask[name] = row[0]
        return row[0]

    def addlast(self, count):
        """Advance the stored ``last`` position by *count*."""
        self.last += count
        self.db.execute("update list set last = ? where id = ?;", [self.last, self.id])

    def droplast(self):
        """Reset the stored ``last`` position to 0."""
        self.last = 0
        self.db.execute("update list set last = ? where id = ?;", [self.last, self.id])

    def addmail(self, mid):
        """Record that *mid* is present in mail (sets mask bit 1).

        Safe to call for an id that already has the bit: nothing is inserted
        twice, so the unique index on (id, name) is never violated.
        ``count`` grows only when the bit is newly set, keeping it equal to
        what get_count() would compute.
        """
        mask = self.check(mid)
        if mask is None:
            self.db.execute("insert into ids(id, name, mask) values(?, ?, ?);", [self.id, mid, 1])
            self.mask[mid] = 1
            self.count += 1
        elif mask in (1, 3):
            # Mail bit already set; nothing to change.
            self.mask[mid] = mask
        else:
            self.db.execute("update ids set mask = 3 where id = ? and name = ?;", [self.id, mid])
            self.mask[mid] = 3
            self.count += 1

    def addnews(self, mid, date = None):
        """Record that *mid* is present in news (sets mask bit 2) with *date*.

        For an id that is already known the row is updated (mask and date)
        instead of inserted, so re-adding a news-only id no longer raises
        sqlite3.IntegrityError.  ``count`` is untouched because it only
        tracks the mail bit.
        """
        mask = self.check(mid)
        if mask is None:
            self.db.execute("insert into ids(id, name, mask, date) values(?, ?, ?, ?);", [self.id, mid, 2, date])
            self.mask[mid] = 2
        else:
            merged = 3 if mask in (1, 3) else 2
            self.db.execute("update ids set mask = ?, date = ? where id = ? and name = ?;", [merged, date, self.id, mid])
            self.mask[mid] = merged

    def zeromail(self):
        """Drop all mail knowledge: mask 3 becomes 2, mask 1 rows are deleted.

        Commits and refreshes ``count``.
        """
        self.mask = {}
        self.db.execute("update ids set mask = 2 where id = ? and mask = 3;", [self.id])
        self.db.execute("delete from ids where id = ? and mask = 1;", [self.id])
        self.sync()
        self.get_count()

    def zeronews(self):
        """Drop all news knowledge: mask 3 becomes 1, mask 2 rows are deleted.

        Also resets ``last`` and commits.
        """
        self.mask = {}
        self.db.execute("update ids set mask = 1 where id = ? and mask = 3;", [self.id])
        self.db.execute("delete from ids where id = ? and mask = 2;", [self.id])
        self.droplast()
        self.sync()

    def sync(self):
        """Commit pending changes."""
        self.db.commit()

    def get_unfetched(self):
        """Return a cursor over (name, date) of news-only ids, newest first."""
        return self.db.execute(
            "select name, date from ids where id = ? and mask = 2 order by date desc;",
            [self.id])

    def forget(self, mid):
        """Delete *mid* from the current folder, cache and count included."""
        known = self.check(mid)
        self.db.execute("delete from ids where id = ? and name = ?;", [self.id, mid])
        # Without this the cache would keep reporting the deleted id.
        self.mask.pop(mid, None)
        if known in (1, 3):
            self.count -= 1
3205f8a9ae 2015-10-12 123:
def check_folder(mserver, folder, folderName):
    """Rebuild the mail side of the index for *folderName* from the IMAP server.

    mserver    -- logged-in imaplib connection.
    folder     -- Folder index with *folderName* already selected.
    folderName -- IMAP mailbox to scan.

    Clears the recorded mail state, then walks every message not flagged
    as deleted.  A message whose id is not yet recorded as mail is added to
    the index; one whose id is already recorded is flagged \\Deleted on the
    server as a duplicate.  Finally the index is committed and the mailbox
    expunged.  Progress is written to stdout ('x' per duplicate).

    Terminates the process with status 1 when no message id can be extracted
    from a message's ENVELOPE response.
    """
    folder.zeromail()
    deleted = 0
    count = 0
    mserver.select(folderName)
    _, found = mserver.search(None, 'NOT DELETED')
    print(' - building imap index', folderName, '[', end='')
    for num in found[0].split():
        _, parts = mserver.fetch(num, '(ENVELOPE)')
        # The response mixes bytes and tuples of bytes; decode each piece on
        # its own (dropping undecodable bytes) and glue everything together.
        decoded = []
        for part in parts:
            if isinstance(part, tuple):
                decoded.append(''.join(piece.decode('utf-8', 'ignore') for piece in part))
            else:
                decoded.append(part.decode('utf-8', 'ignore'))
        envelope = ''.join(decoded)

        isMid = reMessageId.search(envelope)
        if not isMid:
            print('Message id not found.')
            print(repr(envelope))
            sys.exit(1)

        mid = isMid.group(1)
        if folder.check(mid) not in (1, 3):
            folder.addmail(mid)
            count += 1
        else:
            # Same id already seen in this mailbox: mark the copy for removal.
            mserver.store(num, '+FLAGS', '\\Deleted')
            deleted += 1
            sys.stdout.write('x')
            sys.stdout.flush()

        # Progress dot; evaluated for every message, so it also fires while
        # count is still 0 or sits on a multiple of 1000.
        if (count % 1000) == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
    print('], deleted:', deleted, end = '')
    folder.sync()
    mserver.expunge()
3205f8a9ae 2015-10-12 164:
def _scan_header_dates(lines):
    """Pick the dates out of the header part of a raw article.

    lines -- the article as a list of bytes lines.

    Returns None when the article contains no empty line (no end of
    headers).  Otherwise returns (date, backup_date, header_lines):
    date is email.utils.parsedate() of the last 'Date' header (None if it is
    absent or unparsable), backup_date is the last 'Original-Received'
    timestamp whose year is 1970 or later (else None), and header_lines are
    the decoded header lines.  Header names are matched case-sensitively.
    """
    header_lines = []
    for line in lines:
        if len(line) == 0:
            break
        header_lines.append(line.decode('ascii', 'ignore'))
    else:
        return None

    date = None
    backup_date = None
    for name, value in email.message_from_string('\n'.join(header_lines)).items():
        if name == 'Date':
            date = email.utils.parsedate(value)
        elif name == 'Original-Received':
            # The timestamp is whatever follows the last ';' of the header.
            candidate = email.utils.parsedate(value.split(';')[-1])
            if candidate is not None and candidate[0] >= 1970:
                backup_date = candidate
    return date, backup_date, header_lines


folder = Folder('nntpdup.sqlite')

# Running budgets: [headers to scan, articles to copy].  Each group adds its
# share (limitSteps) and subtracts what it actually used, so an unused share
# carries over to the groups processed after it.
limits = [0, 0]
groupCount = max(len(config['groups']), 1)  # avoid dividing by zero when no groups are listed
limitSteps = [headerLimit / groupCount, mailLimit / groupCount]

# Tab count needed to line up the per-group statistics columns.
maxlength = max((len(name) for name in config['groups']), default=0)
skew = 1 + int(maxlength / 8)

for folderName in set(config['groups'].keys()):
    stats = [0, 0]
    folder.select(folderName)

    resp = mserver.select(folderName)
    if resp[0] != 'OK':
        print("Can't open folder.")
        sys.exit(1)
    # Message count reported by the server differs from the index:
    # rebuild the mail side of the index.
    if int(resp[1][0]) != folder.count:
        check_folder(mserver, folder, folderName)

    _, _, _, last, _ = server.group(folderName)
    limits[0] += limitSteps[0]
    if last > folder.last:
        # New article numbers on the news server: record their message ids.
        count = 0
        request = min(last, folder.last + limits[0])
        try:
            for record in server.over((int(folder.last) + 1, int(request)))[1]:
                mid = record[1]['message-id']
                if len(mid) > 0:
                    try:
                        stamp = email.utils.parsedate_to_datetime(record[1]['date']).timestamp()
                    except (OverflowError, TypeError, ValueError):
                        # Unusable Date header: store the id without a date.
                        stamp = None
                    folder.addnews(mid, stamp)
                    count += 1
        except nntplib.NNTPTemporaryError as err:
            # 423 (no articles in that range, RFC 3977) is treated as empty.
            if not err.response.startswith('423 '):
                raise
        except nntplib.NNTPPermanentError:
            print(folder.last, request)
            raise
        except sqlite3.IntegrityError:
            # Dump the offending record and what the index holds for it.
            print(repr(record))
            print([repr(row) for row in folder.db.execute("select * from ids where id = ? and name = ?;", [folder.id, record[1]['message-id']])])
            raise
        stats[0] = count
        limits[0] -= count
        folder.addlast(request - folder.last)
        folder.sync()
    elif folder.get_record_count(1) > 0:
        # Nothing new on the server but some ids are known from mail only:
        # reset the position so the headers get scanned again from the start.
        folder.droplast()

    limits[1] += limitSteps[1]
    if folder.get_record_count(2) > 0:
        # Ids known from news only: copy those articles into the mailbox.
        count = 0
        # Materialized first because the loop below modifies the same table.
        unfetched = [(item, env_date)
                     for item, env_date in folder.get_unfetched()
                     if folder.check(item) == 2]
        for item, env_date in unfetched:
            try:
                _, info = server.article(item)
            except nntplib.NNTPTemporaryError as err:
                if err.response.startswith('430 No such article'):
                    # Article is gone from the server: drop it from the index.
                    folder.forget(item)
                    continue
                print(err.response, item, env_date)
                raise

            message = b'\n'.join(info.lines)
            if env_date is None or env_date < 0:
                # No usable date from the overview: fall back to the headers.
                date = None
                backup_date = None
                scanned = _scan_header_dates(info.lines)
                if scanned is not None:
                    date, backup_date, headerLines = scanned
                    if date is None:
                        if backup_date is None:
                            print('Date missed.')
                            print(repr(headerLines))
                            sys.exit(1)
                        date = backup_date
                try:
                    mserver.append(folderName, None, date, message)
                except (AttributeError, OverflowError):
                    # The Date header could not be converted: retry with the
                    # Original-Received timestamp (possibly None).
                    mserver.append(folderName, None, backup_date, message)
            else:
                mserver.append(folderName, None, env_date, message)

            folder.addmail(item)
            folder.sync()
            count += 1
            if count >= limits[1]:
                break
        stats[1] = count
        limits[1] -= count

    if stats[0] != 0 or stats[1] != 0:
        print('# ', folderName, '\t'*(skew - int((len(folderName) + 2) / 8)), '\t'.join(map(str, stats)), sep = '')
    folder.sync()