3205f8a9ae 2015-10-12 1: #!/usr/bin/env python3.4
3205f8a9ae 2015-10-12 2:
3205f8a9ae 2015-10-12 3: import configparser, email.utils, getpass, imaplib, nntplib, re, sqlite3, sys
# Raise the maximum response line length of both protocol clients to 1 MiB.
imaplib._MAXLINE = 1024 * 1024
nntplib._MAXLINE = 1024 * 1024

# Settings come from nntpdup.conf, looked up relative to the current directory.
config = configparser.ConfigParser(allow_no_value=True)
config.read('nntpdup.conf')

try:
    server = nntplib.NNTP_SSL(config['connection']['newsserver'])
except nntplib.NNTPTemporaryError as err:
    if not err.response.startswith('400 load at '):
        raise
    # A "400 load at ..." reply is only reported; the run ends with status 0.
    print(err.response)
    sys.exit(0)

mserver = imaplib.IMAP4_SSL(config['connection']['mailserver'])
# Captures the <message-id> that closes a decoded ENVELOPE response, optionally
# followed by a quote, the closing parentheses and a trailing "N (FLAGS())".
reMessageId = re.compile(r'(<[-\][a-zA-Z0-9@.%/=_\$+!&~#\?}]+>)"?\)\)(\d+ \(FLAGS\(\)\))?$')
mserver.login(config['connection']['mail_user'], config['connection']['mail_password'])

# Per-run budgets; both options are optional in the [connection] section.
mailLimit = config['connection'].getint('mail_limit', fallback=100)
headerLimit = config['connection'].getint('header_limit', fallback=1000)

# Schema statements keyed by table name. Folder executes the statements of
# every table that is missing from the database it opens.
tables = {
    'list': [
        "create table list (id integer primary key, name text, last integer default 0);",
    ],
    'ids': [
        "create table ids (id integer, name text, mask integer, date integer);",
        "create unique index ids__id_name on ids(id, name);",
    ],
}
3205f8a9ae 2015-10-12 34:
class Folder:
    """Per-group bookkeeping of message ids in a SQLite database.

    Rows of the ``ids`` table belong to one group (column ``id``) and carry a
    ``mask``: 1 = recorded through addmail() only, 2 = recorded through
    addnews() only, 3 = recorded through both.  ``count`` mirrors the number
    of rows of the selected group whose mask is 1 or 3 (see get_count()).
    """

    def __init__(this, filename):
        """Open the database at *filename* and create any missing table."""
        this.db = sqlite3.connect(filename)
        this.id = None
        existing = {
            row[0]
            for row in this.db.execute("select name from sqlite_master where type = 'table';")
        }
        for absent in set(tables).difference(existing):
            for query in tables[absent]:
                this.db.execute(query)

    def select(this, folderName):
        """Make *folderName* the current group, registering it if unknown.

        Sets ``name``, ``id``, ``last`` and ``count`` and empties the mask
        cache.  Prints a message and exits with status 1 if the group cannot
        be found even after inserting it.
        """
        this.name = folderName
        this.id = None
        lookup = "select id, last from list where name = ?;"
        rows = this.db.execute(lookup, [folderName]).fetchall()
        if not rows:
            this.db.execute("insert into list(name) values (?);", [folderName])
            rows = this.db.execute(lookup, [folderName]).fetchall()
        if not rows:
            print('Id not found.')
            sys.exit(1)
        # If several rows share the name, the last one returned wins.
        this.id, this.last = rows[-1]
        this.mask = {}
        this.get_count()

    def get_count(this):
        """Reload ``count``: rows of the current group with mask 1 or 3."""
        cursor = this.db.execute(
            "select count(*) from ids where id = ? and mask in (3, 1);", [this.id])
        this.count = cursor.fetchone()[0]

    def get_record_count(this, mask):
        """Return how many rows of the current group have exactly *mask*."""
        cursor = this.db.execute(
            "select count(*) from ids where id = ? and mask = ?;", [this.id, mask])
        return cursor.fetchone()[0]

    def check(this, name):
        """Return the mask stored for message id *name*, or None if unknown.

        Masks that were found are cached in ``this.mask``.
        """
        if name in this.mask:
            return this.mask[name]
        cursor = this.db.execute(
            "select mask from ids where id = ? and name = ?;", [this.id, name])
        for row in cursor:
            this.mask[name] = row[0]
            return row[0]
        return None

    def addlast(this, count):
        """Advance the stored ``last`` value of the current group by *count*."""
        this.last += count
        this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])

    def droplast(this):
        """Reset the stored ``last`` value of the current group to 0."""
        this.last = 0
        this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])

    def addmail(this, mid):
        """Record message id *mid* as seen through addmail.

        A row with mask 2 or 3 becomes mask 3; otherwise a new mask-1 row is
        inserted.  Inserting an id that already has mask 1 violates the unique
        index and raises sqlite3.IntegrityError.
        """
        mask = this.check(mid)
        if mask in (3, 2):
            this.db.execute(
                "update ids set mask = 3 where id = ? and name = ?;", [this.id, mid])
            if mask == 2:
                # The row just entered the mask-(1, 3) set that count tracks.
                this.count += 1
            this.mask[mid] = 3
        else:
            this.db.execute(
                "insert into ids(id, name, mask) values(?, ?, ?);", [this.id, mid, 1])
            this.count += 1
            this.mask[mid] = 1

    def addnews(this, mid, date = None):
        """Record message id *mid* as seen through addnews, with its *date*.

        A row with mask 1 or 3 becomes mask 3 and gets *date*; otherwise a new
        mask-2 row is inserted.  Inserting an id that already has mask 2
        violates the unique index and raises sqlite3.IntegrityError.
        ``count`` is left alone: mask-2 rows are not part of it and rows with
        mask 1 or 3 were already counted.
        """
        mask = this.check(mid)
        if mask in (1, 3):
            this.db.execute(
                "update ids set mask = 3, date = ? where id = ? and name = ?;",
                [date, this.id, mid])
            this.mask[mid] = 3
        else:
            this.db.execute(
                "insert into ids(id, name, mask, date) values(?, ?, ?, ?);",
                [this.id, mid, 2, date])
            this.mask[mid] = 2

    def zeromail(this):
        """Drop the addmail side of the current group, commit, reload count.

        Mask-3 rows fall back to mask 2 and mask-1 rows are deleted.
        """
        this.mask = {}
        this.db.execute("update ids set mask = 2 where id = ? and mask = 3;", [this.id])
        this.db.execute("delete from ids where id = ? and mask = 1;", [this.id])
        this.sync()
        this.get_count()

    def zeronews(this):
        """Drop the addnews side of the current group, reset last, commit.

        Mask-3 rows fall back to mask 1 and mask-2 rows are deleted.
        """
        this.mask = {}
        this.db.execute("update ids set mask = 1 where id = ? and mask = 3;", [this.id])
        this.db.execute("delete from ids where id = ? and mask = 2;", [this.id])
        this.droplast()
        this.sync()

    def sync(this):
        """Commit the pending database transaction."""
        this.db.commit()

    def get_unfetched(this):
        """Return a cursor over (name, date) of mask-2 rows, newest date first."""
        return this.db.execute(
            "select name, date from ids where id = ? and mask = 2 order by date desc;",
            [this.id])

    def forget(this, mid):
        """Delete message id *mid* from the current group.

        The cached mask is dropped as well, and ``count`` is lowered when the
        removed row was part of it (mask 1 or 3).
        """
        mask = this.check(mid)
        this.db.execute("delete from ids where id = ? and name = ?;", [this.id, mid])
        if mask in (1, 3):
            this.count -= 1
        this.mask.pop(mid, None)
3205f8a9ae 2015-10-12 130:
def check_folder(mserver, folder, folderName):
    """Rebuild the mail side of *folder* from the IMAP mailbox *folderName*.

    The mail bookkeeping of the group is cleared first.  Every message that
    is not flagged deleted is then looked up by the Message-ID found in its
    ENVELOPE: unknown ids are recorded with folder.addmail(), while messages
    whose id was already recorded during this pass are flagged \\Deleted.
    Finally the database is committed and the mailbox expunged.

    Progress is written to stdout: '.' once per 1000 indexed messages and 'x'
    per message flagged for deletion.  If no Message-ID can be extracted the
    raw envelope is printed and the process exits with status 1.
    """
    folder.zeromail()
    deleted = 0
    count = 0
    mserver.select(folderName)
    _, found_nums = mserver.search(None, 'NOT DELETED')
    print(' - building imap index', folderName, '[', end='')
    for num in found_nums[0].split():
        _, parts = mserver.fetch(num, '(ENVELOPE)')
        # Response items are either bytes or tuples of bytes; flatten them
        # into a single string, ignoring undecodable bytes.
        chunks = []
        for rec in parts:
            pieces = rec if isinstance(rec, tuple) else (rec,)
            chunks.extend(piece.decode('utf-8', 'ignore') for piece in pieces)
        envelope = ''.join(chunks)

        isMid = reMessageId.search(envelope)
        if not isMid:
            print('Message id not found.')
            print(repr(envelope))
            sys.exit(1)

        mid = isMid.group(1)
        if folder.check(mid) in (1, 3):
            # Same Message-ID seen earlier in this mailbox: drop this copy.
            mserver.store(num, '+FLAGS', '\\Deleted')
            deleted += 1
            sys.stdout.write('x')
            sys.stdout.flush()
            continue

        folder.addmail(mid)
        count += 1
        if count % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
    print('], deleted:', deleted, end = '')
    folder.sync()
    mserver.expunge()
3205f8a9ae 2015-10-12 171:
def _header_lines(lines):
    """Return the decoded header lines of a raw article.

    *lines* is a sequence of bytes lines.  Lines are decoded as ASCII
    (undecodable bytes dropped) up to the first empty line.  Returns None
    when there is no empty line, i.e. no header/body separator was found.
    """
    header = []
    for line in lines:
        if not line:
            return header
        header.append(line.decode('ascii', 'ignore'))
    return None


folder = Folder('nntpdup.sqlite')
groups = config['groups']

# Running budgets: [overview headers, downloaded articles].  Each group adds
# its share and whatever it does not use is carried over to the next group.
limits = [0, 0]
group_count = len(groups) or 1  # avoid dividing by zero when no group is configured
limitSteps = [headerLimit / group_count, mailLimit / group_count]

# Number of tab stops needed to align the statistics column.
maxlength = max((len(name) for name in groups), default=0)
skew = 1 + maxlength // 8

# NOTE: set() makes the processing order of the groups arbitrary.
for folderName in set(groups):
    stats = [0, 0]
    folder.select(folderName)

    resp = mserver.select(folderName)
    if resp[0] != 'OK':
        print("Can't open folder.")
        sys.exit(1)
    if int(resp[1][0]) != folder.count:
        # Mailbox size and bookkeeping disagree: re-index the mailbox.
        check_folder(mserver, folder, folderName)

    _, _, _, last, _ = server.group(folderName)
    limits[0] += limitSteps[0]
    if last > folder.last:
        count = 0
        # we need to fetch new ids
        start = int(folder.last)
        # Keep article numbers integral even though the budget is fractional.
        request = min(last, start + int(limits[0]))
        if request > start:
            try:
                for record in server.over((start + 1, request))[1]:
                    mid = record[1]['message-id']
                    if not mid:
                        continue
                    try:
                        stamp = email.utils.parsedate_to_datetime(record[1]['date']).timestamp()
                    except (OverflowError, TypeError, ValueError):
                        # Unparseable date: keep the id without a date.
                        stamp = None
                    folder.addnews(mid, stamp)
                    count += 1
            except nntplib.NNTPTemporaryError as err:
                # A 423 reply just means this range is skipped.
                if not err.response.startswith('423 '):
                    raise
            except nntplib.NNTPPermanentError:
                print(folder.last, request)
                raise
            except sqlite3.IntegrityError:
                print(repr(record))
                print([repr(row) for row in folder.db.execute(
                    "select * from ids where id = ? and name = ?;",
                    [folder.id, record[1]['message-id']])])
                raise
        stats[0] = count
        limits[0] -= count
        folder.addlast(request - start)
        folder.sync()
    elif folder.get_record_count(1) > 0:
        folder.droplast()

    limits[1] += limitSteps[1]
    if folder.get_record_count(2) > 0:
        count = 0
        # there are extra articles
        # Materialise the list first: the loop below modifies the same table.
        unfetched = [
            (item, env_date)
            for item, env_date in folder.get_unfetched()
            if folder.check(item) == 2
        ]
        for item, env_date in unfetched:
            try:
                _, info = server.article(item)
            except nntplib.NNTPTemporaryError as err:
                if err.response.startswith('430 No such article'):
                    folder.forget(item)
                    continue
                print(err.response, item, env_date)
                raise

            payload = b'\n'.join(info.lines)
            if env_date is None or env_date < 0:
                # No usable overview date: look for one in the article header.
                date = None
                backup_date = None
                header = _header_lines(info.lines)
                if header is not None:
                    mesg = email.message_from_string('\n'.join(header))
                    for name, value in mesg.items():
                        if name == 'Date':
                            date = email.utils.parsedate(value)
                        elif name == 'Original-Received':
                            # The timestamp follows the last ';' of the header.
                            tmp_date = email.utils.parsedate(value.split(';')[-1])
                            if tmp_date is not None and tmp_date[0] >= 1970:
                                backup_date = tmp_date
                    if date is None and backup_date is None:
                        print('Date missed.')
                        print(repr(header))
                        sys.exit(1)
                    if date is None:
                        date = backup_date
                try:
                    mserver.append(folderName, None, date, payload)
                except (AttributeError, OverflowError):
                    # The date could not be converted; retry with the backup.
                    mserver.append(folderName, None, backup_date, payload)
            else:
                mserver.append(folderName, None, env_date, payload)

            folder.addmail(item)
            folder.sync()
            count += 1
            if count >= limits[1]:
                break
        stats[1] = count
        limits[1] -= count

    if stats[0] != 0 or stats[1] != 0:
        print('# ', folderName, '\t'*(skew - (len(folderName) + 2) // 8), '\t'.join(map(str, stats)), sep = '')
    # Commit whatever this group changed (e.g. droplast() or forget()).
    folder.sync()