295fec7f85 2016-09-17 1: #!/usr/bin/env python3
3205f8a9ae 2015-10-12 2:
3205f8a9ae 2015-10-12 3: import configparser, email.utils, getpass, imaplib, nntplib, re, sqlite3, sys
# imaplib and nntplib each enforce a private per-line size limit (_MAXLINE)
# on server responses; raise both to 4 MiB so oversized lines are accepted.
imaplib._MAXLINE = 1024 * 1024 * 4
nntplib._MAXLINE = 1024 * 1024 * 4

# All settings are read from nntpdup.conf in the current working directory.
# The [connection] section supplies server names, credentials and optional
# limits; the option names of the [groups] section are the groups to process.
config = configparser.ConfigParser(allow_no_value=True)
config.read('nntpdup.conf')

# Plain-text NNTP connection; use nntplib.NNTP_SSL(...) here instead for TLS.
try:
    server = nntplib.NNTP(config['connection']['newsserver'])
except nntplib.NNTPTemporaryError as err:
    # A "400 load at ..." greeting is reported and treated as a clean exit;
    # every other temporary error propagates unchanged.
    if not err.response.startswith('400 load at '):
        raise
    print(err.response)
    sys.exit(0)

mserver = imaplib.IMAP4_SSL(config['connection']['mailserver'])

# Extracts the Message-ID (group 1) from the tail of a decoded IMAP ENVELOPE
# response.  Raw string: the pattern is unchanged, but the backslashes are no
# longer (invalid) string-literal escapes.
reMessageId = re.compile(r'(<[-\][a-zA-Z0-9@.%/=_\$+!&~#\?}]+>)"?\)\)(\d+ \(FLAGS\(\)\))?$')

mserver.login(config['connection']['mail_user'], config['connection']['mail_password'])

# Optional limits, with the defaults used when the option is absent.
mailLimit = config['connection'].getint('mail_limit', fallback=100)
headerLimit = config['connection'].getint('header_limit', fallback=1000)
3205f8a9ae 2015-10-12 30:
# Schema of the sqlite index, keyed by table name.  Each value is the list of
# statements that create the table (and its indexes); Folder.__init__ runs the
# statements for every table that is missing from the database.
tables = {
    'list': [
        "create table list (id integer primary key, name text, last integer default 0);",
    ],
    'ids': [
        "create table ids (id integer, name text, mask integer, date integer);",
        "create unique index ids__id_name on ids(id, name);",
    ],
}


class Folder:
    """Sqlite-backed index of message ids, one logical folder at a time.

    The ``list`` table holds one row per folder (``id``, ``name`` and ``last``,
    a counter maintained through addlast()/droplast()).  The ``ids`` table
    holds one row per (folder id, message id) with a bit mask:

    * 1 -- recorded through addmail() only
    * 2 -- recorded through addnews() only
    * 3 -- recorded through both

    Attributes set by select(): ``name``, ``id``, ``last``, ``mask`` (a
    per-folder cache of message id -> mask) and ``count`` (number of rows
    whose mask is 1 or 3).
    """

    MAIL = 1  # bit set by addmail()
    NEWS = 2  # bit set by addnews()

    def __init__(this, filename):
        """Open (or create) the database *filename* and create missing tables."""
        this.db = sqlite3.connect(filename)
        this.id = None
        this.name = None
        this.last = 0
        this.count = 0
        this.mask = {}
        found = {
            row[0]
            for row in this.db.execute("select name from sqlite_master where type = 'table';")
        }
        for absent in set(tables) - found:
            for query in tables[absent]:
                this.db.execute(query)

    def select(this, folderName):
        """Make *folderName* the current folder, registering it if unknown.

        Loads ``id`` and ``last`` from the ``list`` table, clears the mask
        cache and refreshes ``count``.  Exits with status 1 if the folder row
        cannot be found even after inserting it.
        """
        this.name = folderName
        this.id = None
        lookup = "select id, last from list where name = ?;"
        row = this.db.execute(lookup, [folderName]).fetchone()
        if row is None:
            this.db.execute("insert into list(name) values (?);", [folderName])
            row = this.db.execute(lookup, [folderName]).fetchone()
        if row is None or row[0] is None:
            print('Id not found.')
            sys.exit(1)
        this.id, this.last = row
        this.mask = {}
        this.get_count()

    def get_count(this):
        """Refresh ``count``: rows of the current folder with mask 1 or 3."""
        query = "select count(*) from ids where id = ? and mask in (3, 1);"
        this.count = this.db.execute(query, [this.id]).fetchone()[0]

    def get_record_count(this, mask):
        """Return how many rows of the current folder have exactly *mask*."""
        query = "select count(*) from ids where id = ? and mask = ?;"
        return this.db.execute(query, [this.id, mask]).fetchone()[0]

    def check(this, name):
        """Return the mask stored for message id *name*, or None if unknown.

        Positive lookups are cached in ``this.mask``.
        """
        if name in this.mask:
            return this.mask[name]
        query = "select mask from ids where id = ? and name = ?;"
        row = this.db.execute(query, [this.id, name]).fetchone()
        if row is None:
            return None
        this.mask[name] = row[0]
        return row[0]

    def addlast(this, count):
        """Advance ``last`` by *count* and store it in the ``list`` table."""
        this.last += count
        this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])

    def droplast(this):
        """Reset ``last`` to 0 and store it in the ``list`` table."""
        this.last = 0
        this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])

    def addmail(this, mid):
        """Set the mail bit for message id *mid*, inserting the row if needed.

        ``count`` is incremented exactly when the mail bit becomes newly set,
        which keeps it consistent with get_count().  Calling this for an id
        that already has the bit is a no-op instead of a duplicate insert.
        """
        mask = this.check(mid)
        if mask is None:
            merged = this.MAIL
            this.db.execute(
                "insert into ids(id, name, mask) values(?, ?, ?);",
                [this.id, mid, merged],
            )
        else:
            merged = mask | this.MAIL
            this.db.execute(
                "update ids set mask = ? where id = ? and name = ?;",
                [merged, this.id, mid],
            )
        if mask is None or not mask & this.MAIL:
            this.count += 1
        this.mask[mid] = merged

    def addnews(this, mid, date = None):
        """Set the news bit for message id *mid* and store *date* with it.

        Inserts the row when the id is unknown, otherwise updates mask and
        date in place (so a repeated id no longer violates the unique index).
        ``count`` is untouched: it only tracks rows that have the mail bit.
        """
        mask = this.check(mid)
        if mask is None:
            merged = this.NEWS
            this.db.execute(
                "insert into ids(id, name, mask, date) values(?, ?, ?, ?);",
                [this.id, mid, merged, date],
            )
        else:
            merged = mask | this.NEWS
            this.db.execute(
                "update ids set mask = ?, date = ? where id = ? and name = ?;",
                [merged, date, this.id, mid],
            )
        this.mask[mid] = merged

    def zeromail(this):
        """Clear the mail bit of every row of the current folder.

        Mask 3 becomes 2 and mask-1 rows are deleted; the change is committed
        and ``count`` refreshed.
        """
        this.mask = {}
        this.db.execute("update ids set mask = 2 where id = ? and mask = 3;", [this.id])
        this.db.execute("delete from ids where id = ? and mask = 1;", [this.id])
        this.sync()
        this.get_count()

    def zeronews(this):
        """Clear the news bit of every row of the current folder.

        Mask 3 becomes 1, mask-2 rows are deleted, ``last`` is reset to 0 and
        the change is committed.
        """
        this.mask = {}
        this.db.execute("update ids set mask = 1 where id = ? and mask = 3;", [this.id])
        this.db.execute("delete from ids where id = ? and mask = 2;", [this.id])
        this.droplast()
        this.sync()

    def sync(this):
        """Commit the pending transaction."""
        this.db.commit()

    def get_unfetched(this):
        """Return a cursor over (name, date) of mask-2 rows, newest date first."""
        return this.db.execute(
            "select name, date from ids where id = ? and mask = 2 order by date desc;",
            [this.id],
        )

    def forget(this, mid):
        """Delete the row for message id *mid* and drop it from the cache."""
        this.db.execute("delete from ids where id = ? and name = ?;", [this.id, mid])
        this.mask.pop(mid, None)
3205f8a9ae 2015-10-12 131:
def check_folder(mserver, folder, folderName):
    """Rebuild the mail side of the index from IMAP mailbox *folderName*.

    The mail bits of *folder* are cleared first (folder.zeromail()), then the
    ENVELOPE of every message not flagged \\Deleted is fetched and its
    Message-ID extracted with the module-level ``reMessageId`` pattern.  The
    first occurrence of an id is recorded with folder.addmail(); an id that
    was already recorded during this pass marks a duplicate message, which is
    flagged \\Deleted.  Finally the index is committed and the mailbox
    expunged.

    Progress goes to stdout: 'x' per deleted duplicate and '.' per 1000 newly
    indexed messages.  If an envelope yields no Message-ID, the envelope is
    printed and the process exits with status 1.

    mserver    -- logged-in imaplib connection
    folder     -- Folder instance with the matching folder already selected
    folderName -- name of the IMAP mailbox to scan
    """
    folder.zeromail()
    deleted = 0
    count = 0
    mserver.select(folderName)
    _, found = mserver.search(None, 'NOT DELETED')
    print(' - building imap index', folderName, '[', end='')
    for num in found[0].split():
        _, parts = mserver.fetch(num, '(ENVELOPE)')
        # A response part is either a bytes object or a tuple of bytes
        # objects; flatten everything into a single decoded string.
        envelope = ''.join(
            ''.join(piece.decode('utf-8', 'ignore') for piece in part)
            if isinstance(part, tuple)
            else part.decode('utf-8', 'ignore')
            for part in parts
        )
        isMid = reMessageId.search(envelope)
        if not isMid:
            print('Message id not found.')
            print(repr(envelope))
            sys.exit(1)
        mid = isMid.group(1)
        if folder.check(mid) in (1, 3):
            # Mail bit already set in this pass: this message is a duplicate.
            mserver.store(num, '+FLAGS', '\\Deleted')
            deleted += 1
            sys.stdout.write('x')
            sys.stdout.flush()
        else:
            folder.addmail(mid)
            count += 1
            # Tick only when the counter advances; testing it on every
            # iteration printed a dot per message while it sat at 0 or at a
            # multiple of 1000.
            if count % 1000 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
    print('], deleted:', deleted)
    folder.sync()
    mserver.expunge()
3205f8a9ae 2015-10-12 172:
# Main driver.  Opens the sqlite index 'nntpdup.sqlite' and, for every group
# named in config['groups']: selects the IMAP mailbox of that name, records
# overview data of new NNTP articles in the index, then appends articles that
# are known on the news side only (mask 2) to the mailbox.
# NOTE(review): this listing does not preserve leading indentation, so the
# nesting described in the comments below is inferred from statement order.
3205f8a9ae 2015-10-12 173: folder = Folder('nntpdup.sqlite')
3205f8a9ae 2015-10-12 174:
# limits holds the running budgets [overview records, appended articles];
# limitSteps is the share each group adds to them: headerLimit and mailLimit
# divided by the number of configured groups (true division, so floats).
3205f8a9ae 2015-10-12 175: limits = [0, 0]
3205f8a9ae 2015-10-12 176: limitSteps = [headerLimit / len(config['groups']), mailLimit / len(config['groups'])]
3205f8a9ae 2015-10-12 177:
# Length of the longest group name; skew is the tab count derived from it and
# used to pad the per-group statistics line printed at the end of the loop.
3205f8a9ae 2015-10-12 178: maxlength = 0
3205f8a9ae 2015-10-12 179: for folderName in (config['groups'].keys()):
3205f8a9ae 2015-10-12 180: maxlength = max(maxlength, len(folderName))
3205f8a9ae 2015-10-12 181:
3205f8a9ae 2015-10-12 182: skew = 1 + int(maxlength / 8)
3205f8a9ae 2015-10-12 183:
# Groups are iterated through a set, so the processing order is not the
# order of the configuration file.
3205f8a9ae 2015-10-12 184: for folderName in (set(config['groups'].keys())):
# stats = [overview records stored, articles appended] for this group.
3205f8a9ae 2015-10-12 185: stats = [0, 0]
3205f8a9ae 2015-10-12 186: folder.select(folderName)
3205f8a9ae 2015-10-12 187: localFolderName = folderName
3205f8a9ae 2015-10-12 188:
# Try the group name as the mailbox name first; if the server does not answer
# 'OK', retry with every '.' replaced by '/' and give up (exit 1) if that
# fails as well.
7513432477 2018-10-28 189: resp = mserver.select(localFolderName)
7513432477 2018-10-28 190: print('#--', localFolderName, ':', resp)
3205f8a9ae 2015-10-12 191: if resp[0] != 'OK':
7513432477 2018-10-28 192: localFolderName = folderName.replace('.', '/')
7513432477 2018-10-28 193: resp = mserver.select(localFolderName)
7513432477 2018-10-28 194: if resp[0] != 'OK':
7513432477 2018-10-28 195: print("Can't open folder.")
7513432477 2018-10-28 196: exit(1)
# resp[1][0] (per the imaplib documentation, the mailbox message count) is
# compared with the number of mail-side rows in the index; on a mismatch the
# mail side of the index is rebuilt by check_folder().
3205f8a9ae 2015-10-12 197: if int(resp[1][0]) != folder.count:
7513432477 2018-10-28 198: check_folder(mserver, folder, localFolderName)
3205f8a9ae 2015-10-12 199:
# News side.  Of the values returned by server.group() only `last` is used;
# `count` is rebound to 0 below and reused as a local counter.
3205f8a9ae 2015-10-12 200: _, count, first, last, _ = server.group(folderName)
3205f8a9ae 2015-10-12 201: limits[0] += limitSteps[0]
3205f8a9ae 2015-10-12 202: if last > folder.last:
3205f8a9ae 2015-10-12 203: count = 0
3205f8a9ae 2015-10-12 204: # we need to fetch new ids
# request is the highest article number asked for in this run: the group's
# last article, capped by folder.last plus the current overview budget.
3205f8a9ae 2015-10-12 205: request = min(last, folder.last + limits[0])
3205f8a9ae 2015-10-12 206: try:
3205f8a9ae 2015-10-12 207: for record in server.over((int(folder.last) + 1, int(request)))[1]:
# NOTE(review): `mid` is assigned here but not read again in this block; the
# following lines index record[1]['message-id'] directly.
3205f8a9ae 2015-10-12 208: mid = record[1]['message-id']
3205f8a9ae 2015-10-12 209: if len(record[1]['message-id']) > 0:
# Store the id with the overview 'date' converted to a POSIX timestamp; when
# the conversion raises OverflowError or TypeError the id is stored without
# a date.
3205f8a9ae 2015-10-12 210: try:
3205f8a9ae 2015-10-12 211: folder.addnews(record[1]['message-id'], email.utils.parsedate_to_datetime(record[1]['date']).timestamp())
3205f8a9ae 2015-10-12 212: except OverflowError as err:
3205f8a9ae 2015-10-12 213: folder.addnews(record[1]['message-id'])
3205f8a9ae 2015-10-12 214: except TypeError as err:
3205f8a9ae 2015-10-12 215: folder.addnews(record[1]['message-id'])
3205f8a9ae 2015-10-12 216: count += 1
# A temporary error whose response starts with '423 ' is ignored
# (presumably "no articles in that range" -- TODO confirm against the
# server); any other temporary error is re-raised.
3205f8a9ae 2015-10-12 217: except nntplib.NNTPTemporaryError as err:
3205f8a9ae 2015-10-12 218: if err.response.startswith('423 '):
3205f8a9ae 2015-10-12 219: pass
3205f8a9ae 2015-10-12 220: else:
3205f8a9ae 2015-10-12 221: raise(err)
# The next two handlers only print diagnostic context (the requested range,
# or the offending record and the rows already stored for its id) before
# re-raising.
3205f8a9ae 2015-10-12 222: except nntplib.NNTPPermanentError as err:
3205f8a9ae 2015-10-12 223: print(folder.last, request)
3205f8a9ae 2015-10-12 224: raise(err)
3205f8a9ae 2015-10-12 225: except sqlite3.IntegrityError as err:
3205f8a9ae 2015-10-12 226: print(repr(record))
3205f8a9ae 2015-10-12 227: print([x for x in map(repr, folder.db.execute("select * from ids where id = ? and name = ?;", [folder.id, record[1]['message-id']]))])
3205f8a9ae 2015-10-12 228: raise(err)
# Account for the records stored, advance the stored high-water mark to
# `request`, and commit.
3205f8a9ae 2015-10-12 229: stats[0] = count
3205f8a9ae 2015-10-12 230: limits[0] -= count
3205f8a9ae 2015-10-12 231: folder.addlast(request - folder.last)
3205f8a9ae 2015-10-12 232: folder.sync()
# Nothing new on the server: if the index holds rows with mask 1, reset the
# stored high-water mark to 0.
3205f8a9ae 2015-10-12 233: elif folder.get_record_count(1) > 0:
3205f8a9ae 2015-10-12 234: folder.droplast()
3205f8a9ae 2015-10-12 235:
# Mail side: rows with mask 2 are fetched from the news server and appended
# to the mailbox, up to the current article budget.
3205f8a9ae 2015-10-12 236: limits[1] += limitSteps[1]
3205f8a9ae 2015-10-12 237: if folder.get_record_count(2) > 0:
3205f8a9ae 2015-10-12 238: count = 0
3205f8a9ae 2015-10-12 239: # there are extra articles
# raw_date collects the raw header strings that were examined; in this block
# it is only written (and referenced by the commented-out debug prints).
3205f8a9ae 2015-10-12 240: raw_date = []
# The candidates are copied into a list before any article is fetched,
# keeping only ids whose check() result is 2.
3205f8a9ae 2015-10-12 241: unfetched = []
3205f8a9ae 2015-10-12 242: for item, env_date in folder.get_unfetched():
3205f8a9ae 2015-10-12 243: mask = folder.check(item)
3205f8a9ae 2015-10-12 244: if mask == 2:
3205f8a9ae 2015-10-12 245: unfetched += (item, env_date),
3205f8a9ae 2015-10-12 246: for item, env_date in unfetched:
3205f8a9ae 2015-10-12 247: try:
3205f8a9ae 2015-10-12 248: _, info = server.article(item)
# No usable stored date (None or negative): derive one from the article's
# own headers.
3205f8a9ae 2015-10-12 249: if env_date == None or env_date < 0:
3205f8a9ae 2015-10-12 250: date = None
3205f8a9ae 2015-10-12 251: backup_date = None
3205f8a9ae 2015-10-12 252: out = []
# Lines are accumulated in `out` until the first empty line, at which point
# the accumulated text is parsed as a message and its headers are scanned.
3205f8a9ae 2015-10-12 253: for line in info.lines:
3205f8a9ae 2015-10-12 254: if len(line) == 0:
3205f8a9ae 2015-10-12 255: mesg = email.message_from_string('\n'.join(out))
# NOTE(review): _headers is a private attribute of the message object,
# iterated here as (name, value) pairs.
3205f8a9ae 2015-10-12 256: for header in mesg._headers:
3205f8a9ae 2015-10-12 257: if header[0] == 'Date':
3205f8a9ae 2015-10-12 258: raw_date += header[1],
3205f8a9ae 2015-10-12 259: date = email.utils.parsedate(header[1])
# Fallback date: the text after the last ';' of an Original-Received header,
# accepted only when it parses and its year is 1970 or later.
3205f8a9ae 2015-10-12 260: elif header[0] == 'Original-Received':
3205f8a9ae 2015-10-12 261: raw_date += header[1],
3205f8a9ae 2015-10-12 262: tmp_date = email.utils.parsedate(header[1].split(';')[-1])
3205f8a9ae 2015-10-12 263: if tmp_date != None and tmp_date[0] >= 1970:
3205f8a9ae 2015-10-12 264: backup_date = tmp_date
# Neither header produced a date: report the header lines and exit 1.
3205f8a9ae 2015-10-12 265: if date == None and backup_date == None:
3205f8a9ae 2015-10-12 266: print('Date missed.')
3205f8a9ae 2015-10-12 267: print(repr(out))
3205f8a9ae 2015-10-12 268: exit(1)
3205f8a9ae 2015-10-12 269: elif date == None:
3205f8a9ae 2015-10-12 270: date = backup_date
3205f8a9ae 2015-10-12 271: break
# NOTE(review): decoding with errors='ignore' is not expected to raise
# UnicodeDecodeError, so this handler looks unreachable -- confirm.  Whether
# the out.append('\n') below belongs to the handler or to the loop body
# cannot be determined from this listing.
3205f8a9ae 2015-10-12 272: try:
3205f8a9ae 2015-10-12 273: out.append(line.decode('ascii', 'ignore'))
3205f8a9ae 2015-10-12 274: except UnicodeDecodeError:
3205f8a9ae 2015-10-12 275: print(repr(line))
3205f8a9ae 2015-10-12 276: exit(1)
3205f8a9ae 2015-10-12 277: out.append('\n')
# Append the complete article (lines joined with b'\n') using the derived
# date; on AttributeError or OverflowError retry with backup_date.
3205f8a9ae 2015-10-12 278: try:
3205f8a9ae 2015-10-12 279: #print('*', item, date, type(date))
7513432477 2018-10-28 280: mserver.append(localFolderName, None, date, b'\n'.join(info.lines))
3205f8a9ae 2015-10-12 281: except AttributeError as err:
3205f8a9ae 2015-10-12 282: #print('*', item, raw_date, repr(date))
3205f8a9ae 2015-10-12 283: #raise(err)
7513432477 2018-10-28 284: mserver.append(localFolderName, None, backup_date, b'\n'.join(info.lines))
3205f8a9ae 2015-10-12 285: except OverflowError as err:
3205f8a9ae 2015-10-12 286: #print('*', item, raw_date, repr(date))
3205f8a9ae 2015-10-12 287: #raise(err)
7513432477 2018-10-28 288: mserver.append(localFolderName, None, backup_date, b'\n'.join(info.lines))
# A usable stored date exists: append the article with it directly.
3205f8a9ae 2015-10-12 289: else:
3205f8a9ae 2015-10-12 290: #print('*', item, env_date, type(env_date))
7513432477 2018-10-28 291: mserver.append(localFolderName, None, env_date, b'\n'.join(info.lines))
# Record the id on the mail side and commit after each article; stop once
# the article budget is used up.
3205f8a9ae 2015-10-12 292: folder.addmail(item)
3205f8a9ae 2015-10-12 293: folder.sync()
3205f8a9ae 2015-10-12 294: count += 1
3205f8a9ae 2015-10-12 295: if count >= limits[1]:
3205f8a9ae 2015-10-12 296: break
# An article the server reports as '430 No such article' is dropped from the
# index; any other temporary error is printed with context and re-raised.
3205f8a9ae 2015-10-12 297: except nntplib.NNTPTemporaryError as err:
3205f8a9ae 2015-10-12 298: if err.response.startswith('430 No such article'):
3205f8a9ae 2015-10-12 299: folder.forget(item)
3205f8a9ae 2015-10-12 300: else:
3205f8a9ae 2015-10-12 301: print(err.response, item, env_date)
3205f8a9ae 2015-10-12 302: raise(err)
3205f8a9ae 2015-10-12 303: stats[1] = count
3205f8a9ae 2015-10-12 304: limits[1] -= count
3205f8a9ae 2015-10-12 305:
# Print one tab-aligned line per group that had any activity:
# "# <group><tabs><records stored>\t<articles appended>".
3205f8a9ae 2015-10-12 306: if stats[0] != 0 or stats[1] != 0:
3205f8a9ae 2015-10-12 307: print('# ', folderName, '\t'*(skew - int((len(folderName) + 2) / 8)), '\t'.join(map(str, stats)), sep = '')
# Final commit of the index.
3205f8a9ae 2015-10-12 308: folder.sync()