Lines of
nntpdup.py
from check-in 295fec7f85
that are changed by the sequence of edits moving toward
check-in 7513432477:
1: #!/usr/bin/env python3
2:
3: import configparser, email.utils, getpass, imaplib, nntplib, re, sqlite3, sys
# Both client libraries cap the length of a single protocol line; raise the
# caps to 1 MiB (presumably to cope with very long server responses).
imaplib._MAXLINE = 1024 * 1024
nntplib._MAXLINE = 1024 * 1024

config = configparser.ConfigParser(allow_no_value = True)
config.read('nntpdup.conf')

try:
    server = nntplib.NNTP_SSL(config['connection']['newsserver'])
except nntplib.NNTPTemporaryError as err:
    # A "400 load at ..." greeting is reported and treated as a clean exit;
    # any other temporary error is propagated unchanged.
    if err.response.startswith('400 load at '):
        print(err.response)
        sys.exit(0)
    raise
mserver = imaplib.IMAP4_SSL(config['connection']['mailserver'])
# Extracts the Message-ID (group 1) from the tail of a decoded ENVELOPE
# response.  Raw string: the pattern relies on regex escapes such as \d and
# \( that are not valid Python string escapes.
reMessageId = re.compile(r'(<[-\][a-zA-Z0-9@.%/=_\$+!&~#\?}]+>)"?\)\)(\d+ \(FLAGS\(\)\))?$')
mserver.login(config['connection']['mail_user'], config['connection']['mail_password'])

# Optional per-run limits from the [connection] section, with defaults.
mailLimit = config['connection'].getint('mail_limit', fallback = 100)
headerLimit = config['connection'].getint('header_limit', fallback = 1000)
29:
# Schema of the bookkeeping database, keyed by table name.  Each value is the
# list of statements that create the table and its indexes.
tables = {
    'list': ["create table list (id integer primary key, name text, last integer default 0);"],
    'ids': ["create table ids (id integer, name text, mask integer, date integer);", "create unique index ids__id_name on ids(id, name);"],
}


class Folder:
    """SQLite-backed record of which message ids are known for a folder.

    Every row of ``ids`` carries a ``mask``: 1 is written by ``addmail``
    only, 2 by ``addnews`` only, and 3 once both have seen the id.
    ``count`` mirrors the number of rows whose mask is 1 or 3 (see
    ``get_count``).
    """

    def __init__(this, filename):
        """Open the database *filename* and create any missing tables."""
        this.db = sqlite3.connect(filename)
        this.id = None
        existing = {row[0] for row in this.db.execute("select name from sqlite_master where type = 'table';")}
        for absent in set(tables).difference(existing):
            for query in tables[absent]:
                this.db.execute(query)

    def select(this, folderName):
        """Make *folderName* the current folder, registering it if unknown.

        Sets ``name``, ``id`` and ``last``, clears the mask cache and
        reloads ``count``.
        """
        this.name = folderName
        this.id = None
        lookup = "select id, last from list where name = ?;"
        rows = this.db.execute(lookup, [folderName]).fetchall()
        if not rows:
            this.db.execute("insert into list(name) values (?);", [folderName])
            rows = this.db.execute(lookup, [folderName]).fetchall()
        if not rows:
            print('Id not found.')
            sys.exit(1)
        # With several rows of the same name the last one wins.
        this.id, this.last = rows[-1]
        this.mask = {}
        this.get_count()

    def get_count(this):
        """Reload ``count``: rows of the current folder with mask 1 or 3."""
        row = this.db.execute("select count(*) from ids where id = ? and mask in (3, 1);", [this.id]).fetchone()
        this.count = row[0] if row is not None else 0

    def get_record_count(this, mask):
        """Return the number of rows of the current folder with exactly *mask*."""
        row = this.db.execute("select count(*) from ids where id = ? and mask = ?;", [this.id, mask]).fetchone()
        return row[0] if row is not None else None

    def check(this, name):
        """Return the stored mask of message id *name*, or None if unknown.

        Known masks are cached in ``this.mask``; unknown ids are not cached.
        """
        if name in this.mask:
            return this.mask[name]
        for row in this.db.execute("select mask from ids where id = ? and name = ?;", [this.id, name]):
            this.mask[name] = row[0]
            return row[0]
        return None

    def addlast(this, count):
        """Advance the stored ``last`` position of the current folder by *count*."""
        this.last += count
        this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])

    def droplast(this):
        """Reset the stored ``last`` position of the current folder to 0."""
        this.last = 0
        this.db.execute("update list set last = ? where id = ?;", [this.last, this.id])

    def addmail(this, mid):
        """Record that message id *mid* was seen by the mail side."""
        mask = this.check(mid)
        if mask in (1, 3):
            # Already recorded for mail: inserting again would violate the
            # unique (id, name) index.
            return
        if mask == 2:
            this.db.execute("update ids set mask = 3 where id = ? and name = ?;", [this.id, mid])
            this.mask[mid] = 3
        else:
            this.db.execute("insert into ids(id, name, mask) values(?, ?, ?);", [this.id, mid, 1])
            this.mask[mid] = 1
        # The row now has mask 1 or 3, so it newly counts towards ``count``.
        this.count += 1

    def addnews(this, mid, date = None):
        """Record that message id *mid* was seen by the news side.

        *date* is stored with the row; it may be None.
        """
        mask = this.check(mid)
        if mask in (1, 3):
            this.db.execute("update ids set mask = 3, date = ? where id = ? and name = ?;", [date, this.id, mid])
            this.mask[mid] = 3
        elif mask == 2:
            # Listed again (e.g. after droplast() rewound the position):
            # keep the row and only refresh the date when one is supplied.
            if date is not None:
                this.db.execute("update ids set date = ? where id = ? and name = ?;", [date, this.id, mid])
        else:
            # A mask-2 row is not part of ``count`` (see get_count).
            this.db.execute("insert into ids(id, name, mask, date) values(?, ?, ?, ?);", [this.id, mid, 2, date])
            this.mask[mid] = 2

    def zeromail(this):
        """Forget the mail side: mask 3 becomes 2, mask-1 rows are deleted."""
        this.mask = {}
        this.db.execute("update ids set mask = 2 where id = ? and mask = 3;", [this.id])
        this.db.execute("delete from ids where id = ? and mask = 1;", [this.id])
        this.sync()
        this.get_count()

    def zeronews(this):
        """Forget the news side: mask 3 becomes 1, mask-2 rows are deleted."""
        this.mask = {}
        this.db.execute("update ids set mask = 1 where id = ? and mask = 3;", [this.id])
        this.db.execute("delete from ids where id = ? and mask = 2;", [this.id])
        this.droplast()
        this.sync()

    def sync(this):
        """Commit pending changes."""
        this.db.commit()

    def get_unfetched(this):
        """Return a cursor of (name, date) for mask-2 rows, newest date first."""
        return this.db.execute("select name, date from ids where id = ? and mask = 2 order by date desc;", [this.id])

    def forget(this, mid):
        """Delete message id *mid* from the current folder."""
        this.db.execute("delete from ids where id = ? and name = ?;", [this.id, mid])
        # Drop the cached mask too, otherwise check() keeps reporting the
        # deleted row.
        this.mask.pop(mid, None)
130:
def check_folder(mserver, folder, folderName):
    """Rebuild the mail side of *folder* from the IMAP mailbox *folderName*.

    Clears the mail side of the index, then walks every message that is not
    flagged as deleted.  The Message-ID is taken from the ENVELOPE response;
    an id seen for the first time is recorded with ``folder.addmail``, a
    repeated id gets the message flagged ``\\Deleted``.  Finally the index is
    committed and the mailbox expunged.

    Progress is written to stdout: 'x' per duplicate and '.' per 1000
    recorded messages.  Exits with status 1 if no Message-ID can be found in
    an ENVELOPE response.
    """
    folder.zeromail()
    deleted = 0
    mserver.select(folderName)
    typ, matches = mserver.search(None, 'NOT DELETED')
    count = 0
    print(' - building imap index', folderName, '[', end='')
    for num in matches[0].split():
        typ, data = mserver.fetch(num, '(ENVELOPE)')
        # A response item is either a bytes object or a tuple of bytes;
        # flatten everything into one string for the regex.
        parts = []
        for rec in data:
            if isinstance(rec, tuple):
                parts.append(''.join(i.decode('utf-8', 'ignore') for i in rec))
            else:
                parts.append(rec.decode('utf-8', 'ignore'))
        envelope = ''.join(parts)
        isMid = reMessageId.search(envelope)
        if not isMid:
            print('Message id not found.')
            print(repr(envelope))
            sys.exit(1)
        mid = isMid.group(1)
        if folder.check(mid) in (1, 3):
            # Same Message-ID already seen in this mailbox: mark the copy.
            mserver.store(num, '+FLAGS', '\\Deleted')
            deleted += 1
            sys.stdout.write('x')
            sys.stdout.flush()
        else:
            folder.addmail(mid)
            count += 1
            # Tick only when the counter actually reaches a multiple of
            # 1000, not on every message while it happens to sit on one.
            if count % 1000 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
    print('], deleted:', deleted, end = '')
    folder.sync()
    mserver.expunge()
171:
def _article_dates(lines):
    """Return ``(date, backup_date)`` parsed from the headers of an article.

    *lines* are the raw article lines as bytes; the header block ends at the
    first empty line.  ``date`` comes from the last ``Date`` header,
    ``backup_date`` from the last ``Original-Received`` header whose
    timestamp parses to a year >= 1970.  When ``Date`` does not parse,
    ``date`` falls back to ``backup_date``.  Exits with status 1 if the
    header block yields neither.  Returns ``(None, None)`` when there is no
    empty line at all.
    """
    headers = []
    for line in lines:
        if len(line) == 0:
            break
        headers.append(line.decode('ascii', 'ignore'))
    else:
        return None, None

    date = None
    backup_date = None
    # Header names are case-insensitive, so compare them lower-cased.
    for name, value in email.message_from_string('\n'.join(headers)).items():
        lowered = name.lower()
        if lowered == 'date':
            date = email.utils.parsedate(value)
        elif lowered == 'original-received':
            tmp_date = email.utils.parsedate(value.split(';')[-1])
            if tmp_date is not None and tmp_date[0] >= 1970:
                backup_date = tmp_date
    if date is None and backup_date is None:
        print('Date missed.')
        print(repr(headers))
        sys.exit(1)
    if date is None:
        date = backup_date
    return date, backup_date


folder = Folder('nntpdup.sqlite')

# limits[0] / limits[1] are the running budgets for overview records and for
# transferred articles.  Each group adds its share and subtracts what it
# used, so an unused share carries over to the following groups.
limits = [0, 0]
groupCount = max(1, len(config['groups']))  # avoid dividing by zero
limitSteps = [headerLimit / groupCount, mailLimit / groupCount]

# Number of tab stops needed to align the statistics column.
maxlength = max((len(name) for name in config['groups']), default = 0)
skew = 1 + int(maxlength / 8)

for folderName in set(config['groups'].keys()):
    stats = [0, 0]
    folder.select(folderName)

    resp = mserver.select(folderName)
    if resp[0] != 'OK':
        print("Can't open folder.")
        sys.exit(1)
    # Re-index the mailbox when its message count differs from the index.
    if int(resp[1][0]) != folder.count:
        check_folder(mserver, folder, folderName)

    _, count, first, last, _ = server.group(folderName)
    limits[0] += limitSteps[0]
    if last > folder.last:
        count = 0
        # we need to fetch new ids
        request = min(last, folder.last + limits[0])
        try:
            for record in server.over((int(folder.last) + 1, int(request)))[1]:
                mid = record[1]['message-id']
                if len(mid) > 0:
                    try:
                        stamp = email.utils.parsedate_to_datetime(record[1]['date']).timestamp()
                    except (OverflowError, TypeError, ValueError):
                        # Unparseable or out-of-range date: store the id
                        # without one.
                        stamp = None
                    folder.addnews(mid, stamp)
                    count += 1
        except nntplib.NNTPTemporaryError as err:
            # 423: no article in the requested range; nothing to record.
            if not err.response.startswith('423 '):
                raise
        except nntplib.NNTPPermanentError:
            print(folder.last, request)
            raise
        except sqlite3.IntegrityError:
            print(repr(record))
            print([repr(row) for row in folder.db.execute("select * from ids where id = ? and name = ?;", [folder.id, record[1]['message-id']])])
            raise
        stats[0] = count
        limits[0] -= count
        folder.addlast(request - folder.last)
        folder.sync()
    elif folder.get_record_count(1) > 0:
        folder.droplast()

    limits[1] += limitSteps[1]
    if folder.get_record_count(2) > 0:
        count = 0
        # there are extra articles
        # Materialise the list first: the loop below modifies the table.
        unfetched = [(item, env_date) for item, env_date in folder.get_unfetched() if folder.check(item) == 2]
        for item, env_date in unfetched:
            try:
                _, info = server.article(item)
                message = b'\n'.join(info.lines)
                if env_date is None or env_date < 0:
                    # No usable overview date: take it from the headers.
                    date, backup_date = _article_dates(info.lines)
                    try:
                        mserver.append(folderName, None, date, message)
                    except (AttributeError, OverflowError):
                        mserver.append(folderName, None, backup_date, message)
                else:
                    mserver.append(folderName, None, env_date, message)
                folder.addmail(item)
                folder.sync()
                count += 1
                if count >= limits[1]:
                    break
            except nntplib.NNTPTemporaryError as err:
                if err.response.startswith('430 No such article'):
                    folder.forget(item)
                else:
                    print(err.response, item, env_date)
                    raise
        stats[1] = count
        limits[1] -= count

    if stats[0] != 0 or stats[1] != 0:
        print('# ', folderName, '\t'*(skew - int((len(folderName) + 2) / 8)), '\t'.join(map(str, stats)), sep = '')
    folder.sync()

# Close both server sessions instead of leaving them to process exit.
mserver.logout()
server.quit()