Overview
Comment: | enchanced logging |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | master | trunk |
Files: | files | file ages | folders |
SHA3-256: | 53dcfdb8f7db80d7be8799050f72acfb |
User & Date: | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-07-05 14:52:39.000 |
Other Links: | branch diff | manifest | tags |
Context
2010-07-06
| ||
15:02 | changed logic and some log messages check-in: 083ec707ea user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk | |
2010-07-05
| ||
14:52 | enchanced logging check-in: 53dcfdb8f7 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk | |
2010-07-01
| ||
14:04 | some block size and headers fixed, some download information check-in: 7b27f1db02 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk | |
Changes
Modified samesite.py
from [07383aff21]
to [65464ee62d].
︙ | ︙ | |||
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | # creating empty placeholder in index if not url in index: index[url] = {} reload = False # creating file name from url file_name = options.dir + re.compile('%20').sub(' ', url) # forcibly checking file if no file present if not os.access(file_name, os.R_OK): reload = True # forcibly checking file if file size doesn't match with index data elif 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']): print('File size is', os.stat(file_name).st_size, 'and stored file size is', index[url]['Content-Length']) reload = True # forcibly checking file if index hods Pragma header if 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache': reload = True # skipping file processing if there's no need to recheck it and we have checked it at least 4 hours ago if not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0: continue | > > > < > | 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | # creating empty placeholder in index if not url in index: index[url] = {} reload = False # creating file name from url file_name = options.dir + re.compile('%20').sub(' ', url) print('Checking file:', url) # forcibly checking file if no file present if not os.access(file_name, os.R_OK): print('File not found or inaccessible.') reload = True # forcibly checking file if file size doesn't match with index data elif 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']): print('File size is', os.stat(file_name).st_size, 'and stored file size is', index[url]['Content-Length']) reload = True # forcibly checking file if index hods Pragma header if 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache': print('Pragma on: recheck iminent.') reload = True # skipping file processing if there's 
no need to recheck it and we have checked it at least 4 hours ago if not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0: continue try: print('Contacting website.') with urllib.request.urlopen(options.root + url) as source: new_headers = {} headers = source.info() # stripping unneeded headers (XXX make this inplace?) for header in headers: if header in desc_fields: |
︙ | ︙ | |||
samesite.py — hunk: old lines 92-98 → new lines 95-112. Reconstructed below from the garbled side-by-side diff table; lines marked "+" were added by this check-in. (Nesting depth is approximate — the original indentation is not recoverable from the flattened extraction.)

      old_keys = set(index[url].keys())
      old_keys.discard('__time__')
      old_keys.discard('Pragma')
      more_keys = set(new_headers.keys()) - old_keys
      more_keys.discard('Pragma')
      less_keys = old_keys - set(new_headers.keys())
      if len(more_keys) > 0:
+         if len(old_keys) == 0:
+             print('No data on that file yet.')
+         else:
+             print('More headers appear:', more_keys)
          reload = True
      elif len(less_keys) > 0:
+         print('Less headers appear:', less_keys)
          reload = True
      else:
          for key in index[url].keys():
              if key not in ('__time__', 'Pragma') and not index[url][key] == new_headers[key]:
︙ | ︙ |