Index: samesite.py ================================================================== --- samesite.py +++ samesite.py @@ -44,40 +44,43 @@ this_line = log_line.match(line.strip()) if this_line: unchecked_files.add(this_line.group(2)) for url in unchecked_files: + reload = False + recheck = False + print('Checking file:', url) # creating empty placeholder in index if not url in index: + print('This one is new.') index[url] = {} - reload = False + reload = True # creating file name from url file_name = options.dir + re.compile('%20').sub(' ', url) - print('Checking file:', url) # forcibly checking file if no file present - if not os.access(file_name, os.R_OK): + if not reload and not os.access(file_name, os.R_OK): print('File not found or inaccessible.') reload = True # forcibly checking file if file size doesn't match with index data - elif 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']): - print('File size is', os.stat(file_name).st_size, 'and stored file size is', index[url]['Content-Length']) + elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']): + print('File size is ', os.stat(file_name).st_size, ' and stored file size is ', index[url]['Content-Length'], '.', sep='') reload = True # forcibly checking file if index hods Pragma header - if 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache': - print('Pragma on: recheck iminent.') - reload = True + if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache': + print('Pragma on: recheck imminent.') + recheck = True # skipping file processing if there's no need to recheck it and we have checked it at least 4 hours ago - if not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0: + if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0: continue + try: - print('Contacting website.') with urllib.request.urlopen(options.root + url) as source: new_headers = {} headers = source.info() # stripping unneeded headers (XXX make this inplace?) @@ -97,13 +100,11 @@ old_keys.discard('Pragma') more_keys = set(new_headers.keys()) - old_keys more_keys.discard('Pragma') less_keys = old_keys - set(new_headers.keys()) if len(more_keys) > 0: - if len(old_keys) == 0: - print('No data on that file yet.') - else: + if not len(old_keys) == 0: print('More headers appear:', more_keys) reload = True elif len(less_keys) > 0: print('Less headers appear:', less_keys) reload = True