Index: samesite.py ================================================================== --- samesite.py +++ samesite.py @@ -5,32 +5,27 @@ parser = optparse.OptionParser() parser.add_option('-v', '--verbose', action = 'store_true', dest = 'verbose', help = 'turns on verbose status notifications', metavar = 'bool', default = False) parser.add_option('-d', '--dir', action = 'store', dest = 'dir', help = 'specify directory where the files should be stored', metavar = 'string', default = None) parser.add_option('-r', '--root', action = 'store', dest = 'root', help = 'specify a site from which data should be mirrored', metavar = 'string', default = None) parser.add_option('-l', '--log', action = 'store', dest = 'log', help = 'specify a log file to process', metavar = 'string', default = None) +parser.add_option('-e', '--skip-etag', action = 'store_true', dest = 'noetag', help = 'do not process etags', metavar = 'bool', default = False) (options, args) = parser.parse_args() -if not options.dir: - print('Directory not specified') - exit(1) - -if not options.root: - print('Server not specified') - exit(1) - -if not options.log: - print('Log file not specified') - exit(1) - -if not os.access(options.log, os.R_OK): - print('Log file unreadable') - exit(1) +assert options.dir, 'Directory not specified' +assert options.root, 'Server not specified' +assert options.log, 'Log file not specified' +assert os.access(options.log, os.R_OK), 'Log file unreadable' # this is file index - everything is stored in this file index = shelve.open(options.dir + '/.index') -desc_fields = ('Content-Length', 'ETag', 'Pragma', 'Last-Modified') -ignore_fields = ('Accept-Ranges', 'Age', 'Cache-Control', 'Connection', 'Content-Type', 'Date', 'Expires', 'Server', 'Via', 'X-Cache', 'X-Cache-Lookup') +desc_fields = ('Content-Length', 'Pragma', 'Last-Modified') +ignore_fields = ('Accept-Ranges', 'Age', 'Cache-Control', 'Connection', 'Content-Type', 'Date', 'Expires', 'Server', 'Via', 'X-Cache', 'X-Cache-Lookup', 'X-Powered-By') + +if not options.noetag: + desc_fields += 'ETag', +else: + ignore_fields += 'ETag', block_size = 32768 while True: unchecked_files = set() @@ -105,11 +100,10 @@ if not len(old_keys) == 0: print('More headers appear:', more_keys) reload = True elif len(less_keys) > 0: print('Less headers appear:', less_keys) - reload = True else: for key in index[url].keys(): if key not in ('__time__', 'Pragma') and not index[url][key] == new_headers[key]: print('Header "', key, '" changed from [', index[url][key], '] to [', new_headers[key], ']', sep='') reload = True