Overview
| Comment: | changed logic and some log messages |
|---|---|
| Downloads: | Tarball, ZIP archive, SQL archive |
| Timelines: | family, ancestors, descendants, both, master, trunk |
| Files: | files, file ages, folders |
| SHA3-256: | 083ec707eae26f95b17a1a822c165f2e |
| User & Date: | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-07-06 15:02:07.000 |
| Other Links: | branch diff, manifest, tags |
Context
| Date & Time | Comment | Check-in | User | Tags |
|---|---|---|---|---|
| 2010-07-26 09:38 | option to skip ETag processing, X-Powered-By ignored | 38b25713eb | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 | master, trunk |
| 2010-07-06 15:02 | changed logic and some log messages | 083ec707ea | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 | master, trunk |
| 2010-07-05 14:52 | enhanced logging | 53dcfdb8f7 | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 | master, trunk |
Changes
Modified samesite.py from [65464ee62d] to [16e005263b].
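Both versions of the hunk below open with the same access-log regular expression. As a quick orientation, here is a minimal sketch of what it captures; the sample log line and the `sample` variable are invented for illustration and are not part of samesite.py:

import re

log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')

# a made-up Apache-style access log line
sample = '192.0.2.1 - - [06/Jul/2010:15:02:07 +0000] "GET /files/report.pdf?x=1 HTTP/1.1" 200 1024 "-" "Mozilla/5.0"'

match = log_line.match(sample)
if match:
    print(match.group(1))  # request method: 'GET'
    print(match.group(2))  # path without the query string: '/files/report.pdf'
    print(match.group(4))  # status code: '200'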
Before (old lines 42-52):
log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
for line in log_file:
    this_line = log_line.match(line.strip())
    if this_line:
        unchecked_files.add(this_line.group(2))
for url in unchecked_files:
    # creating empty placeholder in index
    if not url in index:
        index[url] = {}
After (new lines 42-88):
log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
for line in log_file:
    this_line = log_line.match(line.strip())
    if this_line:
        unchecked_files.add(this_line.group(2))
for url in unchecked_files:
    reload = False
    recheck = False
    print('Checking file:', url)
    # creating empty placeholder in index
    if not url in index:
        print('This one is new.')
        index[url] = {}
        reload = True
    # creating file name from url
    file_name = options.dir + re.compile('%20').sub(' ', url)
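    # (note: only the %20 escape is decoded here; urllib.parse.unquote(url)
    # would decode every percent-escape)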
    # forcibly checking file if no file is present
    if not reload and not os.access(file_name, os.R_OK):
        print('File not found or inaccessible.')
        reload = True
    # forcibly checking file if file size doesn't match the index data
    elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
        print('File size is ', os.stat(file_name).st_size, ' and stored file size is ', index[url]['Content-Length'], '.', sep='')
        reload = True
    # forcibly rechecking file if index holds a Pragma header
    if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
        print('Pragma on: recheck imminent.')
        recheck = True
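    # (note: timedelta.days is negative for any negative timedelta, e.g.
    # datetime.timedelta(hours = -1).days == -1, so the test below is true
    # exactly when the stored __time__ is newer than four hours ago)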
    # skipping file processing if there's no need to recheck it and it was checked within the last 4 hours
    if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
        continue
    try:
        with urllib.request.urlopen(options.root + url) as source:
            new_headers = {}
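            # source.info() returns the response headers as an
            # email.message.Message; iterating it yields the header names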
            headers = source.info()
            # stripping unneeded headers (XXX make this inplace?)
            for header in headers:
                if header in desc_fields:
Before (old lines 95-101):
old_keys = set(index[url].keys())
old_keys.discard('__time__')
old_keys.discard('Pragma')
more_keys = set(new_headers.keys()) - old_keys
more_keys.discard('Pragma')
less_keys = old_keys - set(new_headers.keys())
if len(more_keys) > 0:
After (new lines 98-112):
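# compare the stored header set with the freshly fetched one: more_keys are
# headers that appeared since the last check, less_keys are headers that
# disappeared; either difference forces a reload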
old_keys = set(index[url].keys())
old_keys.discard('__time__')
old_keys.discard('Pragma')
more_keys = set(new_headers.keys()) - old_keys
more_keys.discard('Pragma')
less_keys = old_keys - set(new_headers.keys())
if len(more_keys) > 0:
    if not len(old_keys) == 0:
        print('More headers appear:', more_keys)
    reload = True
elif len(less_keys) > 0:
    print('Less headers appear:', less_keys)
    reload = True
else:
    for key in index[url].keys():