Overview
| Comment: | some block size and headers fixed, some download information |
|---|---|
| SHA3-256: | 7b27f1db022d79296309b4dbb0982ea8 |
| User & Date: | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-07-01 14:04:38.000 |
Context
| Date | Time | Comment | Check-in | Tags |
|---|---|---|---|---|
| 2010-07-05 | 14:52 | enchanced logging | 53dcfdb8f7 | master, trunk |
| 2010-07-01 | 14:04 | some block size and headers fixed, some download information | 7b27f1db02 | master, trunk |
| 2010-06-25 | 10:53 | working version, already tested and works on FreeRealms game | 08ae38b6ce | master, trunk |

All check-ins are by user c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525.
Changes
Modified samesite.py from [873110a573] to [07383aff21].
The first hunk introduces a `block_size` setting ahead of the main loop; the surrounding code is unchanged on the old side. In the new revision it reads:

```python
    exit(1)

# this is file index - everything is stored in this file
index = shelve.open(options.dir + '/.index')

desc_fields = ('Content-Length', 'ETag', 'Pragma', 'Last-Modified')
ignore_fields = ('Accept-Ranges', 'Age', 'Cache-Control', 'Connection', 'Content-Type', 'Date', 'Expires', 'Server', 'Via', 'X-Cache', 'X-Cache-Lookup')
block_size = 32768

while True:
    unchecked_files = set()
    checked_files = 0

    # reading log and storing found urls for processing
    # check file mtime XXX
    with open(options.log, 'r') as log_file:
        log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
        for line in log_file:
            this_line = log_line.match(line.strip())
            if this_line:
                unchecked_files.add(this_line.group(2))

    for url in unchecked_files:
        # creating empty placeholder in index
        if not url in index:
            index[url] = {}
        reload = False
```
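For illustration, here is the pattern from the hunk above applied to a made-up access-log line; the host, path, and user-agent string are invented, and only `group(2)`, the query-stripped path, is what the script keeps:

```python
import re

log_line = re.compile(r'^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')

# invented log line in the format the pattern expects
sample = '192.0.2.10 - - [01/Jul/2010:14:04:38 +0000] "GET /game/patch.dat?v=2 HTTP/1.1" 200 4096 "-" "FreeRealms/1.0"'

match = log_line.match(sample)
if match:
    print(match.group(2))  # /game/patch.dat - the URL without its query string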
The second hunk reworks the header comparison and the download path; the old side shows the same header-stripping loop, and the changes land in the code that follows it. In the new revision the block reads:

```python
            # stripping unneeded headers (XXX make this inplace?)
            for header in headers:
                if header in desc_fields:
                    if header == 'Pragma' and headers[header] != 'no-cache':
                        print('Pragma:', headers[header])
                    new_headers[header] = headers[header]
                elif not header in ignore_fields:
                    print('Undefined header "', header, '": ', headers[header], sep='')

            # comparing headers with data found in index
            # if any header has changed (except Pragma) file is fully downloaded
            # same if we get more or less headers
            old_keys = set(index[url].keys())
            old_keys.discard('__time__')
            old_keys.discard('Pragma')
            more_keys = set(new_headers.keys()) - old_keys
            more_keys.discard('Pragma')
            less_keys = old_keys - set(new_headers.keys())
            if len(more_keys) > 0:
                print('More headers appear:', more_keys)
                reload = True
            elif len(less_keys) > 0:
                print('Less headers appear:', less_keys)
                reload = True
            else:
                for key in index[url].keys():
                    if key not in ('__time__', 'Pragma') and not index[url][key] == new_headers[key]:
                        print('Header "', key, '" changed from [', index[url][key], '] to [', new_headers[key], ']', sep='')
                        reload = True

            # downloading file
            if reload:
                if 'Content-Length' in headers:
                    print('Downloading', headers['Content-Length'], 'bytes [', end='')
                else:
                    print('Downloading [', end='')
                sys.stdout.flush()

                # file is created at temporary location and moved in place only when download completes
                temp_file = open(options.dir + '/.tmp', 'wb')
                buffer = source.read(block_size)
                blocks = 0
                megs = 0
                while len(buffer) > 0:
                    temp_file.write(buffer)
                    print('.', end='')
                    sys.stdout.flush()
                    buffer = source.read(block_size)
                    blocks += 1
                    if blocks > 1024*1024/block_size:
                        blocks = blocks - 1024*1024/block_size
                        megs += 1
                        print('{}Mb'.format(megs), end='')
                temp_file.close()
                print(']')
                os.renames(options.dir + '/.tmp', file_name)
                checked_files += 1

                # storing new time mark and storing new headers
                new_headers['__time__'] = datetime.datetime.now()
                index[url] = new_headers
                index.sync()
        except urllib.error.HTTPError as error:
            # in case of error we don't need to do anything actually,
            # if file download stalls or fails the file would not be moved to it's location
            print(error)
    print('[', len(unchecked_files), '/', checked_files, ']')

    # checking if there were any files downloaded, if yes - restarting sequence
    if checked_files == 0:
        break
```
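The reload decision above is plain set arithmetic over header names. A condensed sketch of it, with invented header dictionaries standing in for `index[url]` and `new_headers`:

```python
# invented header snapshots, standing in for index[url] and new_headers
old = {'Content-Length': '100', 'ETag': '"abc"', '__time__': 'placeholder'}
new = {'Content-Length': '100', 'Last-Modified': 'Thu, 01 Jul 2010 14:04:38 GMT'}

old_keys = set(old) - {'__time__', 'Pragma'}
more_keys = set(new) - {'Pragma'} - old_keys
less_keys = old_keys - set(new)

print(more_keys)  # {'Last-Modified'} - a header appeared, which forces a re-download
print(less_keys)  # {'ETag'} - a header disappeared, which also forces a re-download
```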
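The temporary-file dance is what keeps half-finished downloads out of the mirror: data lands in `.tmp`, and only a completed transfer is renamed into its final place. `os.renames` also creates any missing parent directories of the target, which matters because mirrored URLs map to nested paths. A minimal sketch, with an invented `mirror` directory and file path:

```python
import os

os.makedirs('mirror', exist_ok=True)  # invented working directory

# write to a temporary name first, as the script does
with open('mirror/.tmp', 'wb') as temp_file:
    temp_file.write(b'downloaded payload')

# os.renames creates the missing 'mirror/game' directory on the way,
# so the final path appears only once the content is complete
os.renames('mirror/.tmp', 'mirror/game/patch.dat')
```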