Overview

Comment:      some block size and headers fixed, some download information
Downloads:    Tarball | ZIP archive | SQL archive
Timelines:    family | ancestors | descendants | both | master | trunk
Files:        files | file ages | folders
SHA3-256:     7b27f1db022d79296309b4dbb0982ea8
User & Date:  c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-07-01 14:04:38.000
Other Links:  branch diff | manifest | tags
Context

2010-07-05
14:52  enhanced logging  (check-in: 53dcfdb8f7, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)

2010-07-01
14:04  some block size and headers fixed, some download information  (check-in: 7b27f1db02, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)

2010-06-25
10:53  working version, already tested and works on FreeRealms game  (check-in: 08ae38b6ce, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
Changes

Modified samesite.py from [873110a573] to [07383aff21].
︙  (unchanged lines omitted)
Lines 26-39 of the old version become lines 26-53 of the new one; the check-in inserts the block_size setting, the log-parsing regular expression, and the start of the per-URL loop:

        exit(1)

    # this is file index - everything is stored in this file
    index = shelve.open(options.dir + '/.index')
    desc_fields = ('Content-Length', 'ETag', 'Pragma', 'Last-Modified')
    ignore_fields = ('Accept-Ranges', 'Age', 'Cache-Control', 'Connection', 'Content-Type', 'Date', 'Expires', 'Server', 'Via', 'X-Cache', 'X-Cache-Lookup')

    block_size = 32768

    while True:
        unchecked_files = set()
        checked_files = 0

        # reading log and storing found urls for processing
        # check file mtime XXX
        with open(options.log, 'r') as log_file:
            log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
            for line in log_file:
                this_line = log_line.match(line.strip())
                if this_line:
                    unchecked_files.add(this_line.group(2))

        for url in unchecked_files:
            # creating empty placeholder in index
            if not url in index:
                index[url] = {}
            reload = False
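The whole sync cycle hinges on that one regular expression, so here is a quick sketch of what it captures, run against an invented access-log line (the address, path, and user agent below are made up for illustration and are not taken from the repository):

```python
# A minimal, self-contained sketch of the log-parsing step above.
import re

log_line = re.compile(r'^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')

# invented sample line in common/combined log format
sample = '203.0.113.7 - - [01/Jul/2010:14:04:38 +0000] "GET /patch/data.bin?ver=2 HTTP/1.1" 200 1024 "-" "FreeRealms"'

match = log_line.match(sample.strip())
if match:
    # group 2 is the request path; the query string is split off into group 3
    print(match.group(1), match.group(2))  # prints: GET /patch/data.bin
```

Because only group 2 lands in unchecked_files, repeated requests for the same file that differ only in their query string collapse into a single URL to check.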
︙
Lines 78-84 of the old version grow into lines 80-153 of the new one: the header comparison against the stored index, the block-by-block download with its progress indicator, and the HTTP error handling:

                # stripping unneeded headers (XXX make this inplace?)
                for header in headers:
                    if header in desc_fields:
                        if header == 'Pragma' and headers[header] != 'no-cache':
                            print('Pragma:', headers[header])
                        new_headers[header] = headers[header]
                    elif not header in ignore_fields:
                        print('Undefined header "', header, '": ', headers[header], sep='')

                # comparing headers with data found in index
                # if any header has changed (except Pragma) file is fully downloaded
                # same if we get more or less headers
                old_keys = set(index[url].keys())
                old_keys.discard('__time__')
                old_keys.discard('Pragma')
                more_keys = set(new_headers.keys()) - old_keys
                more_keys.discard('Pragma')
                less_keys = old_keys - set(new_headers.keys())
                if len(more_keys) > 0:
                    print('More headers appear:', more_keys)
                    reload = True
                elif len(less_keys) > 0:
                    print('Less headers appear:', less_keys)
                    reload = True
                else:
                    for key in index[url].keys():
                        if key not in ('__time__', 'Pragma') and not index[url][key] == new_headers[key]:
                            print('Header "', key, '" changed from [', index[url][key], '] to [', new_headers[key], ']', sep='')
                            reload = True

                # downloading file
                if reload:
                    if 'Content-Length' in headers:
                        print('Downloading', headers['Content-Length'], 'bytes [', end='')
                    else:
                        print('Downloading [', end='')
                    sys.stdout.flush()

                    # file is created at temporary location and moved in place only when download completes
                    temp_file = open(options.dir + '/.tmp', 'wb')
                    buffer = source.read(block_size)
                    blocks = 0
                    megs = 0
                    while len(buffer) > 0:
                        temp_file.write(buffer)
                        print('.', end='')
                        sys.stdout.flush()
                        buffer = source.read(block_size)
                        blocks += 1
                        if blocks > 1024*1024/block_size:
                            blocks = blocks - 1024*1024/block_size
                            megs += 1
                            print('{}Mb'.format(megs), end='')
                    temp_file.close()
                    print(']')
                    os.renames(options.dir + '/.tmp', file_name)
                    checked_files += 1

                # storing new time mark and storing new headers
                new_headers['__time__'] = datetime.datetime.now()
                index[url] = new_headers
                index.sync()
            except urllib.error.HTTPError as error:
                # in case of error we don't need to do anything actually,
                # if file download stalls or fails the file would not be moved to it's location
                print(error)

        print('[', len(unchecked_files), '/', checked_files, ']')

        # checking if there were any files downloaded, if yes - restarting sequence
        if checked_files == 0:
            break
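The "download information" half of the check-in comment is the progress indicator in this hunk: the response is read block_size bytes at a time into a temporary file, every block prints a dot, and roughly every megabyte a running total is printed before the finished file is moved into place. A stripped-down sketch of the same pattern with the index bookkeeping removed (the fetch helper, URL, and file names are invented for illustration):

```python
# Sketch: chunked download with dot-per-block progress and a safe final rename.
import os
import sys
import urllib.request

block_size = 32768

def fetch(url, file_name, temp_name='.tmp'):
    print('Downloading [', end='')
    with urllib.request.urlopen(url) as source, open(temp_name, 'wb') as temp_file:
        blocks = 0
        megs = 0
        buffer = source.read(block_size)
        while len(buffer) > 0:
            temp_file.write(buffer)
            print('.', end='')
            sys.stdout.flush()
            buffer = source.read(block_size)
            blocks += 1
            if blocks >= 1024 * 1024 // block_size:  # one megabyte's worth of blocks
                blocks -= 1024 * 1024 // block_size
                megs += 1
                print('{}Mb'.format(megs), end='')
    print(']')
    # only a completed download reaches the rename, so a stalled or
    # failed transfer never replaces the previously finished file
    os.renames(temp_name, file_name)

if __name__ == '__main__':
    fetch('http://example.com/file.bin', 'mirror/file.bin')  # placeholder URL and path
```

Writing into a hidden temporary name and renaming only after the read loop ends is what makes a stalled or failed transfer harmless: the partial file never reaches file_name, exactly as the comment in the except branch notes.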