Overview
Comment: | More features: * path substitution * more fields ignored in requests; * request now uses original path - not modified by stripping or substituting; * some headers are proxied to request; * when there is no file to send 502 error is returned; * recheck file before moving it to full dir; * Last-Modified only sent when present. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | master | trunk |
Files: | files | file ages | folders |
SHA3-256: |
cab908195f3b29791b79f2137ba2e7f4 |
User & Date: | c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-09-06 20:53:47.000 |
Other Links: | branch diff | manifest | tags |
Context
2010-09-07
| ||
11:24 | Some changes: * one more header known; * new code to select which parts of file should be downloaded, fixes (possibly) a very bad bug that makes continuing download totally unusable. check-in: 439e1753a4 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk | |
2010-09-06
| ||
20:53 | More features: * path substitution * more fields ignored in requests; * request now uses original path - not modified by stripping or substituting; * some headers are proxied to request; * when there is no file to send 502 error is returned; * recheck file before moving it to full dir; * Last-Modified only sent when present. check-in: cab908195f user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk | |
2010-08-26
| ||
15:06 | optimized remains detection check-in: b0975a28fb user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk | |
Changes
Modified samesite.py
from [ed1a5254d3]
to [f557383e69].
︙ | ︙ | |||
8 9 10 11 12 13 14 15 16 17 18 19 20 21 | 'general': { 'port': '8008', }, '_other': { 'verbose': 'no', 'noetag': 'no', 'noparts': 'no', },} # function to read in config file def __init__(self): import configparser, optparse parser = optparse.OptionParser() | > > | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | 'general': { 'port': '8008', }, '_other': { 'verbose': 'no', 'noetag': 'no', 'noparts': 'no', 'strip': '', 'sub': '', },} # function to read in config file def __init__(self): import configparser, optparse parser = optparse.OptionParser() |
︙ | ︙ | |||
70 71 72 73 74 75 76 | self._config.set(self._section, name, None) return(self._config.get(self._section, name)) config = Config() #assert options.port or os.access(options.log, os.R_OK), 'Log file unreadable' | | > > | > > > > > > | 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | self._config.set(self._section, name, None) return(self._config.get(self._section, name)) config = Config() #assert options.port or os.access(options.log, os.R_OK), 'Log file unreadable' const_desc_fields = set(['Content-Length', 'Last-Modified', 'Pragma']) const_ignore_fields = set([ 'Accept-Ranges', 'Age', 'Cache-Control', 'Connection', 'Content-Type', 'Date', 'Expires', 'Server', 'Via', 'X-Cache', 'X-Cache-Lookup', 'X-Powered-By' ]) block_size = 4096 ''' # later, kqueue would be good but later class Connection: __slots__ = frozenset(('__address', '__input', '__socket', '__status', 'error', 'method', 'url', 'http_version')) |
︙ | ︙ | |||
191 192 193 194 195 196 197 | recheck = False # file_stat means file definitely exists file_stat = None # requested_ranges holds data about any range requested requested_ranges = None # records holds data from index locally, should be written back upon successfull completion record = None | < > > > > > > | > | > | < > | 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | recheck = False # file_stat means file definitely exists file_stat = None # requested_ranges holds data about any range requested requested_ranges = None # records holds data from index locally, should be written back upon successfull completion record = None myPath = re.compile('^(.*?)(\?.*)$').match(self.path) if myPath: my_path = myPath.group(1) else: my_path = self.path config.section(self.headers['Host']) if config['sub'] != None and config['strip'] != None and len(config['strip']) > 0: string = re.compile(config['strip']).sub(config['sub'], my_path) my_path = string info = 'Checking file: ' + my_path if not os.access(config['dir'], os.X_OK): os.mkdir(config['dir']) # this is file index - everything is stored in this file # _parts - list of stored parts of file # _time - last time the file was checked # everything else is just the headers index = shelve.open(config['dir'] + os.sep + '.index') desc_fields = const_desc_fields.copy() ignore_fields = const_ignore_fields.copy() if not config['noetag']: desc_fields.add('ETag') else: ignore_fields.add('ETag') proxy_ignored = set([ 'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language', 'Cache-Control', 'Connection', 'Content-Length', 'Cookie', 'Host', 'If-Modified-Since', 'If-Unmodified-Since', 'Referer', 'User-Agent', 'Via', 'X-Forwarded-For', 'X-REMOVED', ]) print('===============[ {} request ]==='.format(self.command)) for header in self.headers: if header in proxy_ignored: 
pass elif header in ('Range'): |
︙ | ︙ | |||
269 270 271 272 273 274 275 | file_stat = os.stat(temp_name) else: info += '\nFile not found or inaccessible.' index[my_path]['_parts'] = None reload = True record = index[my_path] | < < | 286 287 288 289 290 291 292 293 294 295 296 297 298 299 | file_stat = os.stat(temp_name) else: info += '\nFile not found or inaccessible.' index[my_path]['_parts'] = None reload = True record = index[my_path] if not '_parts' in record: record['_parts'] = None if record['_parts'] == None: recheck = True # forcibly checking file if file size doesn't match with index data |
︙ | ︙ | |||
298 299 300 301 302 303 304 | if not recheck and not reload and '_time' in record and (datetime.datetime.now() - datetime.timedelta(hours = 4) - record['_time']).days < 0: recheck = True print(info) if reload or recheck: try: | | > > > > > > > | | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 | if not recheck and not reload and '_time' in record and (datetime.datetime.now() - datetime.timedelta(hours = 4) - record['_time']).days < 0: recheck = True print(info) if reload or recheck: try: request = 'http://' + config['root'] + self.path my_headers = {} for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'): if header in self.headers: my_headers[header] = self.headers[header] needed = None # XXX and if we specify full file we don't go partial? if requested_ranges != None: if '_parts' in record and record['_parts'] != None: if config['noparts']: needed = record['_parts'] else: needed = record['_parts'] | requested_ranges elif not config['noparts']: needed = requested_ranges ranges = () print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed)) if needed != None and len(needed) > 0: needed.rewind() while True: range = needed.pop() if range[0] == None: break ranges += '{}-{}'.format(range[0], range[1] - 1), my_headers['Range'] = 'bytes=' + ','.join(ranges) request = urllib.request.Request(request, headers = my_headers) with urllib.request.urlopen(request) as source: new_record = {} new_record['_parts'] = record['_parts'] headers = source.info() # stripping unneeded headers (XXX make this inplace?) |
︙ | ︙ | |||
433 434 435 436 437 438 439 | index.sync() except urllib.error.HTTPError as error: # in case of error we don't need to do anything actually, # if file download stalls or fails the file would not be moved to it's location print(error) | | > > > > > | 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 | index.sync() except urllib.error.HTTPError as error: # in case of error we don't need to do anything actually, # if file download stalls or fails the file would not be moved to it's location print(error) if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap(): # just moving # drop old dirs XXX print('Moving temporary file to new destination.') os.renames(temp_name, file_name) if not my_path in index: self.send_response(502) self.end_headers() return if self.command == 'HEAD': self.send_response(200) if 'Content-Length' in index[my_path]: self.send_header('Content-Length', index[my_path]['Content-Length']) self.send_header('Accept-Ranges', 'bytes') self.send_header('Content-Type', 'application/octet-stream') |
︙ | ︙ | |||
468 469 470 471 472 473 474 | break ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)), self.send_header('Content-Range', 'bytes {}/{}'.format(','.join(ranges), index[my_path]['Content-Length'])) else: self.send_response(200) self.send_header('Content-Length', str(file_stat.st_size)) requested_ranges = spacemap.SpaceMap({0: file_stat.st_size}) | > | | 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 | break ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)), self.send_header('Content-Range', 'bytes {}/{}'.format(','.join(ranges), index[my_path]['Content-Length'])) else: self.send_response(200) self.send_header('Content-Length', str(file_stat.st_size)) requested_ranges = spacemap.SpaceMap({0: file_stat.st_size}) if 'Last-Modified' in index[my_path]: self.send_header('Last-Modified', index[my_path]['Last-Modified']) self.send_header('Content-Type', 'application/octet-stream') self.end_headers() if self.command in ('GET'): if len(requested_ranges) > 0: requested_ranges.rewind() (start, end) = requested_ranges.pop() else: |
︙ | ︙ |