601ec56da6 2011-12-19 1: #!/usr/bin/env python
601ec56da6 2011-12-19 2:
601ec56da6 2011-12-19 3: from __future__ import unicode_literals, print_function
601ec56da6 2011-12-19 4:
82969b1fc2 2012-01-25 5: #import gevent.monkey
82969b1fc2 2012-01-25 6: #gevent.monkey.patch_all()
82969b1fc2 2012-01-25 7:
601ec56da6 2011-12-19 8: import bsddb.dbshelve, copy, datetime, os, BaseHTTPServer, sys, spacemap, re, urllib2
e7b837a681 2010-08-25 9:
class Config(object):
    """Command-line and INI-file backed configuration.

    The configuration file is chosen with ``-c/--config`` (default
    ``samesite.conf``).  Every section except ``general`` gets a ``dir``
    option normalised to a path and a ``root`` option that defaults to the
    section name.  Values are read through ``config[name]`` after a section
    has been selected with :meth:`section`.
    """

    # '_default' is a class attribute and therefore must not be listed here:
    # a slot and a class variable of the same name cannot coexist.
    __slots__ = ('_config', '_section', 'options', 'root')

    # Fallback values: 'general' applies to the [general] section only,
    # '_other' applies to every section that has no entry of its own here.
    _default = {
        'general': {
            'port': '8008',
        },
        '_other': {
            'verbose': 'no',
            'noetag': 'no',
            'noparts': 'no',
            'strip': '',
            'sub': '',
            'proto': 'http',
        },
    }

    def __init__(self):
        """Parse the command line and read the configuration file.

        Raises:
            IOError: the configuration file is not readable.
        """
        import ConfigParser
        import optparse

        parser = optparse.OptionParser()
        parser.add_option('-c', '--config', dest='config', help='config file location', metavar='FILE', default='samesite.conf')
        (self.options, args) = parser.parse_args()

        # An explicit raise keeps the check alive under ``python -O``.
        if not os.access(self.options.config, os.R_OK):
            raise IOError("Fatal error: can't read {}".format(self.options.config))

        # Relative directories are resolved against the directory that holds
        # the configuration file, or the current directory when the file was
        # given without any path component.
        located = re.match(r'^(.*)/[^/]+$', self.options.config)
        if located:
            self.root = located.group(1)
        else:
            self.root = os.getcwd()

        self._config = ConfigParser.ConfigParser()
        with open(self.options.config) as config_file:
            self._config.readfp(config_file)

        for section in self._config.sections():
            if section == 'general':
                continue

            configured = None
            if self._config.has_option(section, 'dir'):
                configured = self._config.get(section, 'dir')
            self._config.set(section, 'dir', self._resolve_dir(section, configured))

            if not self._config.has_option(section, 'root'):
                self._config.set(section, 'root', section)

    def _resolve_dir(self, section, value):
        """Return the storage directory for *section*.

        *value* is the configured ``dir`` option or ``None`` when the option
        is absent.  A missing option or a bare ``/`` selects
        ``<root>/<section>``; one trailing slash is dropped; anything that
        does not start with ``/`` is prefixed with ``<root>``.
        """
        if value is None:
            return self.root + os.sep + section
        if value == '/':
            value = self.root + os.sep + section
        if value.endswith('/'):
            value = value[:-1]
        if not value.startswith('/'):
            value = self.root + os.sep + value
        return value

    def section(self, section):
        """Select *section* for subsequent lookups, creating it if missing."""
        if not self._config.has_section(section):
            self._config.add_section(section)
        self._section = section

    def __getitem__(self, name):
        """Return option *name* of the selected section.

        A missing option is first filled in from the defaults (the section's
        own entry in ``_default`` if there is one, otherwise ``_other``) or
        with ``None`` when no default exists, so the lookup never fails.
        """
        if not self._config.has_option(self._section, name):
            fallback = self._default.get(self._section, self._default['_other'])
            self._config.set(self._section, name, fallback.get(name))
        return self._config.get(self._section, name)
e7b837a681 2010-08-25 80:
# Parsed once at import time; request handlers select a section per request.
config = Config()

# Origin response headers that are stored in the index and compared against
# the stored copy to detect that a file has changed.
const_desc_fields = {'content-length', 'last-modified', 'pragma'}

# Origin response headers that are silently dropped; anything that is neither
# described above nor listed here is reported as an undefined header.
const_ignore_fields = {
    'accept-ranges', 'age',
    'cache-control', 'connection', 'content-type',
    'date',
    'expires',
    'referer',
    'server',
    'via',
    'x-cache', 'x-cache-lookup', 'x-livetool', 'x-powered-by',
}

# Upper bound, in bytes, of a single read while downloading or serving.
block_size = 8192
90160dbf50 2011-03-06 98:
class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve GET/HEAD requests from a per-host on-disk cache.

    The ``Host`` header selects the configuration section.  Each cached path
    has a record in the ``.index`` shelf of the section's directory:

    * ``_parts`` - space map of the byte ranges still missing; an empty map
      means the file is complete, ``None`` means nothing is known;
    * ``_time``  - when the file was last checked against the origin;
    * everything else - origin response headers describing the file.

    Complete files live at ``<dir>/<path>``, unfinished downloads at
    ``<dir>/.parts/<path>``.
    """

    # Request headers that are accepted without influencing cache handling.
    _proxy_ignored = frozenset([
        'accept', 'accept-charset', 'accept-encoding', 'accept-language',
        'cache-control', 'connection', 'content-length', 'cookie',
        'host',
        'if-modified-since', 'if-unmodified-since',
        'referer',
        'ua-cpu', 'user-agent',
        'via',
        'x-forwarded-for', 'x-last-hr', 'x-last-http-status-code', 'x-removed', 'x-real-ip', 'x-retry-count',
    ])

    @staticmethod
    def _strip_query(path):
        """Return *path* without its query string (everything from the first '?')."""
        return path.split('?', 1)[0]

    @staticmethod
    def _has_parent_reference(path):
        """Tell whether *path* contains a '..' segment."""
        return '..' in re.split(r'[/\\]', path)

    @staticmethod
    def _format_ranges(space_map):
        """Render every span of *space_map* as 'first-last', comma separated.

        Spans are read with ``rewind()``/``pop()`` until ``pop()`` yields a
        pair whose first element is ``None``; the stored end is exclusive, so
        one is subtracted to get the inclusive HTTP form.
        """
        spans = []
        space_map.rewind()
        while True:
            pair = space_map.pop()
            if pair[0] is None:
                break
            spans.append('{}-{}'.format(pair[0], pair[1] - 1))
        return ','.join(spans)

    def __process(self):
        """Common entry point of do_GET and do_HEAD."""
        my_path = self._strip_query(self.path)

        config.section(self.headers['host'])

        # optional per-host rewriting of the path used as cache key
        if config['sub'] is not None and config['strip'] is not None and len(config['strip']) > 0:
            my_path = re.sub(config['strip'], config['sub'], my_path)

        # The path is joined to the cache directory and files there are
        # created and unlinked, so it must not be able to climb out of it.
        if self._has_parent_reference(my_path):
            self.send_error(403, 'Parent directory references are not allowed')
            return

        if not os.access(config['dir'], os.X_OK):
            os.mkdir(config['dir'])
        # this is file index - everything is stored in this file
        index = bsddb.dbshelve.open(config['dir'] + os.sep + '.index')
        try:
            self._handle_request(index, my_path)
        finally:
            # opened once per request, so it has to be released per request
            index.close()

    def _handle_request(self, index, my_path):
        """Decide whether the cached copy is usable, refresh it, then answer."""
        # must_reload: the file has to be fetched again to serve the request
        must_reload = False
        # must_recheck: the file has to be verified against the origin; an
        # older copy can still be served if that fails
        must_recheck = False
        # file_stat is set only when a file is known to exist on disk
        file_stat = None
        # span requested by the client through a Range header, if any
        requested_ranges = None

        info = 'Checking file: ' + my_path

        desc_fields = const_desc_fields.copy()
        ignore_fields = const_ignore_fields.copy()
        if config['noetag'] == 'no':
            desc_fields.add('etag')
        else:
            ignore_fields.add('etag')

        print('===============[ {} request ]==='.format(self.command))

        for header in self.headers:
            if header in self._proxy_ignored:
                pass
            elif header == 'range':
                span = re.match(r'bytes=(\d+)-(\d+)', self.headers[header])
                if not span:
                    # only the explicit first-last form is understood
                    self.send_error(501, 'Unsupported Range header')
                    return
                requested_ranges = spacemap.SpaceMap({int(span.group(1)): int(span.group(2)) + 1})
            elif header == 'pragma':
                if my_path in index:
                    # The shelf hands out a fresh copy on every lookup, so
                    # the record must be stored back for the change to stick.
                    stored = index[my_path]
                    stored[header] = self.headers[header]
                    index[my_path] = stored
            else:
                print('Unknown header - ', header, ': ', self.headers[header], sep='')
                self.send_error(501, 'Unsupported request header')
                return
            print(header, self.headers[header])

        local_part = my_path.replace('%20', ' ')
        # complete file
        file_name = config['dir'] + os.sep + local_part
        # partial file or unfinished download
        temp_name = config['dir'] + os.sep + '.parts' + local_part

        # if there's no space map and there's no file in real directory - we have no file
        # if there's an empty space map - file is full
        # space map generally covers every bit of file we don't possess currently
        if my_path not in index:
            info += '\nThis one is new.'
            must_reload = True
            record = {}
        else:
            record = index[my_path]
            if os.access(file_name, os.R_OK):
                info += '\nFull file found.'
                file_stat = os.stat(file_name)
            elif '_parts' in record and os.access(temp_name, os.R_OK):
                info += '\nPartial file found.'
                file_stat = os.stat(temp_name)
                must_recheck = True
            else:
                # indexed but gone from disk: forget what we had
                info += '\nFile not found or inaccessible.'
                record['_parts'] = None
                must_reload = True

        if '_parts' not in record:
            record['_parts'] = None

        if record['_parts'] is None:
            must_recheck = True

        # a complete file whose size disagrees with the index is fetched again
        if not must_reload and record['_parts'] == spacemap.SpaceMap():
            if 'content-length' in record and file_stat and file_stat.st_size != int(record['content-length']):
                info += '\nFile size is {} and stored file size is {}.'.format(file_stat.st_size, record['content-length'])
                record['_parts'] = None
                must_reload = True

        # a stored "Pragma: no-cache" forces a check
        if not must_reload and 'pragma' in record and record['pragma'] == 'no-cache':
            info += '\nPragma on: recheck imminent.'
            must_recheck = True

        # otherwise check again only when the last check is more than 4 hours old
        if (not must_recheck and not must_reload and '_time' in record
                and datetime.datetime.now() - record['_time'] > datetime.timedelta(hours=4)):
            info += '\nFile is old - rechecking.'
            must_recheck = True

        print(info)
        if must_reload or must_recheck:
            try:
                self._refresh_from_origin(index, my_path, record, requested_ranges, must_reload,
                                          file_name, temp_name, desc_fields, ignore_fields)
            except urllib2.HTTPError as error:
                # nothing to undo: an unfinished download stays in the
                # temporary location and is never moved into place
                print(error)

        # Read the record once; it is absent when the origin failed for a
        # path that has never been cached.
        stored = index[my_path] if my_path in index else None
        if stored is None:
            self.send_response(502)
            self.end_headers()
            return
        print(stored)

        if (not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK)
                and '_parts' in stored and stored['_parts'] == spacemap.SpaceMap()):
            # download is complete - move it to its final location
            # drop old dirs XXX
            print('Moving temporary file to new destination.')
            os.renames(temp_name, file_name)

        self._send_cached(stored, file_name, temp_name, requested_ranges)

    def _refresh_from_origin(self, index, my_path, record, requested_ranges, must_reload,
                             file_name, temp_name, desc_fields, ignore_fields):
        """Query the origin, update the index record and download missing data.

        Raises urllib2.HTTPError when the origin answers with an error status
        and AssertionError when its answer cannot be handled.
        """
        url = config['proto'] + '://' + config['root'] + self.path
        my_headers = {}
        for header in ('cache-control', 'cookie', 'referer', 'user-agent'):
            if header in self.headers:
                my_headers[header] = self.headers[header]

        # needed: spans to ask the origin for; None means "no Range header"
        needed = None
        if self.command != 'HEAD':
            if '_parts' in record and record['_parts'] is not None:
                if config['noparts'] != 'no' or requested_ranges is None or requested_ranges == spacemap.SpaceMap():
                    needed = record['_parts']
                else:
                    needed = record['_parts'] & requested_ranges
            elif config['noparts'] == 'no' and requested_ranges is not None and requested_ranges != spacemap.SpaceMap():
                needed = requested_ranges
            print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
            if needed is not None and len(needed) > 0:
                my_headers['range'] = 'bytes=' + self._format_ranges(needed)

        my_headers['Accept-Encoding'] = 'gzip, compress, deflate, identity; q=0'
        response = urllib2.urlopen(urllib2.Request(url, headers=my_headers), timeout=60)
        try:
            new_record = {}
            new_record['_parts'] = record['_parts']
            headers = response.info()

            source = response
            if 'content-encoding' in headers and headers['content-encoding'] == 'gzip':
                import gzip
                source = gzip.GzipFile(fileobj=response)

            # keep only the headers that describe the file (XXX make this inplace?)
            for header in headers:
                if header in desc_fields:
                    if header == 'content-length':
                        # for a partial answer the full size comes from content-range
                        if 'content-range' not in headers:
                            new_record[header] = int(headers[header])
                    else:
                        new_record[header] = headers[header]
                elif header == 'content-range':
                    total = re.match(r'^bytes (\d+)-(\d+)/(\d+)$', headers[header])
                    if not total:
                        raise AssertionError('Content-Range unrecognized.')
                    new_record['content-length'] = int(total.group(3))
                elif header not in ignore_fields:
                    print('Undefined header "', header, '": ', headers[header], sep='')

            # comparing headers with data found in index
            # if any header has changed (except Pragma) file is fully downloaded
            old_keys = set(record.keys())
            old_keys.discard('_time')
            old_keys.discard('pragma')
            new_keys = set(new_record.keys())
            more_keys = new_keys - old_keys
            more_keys.discard('pragma')
            less_keys = old_keys - new_keys
            if more_keys:
                if old_keys:
                    print('More headers appear:', more_keys)
                must_reload = True
            elif less_keys:
                print('Less headers appear:', less_keys)
            else:
                for key in record.keys():
                    if key[0] != '_' and key != 'pragma' and record[key] != new_record[key]:
                        print('Header "', key, '" changed from [', record[key], '] to [', new_record[key], ']', sep='')
                        print(type(record[key]), type(new_record[key]))
                        must_reload = True

            if must_reload:
                print('Reloading.')
                if os.access(temp_name, os.R_OK):
                    os.unlink(temp_name)
                if os.access(file_name, os.R_OK):
                    os.unlink(file_name)
                if 'content-length' in new_record:
                    # the whole file is missing now
                    new_record['_parts'] = spacemap.SpaceMap({0: int(new_record['content-length'])})
            # NOTE(review): the flattened source does not show whether this
            # fallback sat inside the reload branch; it is applied always so
            # that '_parts' is never None below.
            if not new_record['_parts']:
                new_record['_parts'] = spacemap.SpaceMap()
            print(new_record)

            # downloading file or segment
            if 'content-length' in new_record:
                if needed is None:
                    needed = new_record['_parts']
                elif len(needed) > 1:
                    print("Multipart requests currently not supported.")
                    raise AssertionError('Skip this one for now.')

            new_record['_time'] = datetime.datetime.now()
            if self.command != 'HEAD':
                # file is created at temporary location and moved in place only when download completes
                if not os.access(temp_name, os.R_OK):
                    empty_name = config['dir'] + os.sep + '.tmp'
                    with open(empty_name, 'w+b'):
                        pass
                    # renames() also creates the missing directories
                    os.renames(empty_name, temp_name)
                if requested_ranges is None and needed is None:
                    needed = new_record['_parts']
                with open(temp_name, 'r+b') as temp_file:
                    needed.rewind()
                    while True:
                        # XXX can make this implicit - one request per range
                        (start, end) = needed.pop()
                        if start is None:
                            break
                        stream_last = start
                        old_record = copy.copy(new_record)
                        req_block_size = min(block_size, end - start)
                        chunk = source.read(req_block_size)
                        while len(chunk) > 0 and stream_last < end:
                            stream_pos = stream_last + len(chunk)
                            if stream_pos > end:
                                raise AssertionError('Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end))
                            temp_file.seek(stream_last)
                            temp_file.write(chunk)
                            new_record['_parts'] = new_record['_parts'] - spacemap.SpaceMap({stream_last: stream_pos})
                            # the index trails the file by one block, so it
                            # never claims data that may not be on disk yet
                            index[my_path] = old_record
                            index.sync()
                            old_record = copy.copy(new_record)
                            stream_last = stream_pos
                            if end - stream_last < block_size:
                                req_block_size = end - stream_last
                            chunk = source.read(req_block_size)

            index[my_path] = new_record
            index.sync()
        finally:
            response.close()

    def _send_cached(self, stored, file_name, temp_name, requested_ranges):
        """Answer the client from disk using the index record *stored*."""
        if self.command == 'HEAD':
            self.send_response(200)
            if 'content-length' in stored:
                self.send_header('content-length', stored['content-length'])
            self.send_header('accept-ranges', 'bytes')
            self.send_header('content-type', 'application/octet-stream')
            if 'last-modified' in stored:
                self.send_header('last-modified', stored['last-modified'])
            self.end_headers()
            return

        # an incomplete or not yet moved file is served from the temporary copy
        if ('_parts' in stored and stored['_parts'] != spacemap.SpaceMap()) or not os.access(file_name, os.R_OK):
            file_name = temp_name

        with open(file_name, 'rb') as real_file:
            file_stat = os.stat(file_name)
            if 'range' in self.headers:
                self.send_response(206)
                self.send_header('content-range', 'bytes {}/{}'.format(self._format_ranges(requested_ranges), stored['content-length']))
            else:
                self.send_response(200)
                self.send_header('content-length', str(file_stat.st_size))
                requested_ranges = spacemap.SpaceMap({0: file_stat.st_size})
            if 'last-modified' in stored:
                self.send_header('last-modified', stored['last-modified'])
            self.send_header('content-type', 'application/octet-stream')
            self.end_headers()

            if self.command == 'GET':
                if len(requested_ranges) > 0:
                    requested_ranges.rewind()
                    (start, end) = requested_ranges.pop()
                else:
                    start = 0
                    # XXX ugly hack
                    if 'content-length' in stored:
                        end = stored['content-length']
                    else:
                        end = 0
                real_file.seek(start)
                req_block_size = min(block_size, end - start)
                chunk = real_file.read(req_block_size)
                while len(chunk) > 0:
                    self.wfile.write(chunk)
                    start += len(chunk)
                    if req_block_size > end - start:
                        req_block_size = end - start
                    if req_block_size == 0:
                        break
                    chunk = real_file.read(req_block_size)

    def do_HEAD(self):
        return self.__process()

    def do_GET(self):
        return self.__process()
90160dbf50 2011-03-06 457:
# The listening port comes from the [general] section; the server binds to
# the loopback interface only.
config.section('general')
server = BaseHTTPServer.HTTPServer(('127.0.0.1', int(config['port'])), MyRequestHandler)
try:
    server.serve_forever()
except KeyboardInterrupt:
    # Ctrl-C is the ordinary way to stop the proxy; exit without a traceback.
    pass
finally:
    # release the listening socket explicitly
    server.server_close()
82969b1fc2 2012-01-25 461:
82969b1fc2 2012-01-25 462: #gevent.joinall()