Samesite - proxy that can cache partial transfers

Annotation For samesite.py
anonymous

Annotation For samesite.py

Origin for each line in samesite.py from check-in d1fa9d0737:

601ec56da6 2011-12-19    1: #!/usr/bin/env python
601ec56da6 2011-12-19    2: 
601ec56da6 2011-12-19    3: from __future__ import unicode_literals, print_function
e7b837a681 2010-08-25    4: 
601ec56da6 2011-12-19    5: import bsddb.dbshelve, copy, datetime, os, BaseHTTPServer, sys, spacemap, re, urllib2
e7b837a681 2010-08-25    6: 
e7b837a681 2010-08-25    7: class Config:
e7b837a681 2010-08-25    8: 	__slots__ = frozenset(['_config', '_default', '_section', 'options', 'root'])
e7b837a681 2010-08-25    9: 	_default = {
e7b837a681 2010-08-25   10: 		'general': {
e7b837a681 2010-08-25   11: 			'port': '8008',
e7b837a681 2010-08-25   12: 		},
e7b837a681 2010-08-25   13: 		'_other': {
e7b837a681 2010-08-25   14: 			'verbose': 'no',
e7b837a681 2010-08-25   15: 			'noetag': 'no',
e7b837a681 2010-08-25   16: 			'noparts': 'no',
cab908195f 2010-09-06   17: 			'strip': '',
cab908195f 2010-09-06   18: 			'sub': '',
e7b837a681 2010-08-25   19: 	},}
e7b837a681 2010-08-25   20: 
e7b837a681 2010-08-25   21: 	# function to read in config file
e7b837a681 2010-08-25   22: 	def __init__(self):
601ec56da6 2011-12-19   23: 		import ConfigParser, optparse
e7b837a681 2010-08-25   24: 
e7b837a681 2010-08-25   25: 		parser = optparse.OptionParser()
e7b837a681 2010-08-25   26: 		parser.add_option('-c', '--config', dest = 'config', help = 'config file location', metavar = 'FILE', default = 'samesite.conf')
e7b837a681 2010-08-25   27: 		(self.options, args) = parser.parse_args()
e7b837a681 2010-08-25   28: 
e7b837a681 2010-08-25   29: 		assert os.access(self.options.config, os.R_OK), "Fatal error: can't read {}".format(self.options.config)
e7b837a681 2010-08-25   30: 
e7b837a681 2010-08-25   31: 		configDir = re.compile('^(.*)/[^/]+$').match(self.options.config)
e7b837a681 2010-08-25   32: 		if configDir:
e7b837a681 2010-08-25   33: 			self.root = configDir.group(1)
e7b837a681 2010-08-25   34: 		else:
e7b837a681 2010-08-25   35: 			self.root = os.getcwd()
e7b837a681 2010-08-25   36: 
601ec56da6 2011-12-19   37: 		self._config = ConfigParser.ConfigParser()
e7b837a681 2010-08-25   38: 		self._config.readfp(open(self.options.config))
e7b837a681 2010-08-25   39: 
e7b837a681 2010-08-25   40: 		for section in self._config.sections():
e7b837a681 2010-08-25   41: 			if section != 'general':
e7b837a681 2010-08-25   42: 				if self._config.has_option(section, 'dir'):
e7b837a681 2010-08-25   43: 					if re.compile('^/$').match(self._config.get(section, 'dir')):
e7b837a681 2010-08-25   44: 						self._config.set(section, 'dir', self.root + os.sep + section)
e7b837a681 2010-08-25   45: 					thisDir = re.compile('^(.*)/$').match(self._config.get(section, 'dir'))
e7b837a681 2010-08-25   46: 					if thisDir:
e7b837a681 2010-08-25   47: 						self._config.set(section, 'dir', thisDir.group(1))
e7b837a681 2010-08-25   48: 					if not re.compile('^/(.*)$').match(self._config.get(section, 'dir')):
e7b837a681 2010-08-25   49: 						self._config.set(section, 'dir', self.root + os.sep + self._config.get(section, 'dir'))
e7b837a681 2010-08-25   50: 				else:
e7b837a681 2010-08-25   51: 					self._config.set(section, 'dir', self.root + os.sep + section)
e7b837a681 2010-08-25   52: 
e7b837a681 2010-08-25   53: 				if not self._config.has_option(section, 'root'):
e7b837a681 2010-08-25   54: 					self._config.set(section, 'root', section)
e7b837a681 2010-08-25   55: 
e7b837a681 2010-08-25   56: 	# function to select config file section or create one
e7b837a681 2010-08-25   57: 	def section(self, section):
e7b837a681 2010-08-25   58: 		if not self._config.has_section(section):
e7b837a681 2010-08-25   59: 			self._config.add_section(section)
e7b837a681 2010-08-25   60: 		self._section = section
e7b837a681 2010-08-25   61: 
e7b837a681 2010-08-25   62: 	# function to get config parameter, if parameter doesn't exists the default
e7b837a681 2010-08-25   63: 	# value or None is substituted
e7b837a681 2010-08-25   64: 	def __getitem__(self, name):
e7b837a681 2010-08-25   65: 		if not self._config.has_option(self._section, name):
e7b837a681 2010-08-25   66: 			if self._section in self._default:
e7b837a681 2010-08-25   67: 				if name in self._default[self._section]:
e7b837a681 2010-08-25   68: 					self._config.set(self._section, name, self._default[self._section][name])
e7b837a681 2010-08-25   69: 				else:
e7b837a681 2010-08-25   70: 					self._config.set(self._section, name, None)
e7b837a681 2010-08-25   71: 			elif name in self._default['_other']:
e7b837a681 2010-08-25   72: 				self._config.set(self._section, name, self._default['_other'][name])
e7b837a681 2010-08-25   73: 			else:
e7b837a681 2010-08-25   74: 				self._config.set(self._section, name, None)
e7b837a681 2010-08-25   75: 		return(self._config.get(self._section, name))
e7b837a681 2010-08-25   76: 
e7b837a681 2010-08-25   77: config = Config()
e7b837a681 2010-08-25   78: 
e7b837a681 2010-08-25   79: #assert options.port or os.access(options.log, os.R_OK), 'Log file unreadable'
e7b837a681 2010-08-25   80: 
601ec56da6 2011-12-19   81: const_desc_fields = set(['content-length', 'last-modified', 'pragma'])
cab908195f 2010-09-06   82: const_ignore_fields = set([
601ec56da6 2011-12-19   83: 	'accept-ranges', 'age',
601ec56da6 2011-12-19   84: 	'cache-control', 'connection', 'content-type',
601ec56da6 2011-12-19   85: 	'date',
601ec56da6 2011-12-19   86: 	'expires',
601ec56da6 2011-12-19   87: 	'referer',
601ec56da6 2011-12-19   88: 	'server',
601ec56da6 2011-12-19   89: 	'via',
601ec56da6 2011-12-19   90: 	'x-cache', 'x-cache-lookup', 'x-livetool', 'x-powered-by',
cab908195f 2010-09-06   91: ])
cab908195f 2010-09-06   92: 
cab908195f 2010-09-06   93: block_size = 4096
cab908195f 2010-09-06   94: 
601ec56da6 2011-12-19   95: class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
90160dbf50 2011-03-06   96: 	def __process(self):
90160dbf50 2011-03-06   97: 		# reload means file needs to be reloaded to serve request
90160dbf50 2011-03-06   98: 		reload = False
90160dbf50 2011-03-06   99: 		# recheck means file needs to be checked, this also means that if file hav been modified we can serve older copy
90160dbf50 2011-03-06  100: 		recheck = False
90160dbf50 2011-03-06  101: 		# file_stat means file definitely exists
90160dbf50 2011-03-06  102: 		file_stat = None
90160dbf50 2011-03-06  103: 		# requested_ranges holds data about any range requested
90160dbf50 2011-03-06  104: 		requested_ranges = None
90160dbf50 2011-03-06  105: 		# records holds data from index locally, should be written back upon successfull completion
90160dbf50 2011-03-06  106: 		record = None
90160dbf50 2011-03-06  107: 
90160dbf50 2011-03-06  108: 		myPath = re.compile('^(.*?)(\?.*)$').match(self.path)
90160dbf50 2011-03-06  109: 		if myPath:
90160dbf50 2011-03-06  110: 			my_path = myPath.group(1)
90160dbf50 2011-03-06  111: 		else:
90160dbf50 2011-03-06  112: 			my_path = self.path
90160dbf50 2011-03-06  113: 
601ec56da6 2011-12-19  114: 		config.section(self.headers['host'])
90160dbf50 2011-03-06  115: 
90160dbf50 2011-03-06  116: 		if config['sub'] != None and config['strip'] != None and len(config['strip']) > 0:
90160dbf50 2011-03-06  117: 			string = re.compile(config['strip']).sub(config['sub'], my_path)
90160dbf50 2011-03-06  118: 			my_path = string
90160dbf50 2011-03-06  119: 
90160dbf50 2011-03-06  120: 		info = 'Checking file: ' + my_path
90160dbf50 2011-03-06  121: 
90160dbf50 2011-03-06  122: 		if not os.access(config['dir'], os.X_OK):
90160dbf50 2011-03-06  123: 			os.mkdir(config['dir'])
90160dbf50 2011-03-06  124: 		# this is file index - everything is stored in this file
90160dbf50 2011-03-06  125: 		# _parts - list of stored parts of file
90160dbf50 2011-03-06  126: 		# _time - last time the file was checked
90160dbf50 2011-03-06  127: 		# everything else is just the headers
601ec56da6 2011-12-19  128: 		index = bsddb.dbshelve.open(config['dir'] + os.sep + '.index')
90160dbf50 2011-03-06  129: 
90160dbf50 2011-03-06  130: 		desc_fields = const_desc_fields.copy()
90160dbf50 2011-03-06  131: 		ignore_fields = const_ignore_fields.copy()
90160dbf50 2011-03-06  132: 		if config['noetag'] == 'no':
601ec56da6 2011-12-19  133: 			desc_fields.add('etag')
90160dbf50 2011-03-06  134: 		else:
601ec56da6 2011-12-19  135: 			ignore_fields.add('etag')
90160dbf50 2011-03-06  136: 
90160dbf50 2011-03-06  137: 		proxy_ignored = set([
601ec56da6 2011-12-19  138: 			'accept', 'accept-charset', 'accept-encoding', 'accept-language',
601ec56da6 2011-12-19  139: 			'cache-control', 'connection', 'content-length', 'cookie',
601ec56da6 2011-12-19  140: 			'host',
601ec56da6 2011-12-19  141: 			'if-modified-since', 'if-unmodified-since',
601ec56da6 2011-12-19  142: 			'referer',
601ec56da6 2011-12-19  143: 			'user-agent',
601ec56da6 2011-12-19  144: 			'via',
601ec56da6 2011-12-19  145: 			'x-forwarded-for', 'x-last-hr', 'x-last-http-status-code', 'x-removed', 'x-real-ip', 'x-retry-count',
90160dbf50 2011-03-06  146: 		])
90160dbf50 2011-03-06  147: 
90160dbf50 2011-03-06  148: 		print('===============[ {} request ]==='.format(self.command))
90160dbf50 2011-03-06  149: 
90160dbf50 2011-03-06  150: 		for header in self.headers:
90160dbf50 2011-03-06  151: 			if header in proxy_ignored:
90160dbf50 2011-03-06  152: 				pass
601ec56da6 2011-12-19  153: 			elif header in ('range'):
90160dbf50 2011-03-06  154: 				isRange = re.compile('bytes=(\d+)-(\d+)').match(self.headers[header])
90160dbf50 2011-03-06  155: 				if isRange:
90160dbf50 2011-03-06  156: 					requested_ranges = spacemap.SpaceMap({int(isRange.group(1)): int(isRange.group(2)) + 1})
90160dbf50 2011-03-06  157: 				else:
90160dbf50 2011-03-06  158: 					return()
601ec56da6 2011-12-19  159: 			elif header in ('pragma'):
90160dbf50 2011-03-06  160: 				if my_path in index:
90160dbf50 2011-03-06  161: 					index[my_path][header] = self.headers[header]
90160dbf50 2011-03-06  162: 			else:
90160dbf50 2011-03-06  163: 				print('Unknown header - ', header, ': ', self.headers[header], sep='')
90160dbf50 2011-03-06  164: 				return()
90160dbf50 2011-03-06  165: 			print(header, self.headers[header])
90160dbf50 2011-03-06  166: 
90160dbf50 2011-03-06  167: 		# creating file name from my_path
90160dbf50 2011-03-06  168: 		file_name = config['dir'] + os.sep + re.compile('%20').sub(' ', my_path)
90160dbf50 2011-03-06  169: 		# partial file or unfinished download
90160dbf50 2011-03-06  170: 		temp_name = config['dir'] + os.sep + '.parts' + re.compile('%20').sub(' ', my_path)
90160dbf50 2011-03-06  171: 
90160dbf50 2011-03-06  172: 		# creating empty placeholder in index
90160dbf50 2011-03-06  173: 		# if there's no space map and there's no file in real directory - we have no file
90160dbf50 2011-03-06  174: 		# if there's an empty space map - file is full
90160dbf50 2011-03-06  175: 		# space map generally covers every bit of file we don't posess currently
90160dbf50 2011-03-06  176: 		if not my_path in index:
90160dbf50 2011-03-06  177: 			info += '\nThis one is new.'
90160dbf50 2011-03-06  178: 			reload = True
90160dbf50 2011-03-06  179: 			record = {}
90160dbf50 2011-03-06  180: 		else:
90160dbf50 2011-03-06  181: 			# forcibly checking file if no file present
90160dbf50 2011-03-06  182: 			record = index[my_path]
90160dbf50 2011-03-06  183: 			if os.access(file_name, os.R_OK):
90160dbf50 2011-03-06  184: 				info += '\nFull file found.'
90160dbf50 2011-03-06  185: 				file_stat = os.stat(file_name)
90160dbf50 2011-03-06  186: 			elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK):
90160dbf50 2011-03-06  187: 				info += '\nPartial file found.'
90160dbf50 2011-03-06  188: 				file_stat = os.stat(temp_name)
d1fa9d0737 2012-01-16  189: 				recheck = True
90160dbf50 2011-03-06  190: 			else:
90160dbf50 2011-03-06  191: 				info += '\nFile not found or inaccessible.'
90160dbf50 2011-03-06  192: 				record['_parts'] = None
90160dbf50 2011-03-06  193: 				reload = True
90160dbf50 2011-03-06  194: 
90160dbf50 2011-03-06  195: 		if not '_parts' in record:
90160dbf50 2011-03-06  196: 			record['_parts'] = None
90160dbf50 2011-03-06  197: 
90160dbf50 2011-03-06  198: 		if record['_parts'] == None:
90160dbf50 2011-03-06  199: 			recheck = True
90160dbf50 2011-03-06  200: 
90160dbf50 2011-03-06  201: 		# forcibly checking file if file size doesn't match with index data
90160dbf50 2011-03-06  202: 		if not reload:
90160dbf50 2011-03-06  203: 			if '_parts' in record and record['_parts'] == spacemap.SpaceMap():
601ec56da6 2011-12-19  204: 				if 'content-length' in record and file_stat and file_stat.st_size != int(record['content-length']):
601ec56da6 2011-12-19  205: 					info += '\nFile size is {} and stored file size is {}.'.format(file_stat.st_size, record['content-length'])
90160dbf50 2011-03-06  206: 					record['_parts'] = None
90160dbf50 2011-03-06  207: 					reload = True
90160dbf50 2011-03-06  208: 
90160dbf50 2011-03-06  209: 		# forcibly checking file if index holds Pragma header
601ec56da6 2011-12-19  210: 		if not reload and 'pragma' in record and record['pragma'] == 'no-cache':
90160dbf50 2011-03-06  211: 			info +='\nPragma on: recheck imminent.'
90160dbf50 2011-03-06  212: 			recheck = True
90160dbf50 2011-03-06  213: 
90160dbf50 2011-03-06  214: 		# skipping file processing if there's no need to recheck it and we have checked it at least 4 hours ago
8425e2e393 2011-12-14  215: 		if not recheck and not reload and '_time' in record and (record['_time'] - datetime.datetime.now() + datetime.timedelta(hours = 4)).days < 0:
8425e2e393 2011-12-14  216: 			info += '\nFile is old - rechecking.'
90160dbf50 2011-03-06  217: 			recheck = True
90160dbf50 2011-03-06  218: 
90160dbf50 2011-03-06  219: 		print(info)
90160dbf50 2011-03-06  220: 		if reload or recheck:
90160dbf50 2011-03-06  221: 
90160dbf50 2011-03-06  222: 			try:
90160dbf50 2011-03-06  223: 				request = 'http://' + config['root'] + self.path
90160dbf50 2011-03-06  224: 				my_headers = {}
601ec56da6 2011-12-19  225: 				for header in ('cache-control', 'cookie', 'referer', 'user-agent'):
90160dbf50 2011-03-06  226: 					if header in self.headers:
90160dbf50 2011-03-06  227: 						my_headers[header] = self.headers[header]
90160dbf50 2011-03-06  228: 
90160dbf50 2011-03-06  229: 				needed = None
b5c328f916 2012-01-04  230: 				if self.command not in ('HEAD'):
b5c328f916 2012-01-04  231: 					if '_parts' in record and record['_parts'] != None:
b5c328f916 2012-01-04  232: 						if config['noparts'] != 'no' or requested_ranges == None or requested_ranges == spacemap.SpaceMap():
b5c328f916 2012-01-04  233: 							needed = record['_parts']
b5c328f916 2012-01-04  234: 						else:
b5c328f916 2012-01-04  235: 							needed = record['_parts'] & requested_ranges
b5c328f916 2012-01-04  236: 					elif config['noparts'] =='no' and requested_ranges != None and requested_ranges != spacemap.SpaceMap():
b5c328f916 2012-01-04  237: 						needed = requested_ranges
b5c328f916 2012-01-04  238: 					ranges = ()
b5c328f916 2012-01-04  239: 					print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
b5c328f916 2012-01-04  240: 					if needed != None and len(needed) > 0:
b5c328f916 2012-01-04  241: 						needed.rewind()
b5c328f916 2012-01-04  242: 						while True:
b5c328f916 2012-01-04  243: 							range = needed.pop()
b5c328f916 2012-01-04  244: 							if range[0] == None:
b5c328f916 2012-01-04  245: 								break
b5c328f916 2012-01-04  246: 							ranges += '{}-{}'.format(range[0], range[1] - 1),
b5c328f916 2012-01-04  247: 						my_headers['range'] = 'bytes=' + ','.join(ranges)
b5c328f916 2012-01-04  248: 
31a8af9ff1 2012-01-16  249: 				my_headers['Accept-Encoding'] = 'gzip'
601ec56da6 2011-12-19  250: 				request = urllib2.Request(request, headers = my_headers)
601ec56da6 2011-12-19  251: 
a81f1a70fb 2012-01-16  252: 				source = urllib2.urlopen(request, timeout = 60)
601ec56da6 2011-12-19  253: 				new_record = {}
601ec56da6 2011-12-19  254: 				new_record['_parts'] = record['_parts']
601ec56da6 2011-12-19  255: 				headers = source.info()
62e6d8a7ab 2012-01-16  256: 
62e6d8a7ab 2012-01-16  257: 				if 'content-encoding' in headers and headers['content-encoding'] == 'gzip':
a81f1a70fb 2012-01-16  258: 					import gzip
a81f1a70fb 2012-01-16  259: 					source = gzip.GzipFile(fileobj=source)
601ec56da6 2011-12-19  260: 
601ec56da6 2011-12-19  261: 				# stripping unneeded headers (XXX make this inplace?)
601ec56da6 2011-12-19  262: 				for header in headers:
601ec56da6 2011-12-19  263: 					if header in desc_fields:
601ec56da6 2011-12-19  264: 						#if header == 'Pragma' and headers[header] != 'no-cache':
601ec56da6 2011-12-19  265: 						if header == 'content-length':
601ec56da6 2011-12-19  266: 							if 'content-range' not in headers:
601ec56da6 2011-12-19  267: 								new_record[header] = int(headers[header])
601ec56da6 2011-12-19  268: 						else:
601ec56da6 2011-12-19  269: 							new_record[header] = headers[header]
601ec56da6 2011-12-19  270: 					elif header == 'content-range':
601ec56da6 2011-12-19  271: 						range = re.compile('^bytes (\d+)-(\d+)/(\d+)$').match(headers[header])
601ec56da6 2011-12-19  272: 						if range:
601ec56da6 2011-12-19  273: 							new_record['content-length'] = int(range.group(3))
601ec56da6 2011-12-19  274: 						else:	
601ec56da6 2011-12-19  275: 							assert False, 'Content-Range unrecognized.'
601ec56da6 2011-12-19  276: 					elif not header in ignore_fields:
601ec56da6 2011-12-19  277: 						print('Undefined header "', header, '": ', headers[header], sep='')
601ec56da6 2011-12-19  278: 
601ec56da6 2011-12-19  279: 				# comparing headers with data found in index
601ec56da6 2011-12-19  280: 				# if any header has changed (except Pragma) file is fully downloaded
601ec56da6 2011-12-19  281: 				# same if we get more or less headers
601ec56da6 2011-12-19  282: 				old_keys = set(record.keys())
601ec56da6 2011-12-19  283: 				old_keys.discard('_time')
601ec56da6 2011-12-19  284: 				old_keys.discard('pragma')
601ec56da6 2011-12-19  285: 				more_keys = set(new_record.keys()) - old_keys
601ec56da6 2011-12-19  286: 				more_keys.discard('pragma')
601ec56da6 2011-12-19  287: 				less_keys = old_keys - set(new_record.keys())
601ec56da6 2011-12-19  288: 				if len(more_keys) > 0:
601ec56da6 2011-12-19  289: 					if len(old_keys) != 0:
601ec56da6 2011-12-19  290: 						print('More headers appear:', more_keys)
601ec56da6 2011-12-19  291: 					reload = True
601ec56da6 2011-12-19  292: 				elif len(less_keys) > 0:
601ec56da6 2011-12-19  293: 					print('Less headers appear:', less_keys)
601ec56da6 2011-12-19  294: 				else:
601ec56da6 2011-12-19  295: 					for key in record.keys():
601ec56da6 2011-12-19  296: 						if key[0] != '_' and key != 'pragma' and record[key] != new_record[key]:
601ec56da6 2011-12-19  297: 							print('Header "', key, '" changed from [', record[key], '] to [', new_record[key], ']', sep='')
601ec56da6 2011-12-19  298: 							print(type(record[key]), type(new_record[key]))
601ec56da6 2011-12-19  299: 							reload = True
601ec56da6 2011-12-19  300: 
601ec56da6 2011-12-19  301: 				if reload:
601ec56da6 2011-12-19  302: 					print('Reloading.')
601ec56da6 2011-12-19  303: 					if os.access(temp_name, os.R_OK):
601ec56da6 2011-12-19  304: 						os.unlink(temp_name)
601ec56da6 2011-12-19  305: 					if os.access(file_name, os.R_OK):
601ec56da6 2011-12-19  306: 						os.unlink(file_name)
601ec56da6 2011-12-19  307: 					if 'content-length' in new_record:
601ec56da6 2011-12-19  308: 						new_record['_parts'] = spacemap.SpaceMap({0: int(new_record['content-length'])})
601ec56da6 2011-12-19  309: 				if not new_record['_parts']:
601ec56da6 2011-12-19  310: 					new_record['_parts'] = spacemap.SpaceMap()
601ec56da6 2011-12-19  311: 				print(new_record)
601ec56da6 2011-12-19  312: 
601ec56da6 2011-12-19  313: 				# downloading file or segment
601ec56da6 2011-12-19  314: 				if 'content-length' in new_record:
601ec56da6 2011-12-19  315: 					if needed == None:
601ec56da6 2011-12-19  316: 						needed = new_record['_parts']
601ec56da6 2011-12-19  317: 					else:
601ec56da6 2011-12-19  318: 						if len(needed) > 1:
601ec56da6 2011-12-19  319: 							print("Multipart requests currently not supported.")
601ec56da6 2011-12-19  320: 							assert False, 'Skip this one for now.'
601ec56da6 2011-12-19  321: 				#else:
601ec56da6 2011-12-19  322: 					#assert False, 'No content-length or Content-Range header.'
601ec56da6 2011-12-19  323: 
601ec56da6 2011-12-19  324: 				new_record['_time'] = datetime.datetime.now()
601ec56da6 2011-12-19  325: 				if self.command not in ('HEAD'):
601ec56da6 2011-12-19  326: 					# file is created at temporary location and moved in place only when download completes
601ec56da6 2011-12-19  327: 					if not os.access(temp_name, os.R_OK):
601ec56da6 2011-12-19  328: 						empty_name = config['dir'] + os.sep + '.tmp'
601ec56da6 2011-12-19  329: 						with open(empty_name, 'w+b') as some_file:
601ec56da6 2011-12-19  330: 							pass
601ec56da6 2011-12-19  331: 						os.renames(empty_name, temp_name)
601ec56da6 2011-12-19  332: 					temp_file = open(temp_name, 'r+b')
601ec56da6 2011-12-19  333: 					if requested_ranges == None and needed == None:
601ec56da6 2011-12-19  334: 						needed = new_record['_parts']
601ec56da6 2011-12-19  335: 					needed.rewind()
601ec56da6 2011-12-19  336: 					while True:
62e6d8a7ab 2012-01-16  337: 						# XXX can make this implicit - one request per range
601ec56da6 2011-12-19  338: 						(start, end) = needed.pop()
601ec56da6 2011-12-19  339: 						if start == None:
601ec56da6 2011-12-19  340: 							break
601ec56da6 2011-12-19  341: 						stream_last = start
601ec56da6 2011-12-19  342: 						old_record = copy.copy(new_record)
601ec56da6 2011-12-19  343: 						if end - start < block_size:
601ec56da6 2011-12-19  344: 							req_block_size = end - start
601ec56da6 2011-12-19  345: 						else:
601ec56da6 2011-12-19  346: 							req_block_size = block_size
601ec56da6 2011-12-19  347: 						buffer = source.read(req_block_size)
601ec56da6 2011-12-19  348: 						length = len(buffer)
601ec56da6 2011-12-19  349: 						while length > 0 and stream_last < end:
601ec56da6 2011-12-19  350: 							stream_pos = stream_last + length
601ec56da6 2011-12-19  351: 							assert stream_pos <= end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end)
601ec56da6 2011-12-19  352: 							temp_file.seek(stream_last)
601ec56da6 2011-12-19  353: 							temp_file.write(buffer)
601ec56da6 2011-12-19  354: 							x = new_record['_parts'] - spacemap.SpaceMap({stream_last: stream_pos})
601ec56da6 2011-12-19  355: 							new_record['_parts'] = new_record['_parts'] - spacemap.SpaceMap({stream_last: stream_pos})
601ec56da6 2011-12-19  356: 							index[my_path] = old_record
601ec56da6 2011-12-19  357: 							index.sync()
601ec56da6 2011-12-19  358: 							old_record = copy.copy(new_record)
601ec56da6 2011-12-19  359: 							stream_last = stream_pos
601ec56da6 2011-12-19  360: 							if end - stream_last < block_size:
601ec56da6 2011-12-19  361: 								req_block_size = end - stream_last
601ec56da6 2011-12-19  362: 							buffer = source.read(req_block_size)
601ec56da6 2011-12-19  363: 							length = len(buffer)
601ec56da6 2011-12-19  364: 					# moving downloaded data to real file
601ec56da6 2011-12-19  365: 					temp_file.close()
601ec56da6 2011-12-19  366: 
601ec56da6 2011-12-19  367: 				index[my_path] = new_record
601ec56da6 2011-12-19  368: 				index.sync()
601ec56da6 2011-12-19  369: 
601ec56da6 2011-12-19  370: 			except urllib2.HTTPError as error:
90160dbf50 2011-03-06  371: 				# in case of error we don't need to do anything actually,
90160dbf50 2011-03-06  372: 				# if file download stalls or fails the file would not be moved to it's location
90160dbf50 2011-03-06  373: 				print(error)
90160dbf50 2011-03-06  374: 
90160dbf50 2011-03-06  375: 		print(index[my_path])
90160dbf50 2011-03-06  376: 
90160dbf50 2011-03-06  377: 		if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap():
90160dbf50 2011-03-06  378: 			# just moving
90160dbf50 2011-03-06  379: 			# drop old dirs XXX
90160dbf50 2011-03-06  380: 			print('Moving temporary file to new destination.')
90160dbf50 2011-03-06  381: 			os.renames(temp_name, file_name)
90160dbf50 2011-03-06  382: 
90160dbf50 2011-03-06  383: 		if not my_path in index:
90160dbf50 2011-03-06  384: 			self.send_response(502)
90160dbf50 2011-03-06  385: 			self.end_headers()
90160dbf50 2011-03-06  386: 			return
90160dbf50 2011-03-06  387: 
90160dbf50 2011-03-06  388: 		if self.command == 'HEAD':
90160dbf50 2011-03-06  389: 			self.send_response(200)
601ec56da6 2011-12-19  390: 			if 'content-length' in index[my_path]:
601ec56da6 2011-12-19  391: 				self.send_header('content-length', index[my_path]['content-length'])
601ec56da6 2011-12-19  392: 			self.send_header('accept-ranges', 'bytes')
601ec56da6 2011-12-19  393: 			self.send_header('content-type', 'application/octet-stream')
601ec56da6 2011-12-19  394: 			if 'last-modified' in index[my_path]:
601ec56da6 2011-12-19  395: 				self.send_header('last-modified', index[my_path]['last-modified'])
90160dbf50 2011-03-06  396: 			self.end_headers()
90160dbf50 2011-03-06  397: 		else:
90160dbf50 2011-03-06  398: 			if ('_parts' in index[my_path] and index[my_path]['_parts'] != spacemap.SpaceMap()) or not os.access(file_name, os.R_OK):
90160dbf50 2011-03-06  399: 				file_name = temp_name
90160dbf50 2011-03-06  400: 
90160dbf50 2011-03-06  401: 			with open(file_name, 'rb') as real_file:
90160dbf50 2011-03-06  402: 				file_stat = os.stat(file_name)
601ec56da6 2011-12-19  403: 				if 'range' in self.headers:
90160dbf50 2011-03-06  404: 					self.send_response(206)
90160dbf50 2011-03-06  405: 					ranges = ()
90160dbf50 2011-03-06  406: 					requested_ranges.rewind()
90160dbf50 2011-03-06  407: 					while True:
90160dbf50 2011-03-06  408: 						pair = requested_ranges.pop()
90160dbf50 2011-03-06  409: 						if pair[0] == None:
90160dbf50 2011-03-06  410: 							break
90160dbf50 2011-03-06  411: 						ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)),
601ec56da6 2011-12-19  412: 					self.send_header('content-range', 'bytes {}/{}'.format(','.join(ranges), index[my_path]['content-length']))
90160dbf50 2011-03-06  413: 				else:
90160dbf50 2011-03-06  414: 					self.send_response(200)
601ec56da6 2011-12-19  415: 					self.send_header('content-length', str(file_stat.st_size))
90160dbf50 2011-03-06  416: 					requested_ranges = spacemap.SpaceMap({0: file_stat.st_size})
601ec56da6 2011-12-19  417: 				if 'last-modified' in index[my_path]:
601ec56da6 2011-12-19  418: 					self.send_header('last-modified', index[my_path]['last-modified'])
601ec56da6 2011-12-19  419: 				self.send_header('content-type', 'application/octet-stream')
90160dbf50 2011-03-06  420: 				self.end_headers()
90160dbf50 2011-03-06  421: 				if self.command in ('GET'):
90160dbf50 2011-03-06  422: 					if len(requested_ranges) > 0:
90160dbf50 2011-03-06  423: 						requested_ranges.rewind()
90160dbf50 2011-03-06  424: 						(start, end) = requested_ranges.pop()
90160dbf50 2011-03-06  425: 					else:
90160dbf50 2011-03-06  426: 						start = 0
9a8a46bcf0 2011-09-06  427: 						# XXX ugly hack
601ec56da6 2011-12-19  428: 						if 'content-length' in index[my_path]:
601ec56da6 2011-12-19  429: 							end = index[my_path]['content-length']
9a8a46bcf0 2011-09-06  430: 						else:
9a8a46bcf0 2011-09-06  431: 							end = 0
90160dbf50 2011-03-06  432: 					real_file.seek(start)
90160dbf50 2011-03-06  433: 					if block_size > end - start:
90160dbf50 2011-03-06  434: 						req_block_size = end - start
90160dbf50 2011-03-06  435: 					else:
90160dbf50 2011-03-06  436: 						req_block_size = block_size
90160dbf50 2011-03-06  437: 					buffer = real_file.read(req_block_size)
90160dbf50 2011-03-06  438: 					length = len(buffer)
90160dbf50 2011-03-06  439: 					while length > 0:
90160dbf50 2011-03-06  440: 						self.wfile.write(buffer)
90160dbf50 2011-03-06  441: 						start += len(buffer)
90160dbf50 2011-03-06  442: 						if req_block_size > end - start:
90160dbf50 2011-03-06  443: 							req_block_size = end - start
90160dbf50 2011-03-06  444: 						if req_block_size == 0:
90160dbf50 2011-03-06  445: 							break
90160dbf50 2011-03-06  446: 						buffer = real_file.read(req_block_size)
90160dbf50 2011-03-06  447: 						length = len(buffer)
90160dbf50 2011-03-06  448: 				
90160dbf50 2011-03-06  449: 	def do_HEAD(self):
90160dbf50 2011-03-06  450: 		return self.__process()
90160dbf50 2011-03-06  451: 	def do_GET(self):
90160dbf50 2011-03-06  452: 		return self.__process()
90160dbf50 2011-03-06  453: 
90160dbf50 2011-03-06  454: config.section('general')
601ec56da6 2011-12-19  455: server = BaseHTTPServer.HTTPServer(('127.0.0.1', int(config['port'])), MyRequestHandler)
90160dbf50 2011-03-06  456: server.serve_forever()