Samesite - proxy that can cache partial transfers

Check-in [439e1753a4]
anonymous

Check-in [439e1753a4]

Overview
Comment: Some changes: * one more header known; * new code to select which parts of the file should be downloaded, fixes (possibly) a very bad bug that makes continuing a download totally unusable.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256: 439e1753a4d6f176f2ff3c0250ee6d4a2bad9b83aaaba9eb4f971ddd040cd372
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-09-07 11:24:22.000
Other Links: branch diff | manifest | tags
Context
2010-09-16
12:31
Bunch of changes: * now when file is found in .parts directory it's always rechecked; * correct handling of noetag, noparts; * avoid asynchronous reading/writing to the index. check-in: c3db1a007e user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-09-07
11:24
Some changes: * one more header known; * new code to select which parts of the file should be downloaded, fixes (possibly) a very bad bug that makes continuing a download totally unusable. check-in: 439e1753a4 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-09-06
20:53
More features: * path substitution * more fields ignored in requests; * request now uses original path - not modified by stripping or substituting; * some headers are proxied to request; * when there is no file to send 502 error is returned; * recheck file before moving it to full dir; * Last-Modified only sent when present. check-in: cab908195f user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
Changes
78
79
80
81
82
83
84

85
86
87
88
89
90
91

const_desc_fields = set(['Content-Length', 'Last-Modified', 'Pragma'])
const_ignore_fields = set([
	'Accept-Ranges', 'Age',
	'Cache-Control', 'Connection', 'Content-Type',
	'Date',
	'Expires',

	'Server',
	'Via',
	'X-Cache', 'X-Cache-Lookup', 'X-Powered-By'
])

block_size = 4096








>







78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

const_desc_fields = set(['Content-Length', 'Last-Modified', 'Pragma'])
const_ignore_fields = set([
	'Accept-Ranges', 'Age',
	'Cache-Control', 'Connection', 'Content-Type',
	'Date',
	'Expires',
	'Referer',
	'Server',
	'Via',
	'X-Cache', 'X-Cache-Lookup', 'X-Powered-By'
])

block_size = 4096

277
278
279
280
281
282
283

284
285

286
287
288
289
290
291
292
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {}
			else:
				# forcibly checking file if no file present
				if os.access(file_name, os.R_OK):

					file_stat = os.stat(file_name)
				elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK):

					file_stat = os.stat(temp_name)
				else:
					info += '\nFile not found or inaccessible.'
					index[my_path]['_parts'] = None
					reload = True
				record = index[my_path]








>


>







278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {}
			else:
				# forcibly checking file if no file present
				if os.access(file_name, os.R_OK):
					info += '\nFull file found.'
					file_stat = os.stat(file_name)
				elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK):
					info += '\nPartial file found.'
					file_stat = os.stat(temp_name)
				else:
					info += '\nFile not found or inaccessible.'
					index[my_path]['_parts'] = None
					reload = True
				record = index[my_path]

320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
					request = 'http://' + config['root'] + self.path
					my_headers = {}
					for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'):
						if header in self.headers:
							my_headers[header] = self.headers[header]

					needed = None
					# XXX and if we specify full file we don't go partial?
					if requested_ranges != None:
						if '_parts' in record and record['_parts'] != None:
							if config['noparts']:
								needed = record['_parts']
							else:
								needed = record['_parts'] | requested_ranges
						elif not config['noparts']:
							needed = requested_ranges
						ranges = ()
						print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
						if needed != None and len(needed) > 0:
							needed.rewind()
							while True:
								range = needed.pop()
								if range[0] == None:
									break
								ranges += '{}-{}'.format(range[0], range[1] - 1),
							my_headers['Range'] = 'bytes=' + ','.join(ranges)

					request = urllib.request.Request(request, headers = my_headers)

					with urllib.request.urlopen(request) as source:
						new_record = {}
						new_record['_parts'] = record['_parts']
						headers = source.info()







<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







323
324
325
326
327
328
329


330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
					request = 'http://' + config['root'] + self.path
					my_headers = {}
					for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'):
						if header in self.headers:
							my_headers[header] = self.headers[header]

					needed = None


					if '_parts' in record and record['_parts'] != None:
						if config['noparts'] or requested_ranges == None:
							needed = record['_parts']
						else:
							needed = record['_parts'] | requested_ranges
					elif not config['noparts']:
						needed = requested_ranges
					ranges = ()
					print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
					if needed != None and len(needed) > 0:
						needed.rewind()
						while True:
							range = needed.pop()
							if range[0] == None:
								break
							ranges += '{}-{}'.format(range[0], range[1] - 1),
						my_headers['Range'] = 'bytes=' + ','.join(ranges)

					request = urllib.request.Request(request, headers = my_headers)

					with urllib.request.urlopen(request) as source:
						new_record = {}
						new_record['_parts'] = record['_parts']
						headers = source.info()
416
417
418
419
420
421
422


423
424
425
426
427
428
429
							# file is created at temporary location and moved in place only when download completes
							if not os.access(temp_name, os.R_OK):
								empty_name = config['dir'] + os.sep + '.tmp'
								with open(empty_name, 'w+b') as some_file:
									pass
								os.renames(empty_name, temp_name)
							temp_file = open(temp_name, 'r+b')


							needed.rewind()
							while True:
								(start, end) = needed.pop()
								if start == None:
									break
								stream_last = start
								old_record = new_record







>
>







417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
							# file is created at temporary location and moved in place only when download completes
							if not os.access(temp_name, os.R_OK):
								empty_name = config['dir'] + os.sep + '.tmp'
								with open(empty_name, 'w+b') as some_file:
									pass
								os.renames(empty_name, temp_name)
							temp_file = open(temp_name, 'r+b')
							if requested_ranges == None and needed == None:
								needed = new_record['_parts']
							needed.rewind()
							while True:
								(start, end) = needed.pop()
								if start == None:
									break
								stream_last = start
								old_record = new_record