Samesite - proxy that can cache partial transfers

Check-in [c3db1a007e]
anonymous

Check-in [c3db1a007e]

Overview
Comment: Bunch of changes: * now when file is found in .parts directory it's always rechecked; * correct handling of noetag, noparts; * avoid asynchronous reading/writing to the index.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256: c3db1a007e3f30cc1a49155dabe24fb25680505971427affb7ea5778c9ef4ce9
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-09-16 12:31:47.000
Other Links: branch diff | manifest | tags
Context
2011-03-06
09:39
cleaning up a bit check-in: 90160dbf50 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-09-16
12:31
Bunch of changes: * now when file is found in .parts directory it's always rechecked; * correct handling of noetag, noparts; * avoid asynchronous reading/writing to the index. check-in: c3db1a007e user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-09-07
11:24
Some changes: * one more header known; * new code to select which parts of file should be downloaded, fixes (possibly) a very bad bug that makes continuing download totally unusable. check-in: 439e1753a4 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
Changes
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
			# _parts - list of stored parts of file
			# _time - last time the file was checked
			# everything else is just the headers
			index = shelve.open(config['dir'] + os.sep + '.index')

			desc_fields = const_desc_fields.copy()
			ignore_fields = const_ignore_fields.copy()
			if not config['noetag']:
				desc_fields.add('ETag')
			else:
				ignore_fields.add('ETag')

			proxy_ignored = set([
				'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language',
				'Cache-Control', 'Connection', 'Content-Length', 'Cookie',







|







227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
			# _parts - list of stored parts of file
			# _time - last time the file was checked
			# everything else is just the headers
			index = shelve.open(config['dir'] + os.sep + '.index')

			desc_fields = const_desc_fields.copy()
			ignore_fields = const_ignore_fields.copy()
			if config['noetag'] == 'no':
				desc_fields.add('ETag')
			else:
				ignore_fields.add('ETag')

			proxy_ignored = set([
				'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language',
				'Cache-Control', 'Connection', 'Content-Length', 'Cookie',
277
278
279
280
281
282
283

284
285
286
287
288
289

290
291
292
293
294
295
296
297
298
299
300
301
			# space map generally covers every bit of file we don't possess currently
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {}
			else:
				# forcibly checking file if no file present

				if os.access(file_name, os.R_OK):
					info += '\nFull file found.'
					file_stat = os.stat(file_name)
				elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK):
					info += '\nPartial file found.'
					file_stat = os.stat(temp_name)

				else:
					info += '\nFile not found or inaccessible.'
					index[my_path]['_parts'] = None
					reload = True
				record = index[my_path]

			if not '_parts' in record:
				record['_parts'] = None

			if record['_parts'] == None:
				recheck = True








>






>


|

<







277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295

296
297
298
299
300
301
302
			# space map generally covers every bit of file we don't possess currently
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {}
			else:
				# forcibly checking file if no file present
				record = index[my_path]
				if os.access(file_name, os.R_OK):
					info += '\nFull file found.'
					file_stat = os.stat(file_name)
				elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK):
					info += '\nPartial file found.'
					file_stat = os.stat(temp_name)
					recheck = True
				else:
					info += '\nFile not found or inaccessible.'
					record['_parts'] = None
					reload = True


			if not '_parts' in record:
				record['_parts'] = None

			if record['_parts'] == None:
				recheck = True

324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
					my_headers = {}
					for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'):
						if header in self.headers:
							my_headers[header] = self.headers[header]

					needed = None
					if '_parts' in record and record['_parts'] != None:
						if config['noparts'] or requested_ranges == None:
							needed = record['_parts']
						else:
							needed = record['_parts'] | requested_ranges
					elif not config['noparts']:
						needed = requested_ranges
					ranges = ()
					print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
					if needed != None and len(needed) > 0:
						needed.rewind()
						while True:
							range = needed.pop()







|



|







325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
					my_headers = {}
					for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'):
						if header in self.headers:
							my_headers[header] = self.headers[header]

					needed = None
					if '_parts' in record and record['_parts'] != None:
						if config['noparts'] != 'no' or requested_ranges == None or requested_ranges == spacemap.SpaceMap():
							needed = record['_parts']
						else:
							needed = record['_parts'] | requested_ranges
					elif config['noparts'] =='no' and requested_ranges != None and requested_ranges != spacemap.SpaceMap():
						needed = requested_ranges
					ranges = ()
					print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
					if needed != None and len(needed) > 0:
						needed.rewind()
						while True:
							range = needed.pop()
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463


464
465
466
467
468
469
470
									if end - stream_last < block_size:
										req_block_size = end - stream_last
									buffer = source.read(req_block_size)
									length = len(buffer)
							# moving downloaded data to real file
							temp_file.close()

						print(new_record)
						index[my_path] = new_record
						index.sync()

				except urllib.error.HTTPError as error:
					# in case of error we don't need to do anything actually,
					# if file download stalls or fails the file would not be moved to it's location
					print(error)



			if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap():
				# just moving
				# drop old dirs XXX
				print('Moving temporary file to new destination.')
				os.renames(temp_name, file_name)








<







>
>







450
451
452
453
454
455
456

457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
									if end - stream_last < block_size:
										req_block_size = end - stream_last
									buffer = source.read(req_block_size)
									length = len(buffer)
							# moving downloaded data to real file
							temp_file.close()


						index[my_path] = new_record
						index.sync()

				except urllib.error.HTTPError as error:
					# in case of error we don't need to do anything actually,
					# if file download stalls or fails the file would not be moved to it's location
					print(error)

			print(index[my_path])

			if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap():
				# just moving
				# drop old dirs XXX
				print('Moving temporary file to new destination.')
				os.renames(temp_name, file_name)