Samesite - proxy that can cache partial transfers

Check-in [c3db1a007e]

Overview
Comment: Bunch of changes: * now when a file is found in the .parts directory it is always rechecked; * correct handling of noetag and noparts; * avoid asynchronous reading and writing of the index.
SHA3-256: c3db1a007e3f30cc1a49155dabe24fb25680505971427affb7ea5778c9ef4ce9
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-09-16 12:31:47.000
Context
2011-03-06 09:39
cleaning up a bit check-in: 90160dbf50 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk

2010-09-16 12:31
Bunch of changes: * now when a file is found in the .parts directory it is always rechecked; * correct handling of noetag and noparts; * avoid asynchronous reading and writing of the index. check-in: c3db1a007e user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk

2010-09-07 11:24
Some changes: * one more header known; * new code to select which parts of a file should be downloaded, fixing (possibly) a very bad bug that made continuing a download totally unusable. check-in: 439e1753a4 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
Changes
@@ -227,15 +227,15 @@
 			# _parts - list of stored parts of file
 			# _time - last time the file was checked
 			# everything else is just the headers
 			index = shelve.open(config['dir'] + os.sep + '.index')
 
 			desc_fields = const_desc_fields.copy()
 			ignore_fields = const_ignore_fields.copy()
-			if not config['noetag']:
+			if config['noetag'] == 'no':
 				desc_fields.add('ETag')
 			else:
 				ignore_fields.add('ETag')
 
 			proxy_ignored = set([
 				'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language',
 				'Cache-Control', 'Connection', 'Content-Length', 'Cookie',
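
The hunk above swaps a truthiness test for an explicit string comparison. The config values are evidently the strings 'yes' and 'no' rather than booleans, so the old test (if not config['noetag']) could never fire: any non-empty string is truthy in Python. A minimal sketch of the pitfall, using an invented config dict:

    # Hypothetical config dict: the option is stored as a string, and in
    # Python any non-empty string is truthy, so the negation never fires.
    config = {'noetag': 'no'}

    if not config['noetag']:
        print('never reached: "no" is a non-empty, truthy string')

    if config['noetag'] == 'no':
        print('reached: the explicit comparison reads the setting correctly')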
@@ -277,25 +277,26 @@
 			# space map generally covers every bit of file we don't posess currently
 			if not my_path in index:
 				info += '\nThis one is new.'
 				reload = True
 				record = {}
 			else:
 				# forcibly checking file if no file present
+				record = index[my_path]
 				if os.access(file_name, os.R_OK):
 					info += '\nFull file found.'
 					file_stat = os.stat(file_name)
 				elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK):
 					info += '\nPartial file found.'
 					file_stat = os.stat(temp_name)
+					recheck = True
 				else:
 					info += '\nFile not found or inaccessible.'
-					index[my_path]['_parts'] = None
+					record['_parts'] = None
 					reload = True
-				record = index[my_path]
 
 			if not '_parts' in record:
 				record['_parts'] = None
 
 			if record['_parts'] == None:
 				recheck = True
 
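The hunk above reads the index record into a local variable once, mutates that copy, and stores it back explicitly instead of assigning through index[my_path][...]. That lines up with the check-in comment about avoiding asynchronous reading and writing of the index: with a default shelve (writeback=False), index[key] unpickles and returns a fresh copy, so mutating that copy in place is silently lost. A small sketch of the gotcha, using a throwaway shelf path:

    import shelve

    # Throwaway path, for illustration only.
    with shelve.open('/tmp/example-index') as index:
        index['item'] = {'_parts': 1}

        # Lost update: index['item'] returns a fresh copy, so this
        # mutation never reaches the underlying database.
        index['item']['_parts'] = None
        print(index['item']['_parts'])   # still 1

        # The pattern used in the hunk above: copy, mutate, store back.
        record = index['item']
        record['_parts'] = None
        index['item'] = record
        print(index['item']['_parts'])   # None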
@@ -324,19 +325,19 @@
 					my_headers = {}
 					for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'):
 						if header in self.headers:
 							my_headers[header] = self.headers[header]
 
 					needed = None
 					if '_parts' in record and record['_parts'] != None:
-						if config['noparts'] or requested_ranges == None:
+						if config['noparts'] != 'no' or requested_ranges == None or requested_ranges == spacemap.SpaceMap():
 							needed = record['_parts']
 						else:
 							needed = record['_parts'] | requested_ranges
-					elif not config['noparts']:
+					elif config['noparts'] == 'no' and requested_ranges != None and requested_ranges != spacemap.SpaceMap():
 						needed = requested_ranges
 					ranges = ()
 					print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed))
 					if needed != None and len(needed) > 0:
 						needed.rewind()
 						while True:
 							range = needed.pop()
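
The hunk above tightens the selection of byte ranges to download. Per the comment at the top of the previous hunk, record['_parts'] is a space map of the ranges not yet possessed; unless partial handling is disabled (noparts) or the request carries no usable ranges, it is unioned with the client's requested ranges. The sketch below imitates that selection with plain sets of (start, end) tuples standing in for spacemap.SpaceMap; all names and values are invented:

    # Stand-ins: ranges still missing from the cached file, and the
    # ranges the client requested, as (start, end) byte tuples.
    missing_parts = {(0, 1024), (4096, 8192)}
    requested_ranges = {(4096, 8192), (8192, 16384)}
    noparts = 'no'

    if noparts != 'no' or not requested_ranges:
        # Partial handling disabled, or nothing usable requested:
        # fetch everything that is still missing.
        needed = missing_parts
    else:
        # Union: everything still missing plus everything requested.
        needed = missing_parts | requested_ranges

    print(sorted(needed))   # [(0, 1024), (4096, 8192), (8192, 16384)]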
@@ -449,22 +450,23 @@
 									if end - stream_last < block_size:
 										req_block_size = end - stream_last
 									buffer = source.read(req_block_size)
 									length = len(buffer)
 							# moving downloaded data to real file
 							temp_file.close()
 
-						print(new_record)
 						index[my_path] = new_record
 						index.sync()
 
 				except urllib.error.HTTPError as error:
 					# in case of error we don't need to do anything actually,
 					# if file download stalls or fails the file would not be moved to it's location
 					print(error)
 
+
+			print(index[my_path])
 
 			if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap():
 				# just moving
 				# drop old dirs XXX
 				print('Moving temporary file to new destination.')
 				os.renames(temp_name, file_name)
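
The closing check relies on an empty space map meaning nothing is left to fetch: once index[my_path]['_parts'] compares equal to a fresh spacemap.SpaceMap(), the file in the .parts directory is complete and is renamed into its final place. os.renames, unlike a plain os.rename, also creates missing target directories and prunes source directories left empty, which appears to be what the nearby '# drop old dirs XXX' note is about. A minimal sketch with invented paths:

    import os

    temp_name = '/tmp/cache/.parts/example/file.bin'   # hypothetical partial file
    file_name = '/tmp/cache/example/file.bin'          # hypothetical final location

    if os.access(temp_name, os.R_OK) and not os.access(file_name, os.R_OK):
        # os.renames creates intermediate directories for the target and
        # prunes directories left empty under the source after the move.
        os.renames(temp_name, file_name)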