Samesite - proxy that can cache partial transfers

Diff
anonymous

Diff

Differences From Artifact [1daa91439f]:

To Artifact [314c46b43b]:


152
153
154
155
156
157
158






159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
			# file_stat means file definitely exists
			file_stat = None
			# requested_ranges holds data about any range requested
			requested_ranges = None
			# record holds data from index locally, should be written back upon successful completion
			record = None
			info = 'Checking file: ' + self.path







			proxy_ignored = ('Accept', 'Accept-Encoding',
				'Cache-Control', 'Connection',
				'Host',
				'User-Agent',
				'Via',
				'X-Forwarded-For',
			)


			print('Command:', self.command)

			for header in self.headers:
				if header in proxy_ignored:
					pass
				elif header in ('Range'):
					isRange = re.compile('bytes=(\d+)-(\d+)').match(self.headers[header])
					if isRange:
						requested_ranges = SpaceMap({int(isRange.group(1)): int(isRange.group(2)) + 1})
					else:
						return()
				else:
					print('Unknown header - ', header, ': ', self.headers[header], sep='')
					return()
				print(header, self.headers[header])
			print(self.path)

			# creating empty placeholder in index
			# if there's no space map and there's no file in real directory - we have no file
			# if there's an empty space map - file is full
			# space map generally covers every bit of file we don't possess currently
			if not self.path in index:
				info += '\nThis one is new.'
				reload = True
				record = {'_parts': None}
			else:
				record = index[self.path]
				if '_parts' in index[self.path]:
					print(record['_parts'])
					if index[self.path]['_parts'] == {0: -1}:
						index[self.path]['_parts'] = None

			# creating file name from self.path
			file_name = options.dir + os.sep + re.compile('%20').sub(' ', self.path)
			# partial file or unfinished download
			temp_name = options.dir + os.sep + '.parts' + re.compile('%20').sub(' ', self.path)

			# forcibly checking file if no file present
			if os.access(file_name, os.R_OK):
				file_stat = os.stat(file_name)
			elif '_parts' in record and os.access(temp_name, os.R_OK):
				file_stat = os.stat(temp_name)
			elif not reload:







>
>
>
>
>
>









>















|





|




|
|

|
|

|
|

|







152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
			# file_stat means file definitely exists
			file_stat = None
			# requested_ranges holds data about any range requested
			requested_ranges = None
			# record holds data from index locally, should be written back upon successful completion
			record = None
			info = 'Checking file: ' + self.path

			myPath = re.compile('^(.*?)(\?.*)$').match(self.path)
			if myPath:
				my_path = myPath.group(1)
			else:
				my_path = self.path

			proxy_ignored = ('Accept', 'Accept-Encoding',
				'Cache-Control', 'Connection',
				'Host',
				'User-Agent',
				'Via',
				'X-Forwarded-For',
			)

			print('===============[ Request ]===')
			print('Command:', self.command)

			for header in self.headers:
				if header in proxy_ignored:
					pass
				elif header in ('Range'):
					isRange = re.compile('bytes=(\d+)-(\d+)').match(self.headers[header])
					if isRange:
						requested_ranges = SpaceMap({int(isRange.group(1)): int(isRange.group(2)) + 1})
					else:
						return()
				else:
					print('Unknown header - ', header, ': ', self.headers[header], sep='')
					return()
				print(header, self.headers[header])
			print(my_path)

			# creating empty placeholder in index
			# if there's no space map and there's no file in real directory - we have no file
			# if there's an empty space map - file is full
			# space map generally covers every bit of file we don't possess currently
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {'_parts': None}
			else:
				record = index[my_path]
				if '_parts' in index[my_path]:
					print(record['_parts'])
					if index[my_path]['_parts'] == {0: -1}:
						index[my_path]['_parts'] = None

			# creating file name from my_path
			file_name = options.dir + os.sep + re.compile('%20').sub(' ', my_path)
			# partial file or unfinished download
			temp_name = options.dir + os.sep + '.parts' + re.compile('%20').sub(' ', my_path)

			# forcibly checking file if no file present
			if os.access(file_name, os.R_OK):
				file_stat = os.stat(file_name)
			elif '_parts' in record and os.access(temp_name, os.R_OK):
				file_stat = os.stat(temp_name)
			elif not reload:
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
			if not recheck and not reload and '_time' in record and (datetime.datetime.now() - datetime.timedelta(hours = 4) - record['_time']).days < 0:
				recheck = True

			print(info)
			if reload or recheck:

				try:
					request = options.root + self.path
					if requested_ranges != None:
						if '_parts' in record and record['_parts'] != None:
							needed = record['_parts'] & requested_ranges
						else:
							needed = requested_ranges
						ranges = ()
						print('Requesting ranges:', ranges)







|







234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
			if not recheck and not reload and '_time' in record and (datetime.datetime.now() - datetime.timedelta(hours = 4) - record['_time']).days < 0:
				recheck = True

			print(info)
			if reload or recheck:

				try:
					request = options.root + my_path
					if requested_ranges != None:
						if '_parts' in record and record['_parts'] != None:
							needed = record['_parts'] & requested_ranges
						else:
							needed = requested_ranges
						ranges = ()
						print('Requesting ranges:', ranges)
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378

379

380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
										stream_pos = stream_last + length
										assert not stream_pos > end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end)
										print('Writing', length, 'bytes to temp file at position', stream_last)
										temp_file.seek(stream_last)
										temp_file.write(buffer)
										new_record['_parts'] = new_record['_parts'] - SpaceMap({stream_last: stream_pos})
										print(new_record)
										index[self.path] = old_record
										index.sync()
										old_record = new_record
										stream_last = stream_pos
										if end - stream_last < block_size:
											req_block_size = end - stream_last
										buffer = source.read(req_block_size)
										print(buffer)
										length = len(buffer)
								print(new_record)
								index[self.path] = new_record
								index.sync()
								temp_file.close()

								# moving downloaded data to real file
								if new_record['_parts'] == SpaceMap():
									if type(request) != str:
										# just moving
										# drop old dirs XXX
										print('Moving temporary file to new destination.')
										os.renames(temp_name, file_name)

				except urllib.error.HTTPError as error:
					# in case of error we don't need to do anything actually,
					# if file download stalls or fails the file would not be moved to its location
					print(error)


			if self.command == 'HEAD':

				self.send_response(200)
				if 'Content-Length' in index[self.path]:
					self.send_header('Content-Length', index[self.path]['Content-Length'])
				self.send_header('Accept-Ranges', 'bytes')
				self.send_header('Content-Type', 'application/octet-stream')
				if 'Last-Modified' in index[self.path]:
					self.send_header('Last-Modified', index[self.path]['Last-Modified'])
				self.end_headers()
			else:
				if index[self.path]['_parts'] != SpaceMap():
					file_name = temp_name

				with open(file_name, 'rb') as real_file:
					file_stat = os.stat(file_name)
					self.send_response(200)
					self.send_header('Last-Modified', index[self.path]['Last-Modified'])
					if requested_ranges != None:
						ranges = ()
						requested_ranges.rewind()
						while True:
							pair = requested_ranges.pop()
							if pair[0] == None:
								break
							ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)),
						self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[self.path]['Content-Length'])
					else:
						self.send_header('Content-Length', str(file_stat.st_size))
						requested_ranges = SpaceMap({0: file_stat.st_size})
					self.send_header('Content-Type', 'application/octet-stream')
					self.end_headers()
					if self.command in ('GET'):
						requested_ranges.rewind()







|









|





<
|
|
|
|






>

>

|
|


|
|


|





|








|







352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374

375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
										stream_pos = stream_last + length
										assert not stream_pos > end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end)
										print('Writing', length, 'bytes to temp file at position', stream_last)
										temp_file.seek(stream_last)
										temp_file.write(buffer)
										new_record['_parts'] = new_record['_parts'] - SpaceMap({stream_last: stream_pos})
										print(new_record)
										index[my_path] = old_record
										index.sync()
										old_record = new_record
										stream_last = stream_pos
										if end - stream_last < block_size:
											req_block_size = end - stream_last
										buffer = source.read(req_block_size)
										print(buffer)
										length = len(buffer)
								print(new_record)
								index[my_path] = new_record
								index.sync()
								temp_file.close()

								# moving downloaded data to real file
								if new_record['_parts'] == SpaceMap():

									# just moving
									# drop old dirs XXX
									print('Moving temporary file to new destination.')
									os.renames(temp_name, file_name)

				except urllib.error.HTTPError as error:
					# in case of error we don't need to do anything actually,
					# if file download stalls or fails the file would not be moved to its location
					print(error)

			print('Sending response.')
			if self.command == 'HEAD':
				print('Sending HEAD response.')
				self.send_response(200)
				if 'Content-Length' in index[my_path]:
					self.send_header('Content-Length', index[my_path]['Content-Length'])
				self.send_header('Accept-Ranges', 'bytes')
				self.send_header('Content-Type', 'application/octet-stream')
				if 'Last-Modified' in index[my_path]:
					self.send_header('Last-Modified', index[my_path]['Last-Modified'])
				self.end_headers()
			else:
				if index[my_path]['_parts'] != SpaceMap():
					file_name = temp_name

				with open(file_name, 'rb') as real_file:
					file_stat = os.stat(file_name)
					self.send_response(200)
					self.send_header('Last-Modified', index[my_path]['Last-Modified'])
					if requested_ranges != None:
						ranges = ()
						requested_ranges.rewind()
						while True:
							pair = requested_ranges.pop()
							if pair[0] == None:
								break
							ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)),
						self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[my_path]['Content-Length'])
					else:
						self.send_header('Content-Length', str(file_stat.st_size))
						requested_ranges = SpaceMap({0: file_stat.st_size})
					self.send_header('Content-Type', 'application/octet-stream')
					self.end_headers()
					if self.command in ('GET'):
						requested_ranges.rewind()