Samesite - proxy that can cache partial transfers

Diff
anonymous

Diff

Differences From Artifact [1daa91439f]:

To Artifact [314c46b43b]:


152
153
154
155
156
157
158






159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

184
185
186
187
188
189

190
191
192
193
194
195


196
197
198


199
200
201


202
203

204
205
206
207
208
209
210
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

190
191
192
193
194
195

196
197
198
199
200


201
202
203


204
205
206


207
208
209

210
211
212
213
214
215
216
217







+
+
+
+
+
+









+















-
+





-
+




-
-
+
+

-
-
+
+

-
-
+
+

-
+







			# file_stat means file definitely exists
			file_stat = None
			# requested_ranges holds data about any range requested
			requested_ranges = None
			# record holds data from the local index; it should be written back upon successful completion
			record = None
			info = 'Checking file: ' + self.path

			myPath = re.compile('^(.*?)(\?.*)$').match(self.path)
			if myPath:
				my_path = myPath.group(1)
			else:
				my_path = self.path

			proxy_ignored = ('Accept', 'Accept-Encoding',
				'Cache-Control', 'Connection',
				'Host',
				'User-Agent',
				'Via',
				'X-Forwarded-For',
			)

			print('===============[ Request ]===')
			print('Command:', self.command)

			for header in self.headers:
				if header in proxy_ignored:
					pass
				elif header in ('Range'):
					isRange = re.compile('bytes=(\d+)-(\d+)').match(self.headers[header])
					if isRange:
						requested_ranges = SpaceMap({int(isRange.group(1)): int(isRange.group(2)) + 1})
					else:
						return()
				else:
					print('Unknown header - ', header, ': ', self.headers[header], sep='')
					return()
				print(header, self.headers[header])
			print(self.path)
			print(my_path)

			# creating empty placeholder in index
			# if there's no space map and there's no file in real directory - we have no file
			# if there's an empty space map - file is full
			# space map generally covers every bit of file we don't possess currently
			if not self.path in index:
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {'_parts': None}
			else:
				record = index[self.path]
				if '_parts' in index[self.path]:
				record = index[my_path]
				if '_parts' in index[my_path]:
					print(record['_parts'])
					if index[self.path]['_parts'] == {0: -1}:
						index[self.path]['_parts'] = None
					if index[my_path]['_parts'] == {0: -1}:
						index[my_path]['_parts'] = None

			# creating file name from self.path
			file_name = options.dir + os.sep + re.compile('%20').sub(' ', self.path)
			# creating file name from my_path
			file_name = options.dir + os.sep + re.compile('%20').sub(' ', my_path)
			# partial file or unfinished download
			temp_name = options.dir + os.sep + '.parts' + re.compile('%20').sub(' ', self.path)
			temp_name = options.dir + os.sep + '.parts' + re.compile('%20').sub(' ', my_path)

			# forcibly checking file if no file present
			if os.access(file_name, os.R_OK):
				file_stat = os.stat(file_name)
			elif '_parts' in record and os.access(temp_name, os.R_OK):
				file_stat = os.stat(temp_name)
			elif not reload:
227
228
229
230
231
232
233
234

235
236
237
238
239
240
241
234
235
236
237
238
239
240

241
242
243
244
245
246
247
248







-
+







			if not recheck and not reload and '_time' in record and (datetime.datetime.now() - datetime.timedelta(hours = 4) - record['_time']).days < 0:
				recheck = True

			print(info)
			if reload or recheck:

				try:
					request = options.root + self.path
					request = options.root + my_path
					if requested_ranges != None:
						if '_parts' in record and record['_parts'] != None:
							needed = record['_parts'] & requested_ranges
						else:
							needed = requested_ranges
						ranges = ()
						print('Requesting ranges:', ranges)
345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
360
361
362

363
364
365
366
367
368
369
370
371
372




373
374
375
376
377
378

379

380
381
382


383
384
385
386


387
388
389

390
391
392
393
394
395

396
397
398
399
400
401
402
403
404

405
406
407
408
409
410
411
352
353
354
355
356
357
358

359
360
361
362
363
364
365
366
367
368

369
370
371
372
373
374





375
376
377
378
379
380
381
382
383
384
385
386
387
388


389
390
391
392


393
394
395
396

397
398
399
400
401
402

403
404
405
406
407
408
409
410
411

412
413
414
415
416
417
418
419







-
+









-
+





-
-
-
-
-
+
+
+
+






+

+

-
-
+
+


-
-
+
+


-
+





-
+








-
+







										stream_pos = stream_last + length
										assert not stream_pos > end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end)
										print('Writing', length, 'bytes to temp file at position', stream_last)
										temp_file.seek(stream_last)
										temp_file.write(buffer)
										new_record['_parts'] = new_record['_parts'] - SpaceMap({stream_last: stream_pos})
										print(new_record)
										index[self.path] = old_record
										index[my_path] = old_record
										index.sync()
										old_record = new_record
										stream_last = stream_pos
										if end - stream_last < block_size:
											req_block_size = end - stream_last
										buffer = source.read(req_block_size)
										print(buffer)
										length = len(buffer)
								print(new_record)
								index[self.path] = new_record
								index[my_path] = new_record
								index.sync()
								temp_file.close()

								# moving downloaded data to real file
								if new_record['_parts'] == SpaceMap():
									if type(request) != str:
										# just moving
										# drop old dirs XXX
										print('Moving temporary file to new destination.')
										os.renames(temp_name, file_name)
									# just moving
									# drop old dirs XXX
									print('Moving temporary file to new destination.')
									os.renames(temp_name, file_name)

				except urllib.error.HTTPError as error:
					# in case of error we don't actually need to do anything:
					# if the file download stalls or fails, the file will not be moved to its location
					print(error)

			print('Sending response.')
			if self.command == 'HEAD':
				print('Sending HEAD response.')
				self.send_response(200)
				if 'Content-Length' in index[self.path]:
					self.send_header('Content-Length', index[self.path]['Content-Length'])
				if 'Content-Length' in index[my_path]:
					self.send_header('Content-Length', index[my_path]['Content-Length'])
				self.send_header('Accept-Ranges', 'bytes')
				self.send_header('Content-Type', 'application/octet-stream')
				if 'Last-Modified' in index[self.path]:
					self.send_header('Last-Modified', index[self.path]['Last-Modified'])
				if 'Last-Modified' in index[my_path]:
					self.send_header('Last-Modified', index[my_path]['Last-Modified'])
				self.end_headers()
			else:
				if index[self.path]['_parts'] != SpaceMap():
				if index[my_path]['_parts'] != SpaceMap():
					file_name = temp_name

				with open(file_name, 'rb') as real_file:
					file_stat = os.stat(file_name)
					self.send_response(200)
					self.send_header('Last-Modified', index[self.path]['Last-Modified'])
					self.send_header('Last-Modified', index[my_path]['Last-Modified'])
					if requested_ranges != None:
						ranges = ()
						requested_ranges.rewind()
						while True:
							pair = requested_ranges.pop()
							if pair[0] == None:
								break
							ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)),
						self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[self.path]['Content-Length'])
						self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[my_path]['Content-Length'])
					else:
						self.send_header('Content-Length', str(file_stat.st_size))
						requested_ranges = SpaceMap({0: file_stat.st_size})
					self.send_header('Content-Type', 'application/octet-stream')
					self.end_headers()
					if self.command in ('GET'):
						requested_ranges.rewind()