Samesite - proxy that can cache partial transfers

Check-in [d0071bdbc7]
anonymous

Check-in [d0071bdbc7]

Overview
Comment:small fix for stripping unneeded request parts from file path
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256: d0071bdbc7b3fc6eba1a3b36b7ded4696b01d3a9b237212525b4a66bc251604f
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-08-20 14:25:06.000
Other Links: branch diff | manifest | tags
Context
2010-08-21
10:59
added license and todo; many fixes in web server, now it's mostly functional; web server was tested by Windows Update (Win7, WinXP, Win2003) check-in: fb10031536 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-08-20
14:25
small fix for stripping unneeded request parts from file path check-in: d0071bdbc7 user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
14:09
personal web server implemented: * can download, cache and serve data; * supports caching for partial requests. check-in: 80f8e3804a user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
Changes
152
153
154
155
156
157
158






159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

184
185
186
187
188
189

190
191
192
193
194
195


196
197
198


199
200
201


202
203

204
205
206
207
208
209
210
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

190
191
192
193
194
195

196
197
198
199
200


201
202
203


204
205
206


207
208
209

210
211
212
213
214
215
216
217







+
+
+
+
+
+









+















-
+





-
+




-
-
+
+

-
-
+
+

-
-
+
+

-
+







			# file_stat means file definitely exists
			file_stat = None
			# requested_ranges holds data about any range requested
			requested_ranges = None
			# record holds data from index locally, should be written back upon successful completion
			record = None
			info = 'Checking file: ' + self.path

			myPath = re.compile('^(.*?)(\?.*)$').match(self.path)
			if myPath:
				my_path = myPath.group(1)
			else:
				my_path = self.path

			proxy_ignored = ('Accept', 'Accept-Encoding',
				'Cache-Control', 'Connection',
				'Host',
				'User-Agent',
				'Via',
				'X-Forwarded-For',
			)

			print('===============[ Request ]===')
			print('Command:', self.command)

			for header in self.headers:
				if header in proxy_ignored:
					pass
				elif header in ('Range'):
					isRange = re.compile('bytes=(\d+)-(\d+)').match(self.headers[header])
					if isRange:
						requested_ranges = SpaceMap({int(isRange.group(1)): int(isRange.group(2)) + 1})
					else:
						return()
				else:
					print('Unknown header - ', header, ': ', self.headers[header], sep='')
					return()
				print(header, self.headers[header])
			print(self.path)
			print(my_path)

			# creating empty placeholder in index
			# if there's no space map and there's no file in real directory - we have no file
			# if there's an empty space map - file is full
			# space map generally covers every bit of file we don't possess currently
			if not self.path in index:
			if not my_path in index:
				info += '\nThis one is new.'
				reload = True
				record = {'_parts': None}
			else:
				record = index[self.path]
				if '_parts' in index[self.path]:
				record = index[my_path]
				if '_parts' in index[my_path]:
					print(record['_parts'])
					if index[self.path]['_parts'] == {0: -1}:
						index[self.path]['_parts'] = None
					if index[my_path]['_parts'] == {0: -1}:
						index[my_path]['_parts'] = None

			# creating file name from self.path
			file_name = options.dir + os.sep + re.compile('%20').sub(' ', self.path)
			# creating file name from my_path
			file_name = options.dir + os.sep + re.compile('%20').sub(' ', my_path)
			# partial file or unfinished download
			temp_name = options.dir + os.sep + '.parts' + re.compile('%20').sub(' ', self.path)
			temp_name = options.dir + os.sep + '.parts' + re.compile('%20').sub(' ', my_path)

			# forcibly checking file if no file present
			if os.access(file_name, os.R_OK):
				file_stat = os.stat(file_name)
			elif '_parts' in record and os.access(temp_name, os.R_OK):
				file_stat = os.stat(temp_name)
			elif not reload:
227
228
229
230
231
232
233
234

235
236
237
238
239
240
241
234
235
236
237
238
239
240

241
242
243
244
245
246
247
248







-
+







			if not recheck and not reload and '_time' in record and (datetime.datetime.now() - datetime.timedelta(hours = 4) - record['_time']).days < 0:
				recheck = True

			print(info)
			if reload or recheck:

				try:
					request = options.root + self.path
					request = options.root + my_path
					if requested_ranges != None:
						if '_parts' in record and record['_parts'] != None:
							needed = record['_parts'] & requested_ranges
						else:
							needed = requested_ranges
						ranges = ()
						print('Requesting ranges:', ranges)
345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
360
361
362

363
364
365
366
367
368
369
370
371
372




373
374
375
376
377
378

379

380
381
382


383
384
385
386


387
388
389

390
391
392
393
394
395

396
397
398
399
400
401
402
403
404

405
406
407
408
409
410
411
352
353
354
355
356
357
358

359
360
361
362
363
364
365
366
367
368

369
370
371
372
373
374





375
376
377
378
379
380
381
382
383
384
385
386
387
388


389
390
391
392


393
394
395
396

397
398
399
400
401
402

403
404
405
406
407
408
409
410
411

412
413
414
415
416
417
418
419







-
+









-
+





-
-
-
-
-
+
+
+
+






+

+

-
-
+
+


-
-
+
+


-
+





-
+








-
+







										stream_pos = stream_last + length
										assert not stream_pos > end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end)
										print('Writing', length, 'bytes to temp file at position', stream_last)
										temp_file.seek(stream_last)
										temp_file.write(buffer)
										new_record['_parts'] = new_record['_parts'] - SpaceMap({stream_last: stream_pos})
										print(new_record)
										index[self.path] = old_record
										index[my_path] = old_record
										index.sync()
										old_record = new_record
										stream_last = stream_pos
										if end - stream_last < block_size:
											req_block_size = end - stream_last
										buffer = source.read(req_block_size)
										print(buffer)
										length = len(buffer)
								print(new_record)
								index[self.path] = new_record
								index[my_path] = new_record
								index.sync()
								temp_file.close()

								# moving downloaded data to real file
								if new_record['_parts'] == SpaceMap():
									if type(request) != str:
										# just moving
										# drop old dirs XXX
										print('Moving temporary file to new destination.')
										os.renames(temp_name, file_name)
									# just moving
									# drop old dirs XXX
									print('Moving temporary file to new destination.')
									os.renames(temp_name, file_name)

				except urllib.error.HTTPError as error:
					# in case of error we don't need to do anything actually,
					# if file download stalls or fails the file would not be moved to its location
					print(error)

			print('Sending response.')
			if self.command == 'HEAD':
				print('Sending HEAD response.')
				self.send_response(200)
				if 'Content-Length' in index[self.path]:
					self.send_header('Content-Length', index[self.path]['Content-Length'])
				if 'Content-Length' in index[my_path]:
					self.send_header('Content-Length', index[my_path]['Content-Length'])
				self.send_header('Accept-Ranges', 'bytes')
				self.send_header('Content-Type', 'application/octet-stream')
				if 'Last-Modified' in index[self.path]:
					self.send_header('Last-Modified', index[self.path]['Last-Modified'])
				if 'Last-Modified' in index[my_path]:
					self.send_header('Last-Modified', index[my_path]['Last-Modified'])
				self.end_headers()
			else:
				if index[self.path]['_parts'] != SpaceMap():
				if index[my_path]['_parts'] != SpaceMap():
					file_name = temp_name

				with open(file_name, 'rb') as real_file:
					file_stat = os.stat(file_name)
					self.send_response(200)
					self.send_header('Last-Modified', index[self.path]['Last-Modified'])
					self.send_header('Last-Modified', index[my_path]['Last-Modified'])
					if requested_ranges != None:
						ranges = ()
						requested_ranges.rewind()
						while True:
							pair = requested_ranges.pop()
							if pair[0] == None:
								break
							ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)),
						self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[self.path]['Content-Length'])
						self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[my_path]['Content-Length'])
					else:
						self.send_header('Content-Length', str(file_stat.st_size))
						requested_ranges = SpaceMap({0: file_stat.st_size})
					self.send_header('Content-Type', 'application/octet-stream')
					self.end_headers()
					if self.command in ('GET'):
						requested_ranges.rewind()