Samesite - proxy that can cache partial transfers

Check-in [827033dd7e]

Overview
Comment: adding verbose support, i.e. silencing most of the output by default
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256: 827033dd7e7d4c1f0c9ae7c656c9a0aa9d4862857210625c865e24248e265119
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-08-04 07:55:30.000
Other Links: branch diff | manifest | tags
Context
2010-08-20 14:09
personal web server implemented: * can download, cache and serve data; * supports caching for partial requests. check-in: 80f8e3804a user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-08-04 07:55
adding verbose support, i.e. silencing most of the output by default. check-in: 827033dd7e user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
2010-07-26 09:38
option to skip ETag processing, X-Powered-By ignored. check-in: 38b25713eb user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 tags: master, trunk
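
The hunks below hide per-file progress messages behind an options.verbose flag. The declaration of that option is not part of this diff; purely as an illustration (the flag name, default and help text here are assumptions, not taken from the check-in), with optparse, which the options.* usage suggests, it could look like:

from optparse import OptionParser

# hypothetical declaration, not taken from this check-in
parser = OptionParser()
parser.add_option('-v', '--verbose', action = 'store_true', dest = 'verbose', default = False,
	help = 'report every file checked, not only actual downloads')
options, arguments = parser.parse_args()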
Changes

Old version (lines 39-86):

			this_line = log_line.match(line.strip())
			if this_line:
				unchecked_files.add(this_line.group(2))

	for url in unchecked_files:
		reload = False
		recheck = False
		print('Checking file:', url)

		# creating empty placeholder in index
		if not url in index:
			print('This one is new.')
			index[url] = {}
			reload = True

		# creating file name from url
		file_name = options.dir + re.compile('%20').sub(' ', url)

		# forcibly checking file if no file present
		if not reload and not os.access(file_name, os.R_OK):
			print('File not found or inaccessible.')
			reload = True

		# forcibly checking file if file size doesn't match with index data
		elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
			print('File size is ', os.stat(file_name).st_size, ' and stored file size is ', index[url]['Content-Length'], '.', sep='')
			reload = True

		# forcibly checking file if index holds Pragma header
		if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
			print('Pragma on: recheck imminent.')
			recheck = True

		# skipping file processing if there's no need to recheck it and it was last checked less than 4 hours ago
		if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
			continue

		try:
			with urllib.request.urlopen(options.root + url) as source:
				new_headers = {}
				headers = source.info()

				# stripping unneeded headers (XXX make this inplace?)
				for header in headers:
					if header in desc_fields:
						if header == 'Pragma' and headers[header] != 'no-cache':
							print('Pragma:', headers[header])
						new_headers[header] = headers[header]

New version (lines 39-91):

			this_line = log_line.match(line.strip())
			if this_line:
				unchecked_files.add(this_line.group(2))

	for url in unchecked_files:
		reload = False
		recheck = False
		info = 'Checking file: ' + url

		# creating empty placeholder in index
		if not url in index:
			info += '\nThis one is new.'
			index[url] = {}
			reload = True

		# creating file name from url
		file_name = options.dir + re.compile('%20').sub(' ', url)

		# forcibly checking file if no file present
		if not reload and not os.access(file_name, os.R_OK):
			info += '\nFile not found or inaccessible.'
			reload = True

		# forcibly checking file if file size doesn't match with index data
		elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
			info += '\nFile size is ' + str(os.stat(file_name).st_size) + ' and stored file size is ' + index[url]['Content-Length'] + '.'
			reload = True

		# forcibly checking file if index holds Pragma header
		if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
			info += '\nPragma on: recheck imminent.'
			recheck = True

		if options.verbose:
			print(info)

		# skipping file processing if there's no need to recheck it and it was last checked less than 4 hours ago
		if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
			continue

		try:
			with urllib.request.urlopen(options.root + url) as source:
				new_headers = {}
				headers = source.info()
				if not options.verbose:
					print(info)

				# stripping unneeded headers (XXX make this inplace?)
				for header in headers:
					if header in desc_fields:
						if header == 'Pragma' and headers[header] != 'no-cache':
							print('Pragma:', headers[header])
						new_headers[header] = headers[header]
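
A note on the four-hour test above: timedelta arithmetic normalizes negative intervals, so when the last check happened within the past four hours, now() - timedelta(hours = 4) - index[url]['__time__'] is negative and its days attribute is -1; older timestamps give days >= 0 and the file is rechecked. A standalone illustration (the variable names are made up, only the expression mirrors the script):

import datetime

now = datetime.datetime.now()

last_check = now - datetime.timedelta(hours = 1)
# -3 hours normalizes to days = -1, so the result is True and the file is skipped
print((now - datetime.timedelta(hours = 4) - last_check).days < 0)

last_check = now - datetime.timedelta(hours = 5)
# +1 hour gives days = 0, so the result is False and the file is rechecked
print((now - datetime.timedelta(hours = 4) - last_check).days < 0)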

Old version (lines 143-154):

				index.sync()

		except urllib.error.HTTPError as error:
			# in case of error we don't actually need to do anything:
			# if the file download stalls or fails, the file won't be moved to its location
			print(error)

	print('[', len(unchecked_files), '/', checked_files, ']')

	# checking if any files were downloaded; if so, restart the sequence
	if checked_files == 0:
		break

New version (lines 148-160):

				index.sync()

		except urllib.error.HTTPError as error:
			# in case of error we don't actually need to do anything:
			# if the file download stalls or fails, the file won't be moved to its location
			print(error)

	if options.verbose:
		print('[', len(unchecked_files), '/', checked_files, ']')

	# checking if any files were downloaded; if so, restart the sequence
	if checked_files == 0:
		break
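
The final break exits an enclosing loop that lies outside this hunk: the script keeps re-reading the log and re-checking files until a pass downloads nothing new. Roughly, the surrounding pattern is the following sketch (the helper names are invented for illustration, not taken from the file):

while True:
	unchecked_files = collect_urls_from_log()	# hypothetical helper
	checked_files = 0
	for url in unchecked_files:
		if refresh_file(url):	# hypothetical helper, True when a download happened
			checked_files += 1
	# stop once a full pass downloads nothing new
	if checked_files == 0:
		break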