Samesite - proxy that can cache partial transfers

Check-in [083ec707ea]

Overview
Comment: changed logic and some log messages
SHA3-256: 083ec707eae26f95b17a1a822c165f2e5d222cc48919d6b0720ffe967362a714
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-07-06 15:02:07.000
Context
2010-07-26 09:38
option to skip ETag processing, X-Powered-By ignored (check-in: 38b25713eb, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
2010-07-06 15:02
changed logic and some log messages (check-in: 083ec707ea, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
2010-07-05 14:52
enchanced logging (check-in: 53dcfdb8f7, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
Changes
Old version (lines 42-85):
		log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
		for line in log_file:
			this_line = log_line.match(line.strip())
			if this_line:
				unchecked_files.add(this_line.group(2))

	for url in unchecked_files:

		# creating empty placeholder in index
		if not url in index:
			index[url] = {}
		reload = False

		# creating file name from url
		file_name = options.dir + re.compile('%20').sub(' ', url)
		print('Checking file:', url)

		# forcibly checking file if no file present
		if not os.access(file_name, os.R_OK):
			print('File not found or inaccessible.')
			reload = True

		# forcibly checking file if file size doesn't match with index data
		elif 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
			print('File size is', os.stat(file_name).st_size, 'and stored file size is', index[url]['Content-Length'])
			reload = True

		# forcibly checking file if index holds Pragma header
		if 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
			print('Pragma on: recheck iminent.')
			reload = True

		# skipping file processing if there's no need to recheck it and we checked it less than 4 hours ago
		if not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
			continue

		try:
			print('Contacting website.')
			with urllib.request.urlopen(options.root + url) as source:
				new_headers = {}
				headers = source.info()

				# stripping unneeded headers (XXX make this inplace?)
				for header in headers:
					if header in desc_fields:
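Both revisions seed unchecked_files from the web server's access log with the regular expression shown at the top of the hunk; group(2) is the request path with any query string stripped. A standalone sketch of what that pattern captures (the log line below is invented for illustration):

import re

# Same pattern as in the snippet above, written as a raw string; the sample
# access-log line is made up for illustration.
log_line = re.compile(r'^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
sample = '192.0.2.1 - - [06/Jul/2010:15:02:07 +0300] "GET /pub/image.iso?mirror=1 HTTP/1.1" 200 4096 "-" "Wget/1.12"'

match = log_line.match(sample)
if match:
	# group(2) is what the script adds to unchecked_files
	print(match.group(2))   # /pub/image.iso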
New version (lines 42-88):
		log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
		for line in log_file:
			this_line = log_line.match(line.strip())
			if this_line:
				unchecked_files.add(this_line.group(2))

	for url in unchecked_files:
		reload = False
		recheck = False
		print('Checking file:', url)

		# creating empty placeholder in index
		if not url in index:
			print('This one is new.')
			index[url] = {}
			reload = True

		# creating file name from url
		file_name = options.dir + re.compile('%20').sub(' ', url)

		# forcibly checking file if no file present
		if not reload and not os.access(file_name, os.R_OK):
			print('File not found or inaccessible.')
			reload = True

		# forcibly checking file if file size doesn't match with index data
		elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
			print('File size is ', os.stat(file_name).st_size, ' and stored file size is ', index[url]['Content-Length'], '.', sep='')
			reload = True

		# forcibly checking file if index holds Pragma header
		if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
			print('Pragma on: recheck imminent.')
			recheck = True

		# skipping file processing if there's no need to recheck it and we checked it less than 4 hours ago
		if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
			continue

		try:

			with urllib.request.urlopen(options.root + url) as source:
				new_headers = {}
				headers = source.info()

				# stripping unneeded headers (XXX make this inplace?)
				for header in headers:
					if header in desc_fields:
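In the new logic a URL that is not yet in the index is reloaded outright, the file checks are skipped once reload is already set, and a Pragma: no-cache header only schedules a recheck instead of a full reload, so the 4-hour skip now has to clear both flags. A minimal sketch of that gate, assuming an entry dictionary shaped like index[url] with a '__time__' datetime:

import datetime

# Hypothetical helper mirroring the skip condition above; 'entry' stands in
# for index[url], 'reload' and 'recheck' for the flags set earlier.
def may_skip(entry, reload, recheck):
	if reload or recheck or '__time__' not in entry:
		return False
	# Same test as (now - 4 hours - __time__).days < 0: the file was
	# checked less than four hours ago.
	return datetime.datetime.now() - entry['__time__'] < datetime.timedelta(hours=4)

entry = {'__time__': datetime.datetime.now() - datetime.timedelta(hours=1)}
print(may_skip(entry, reload=False, recheck=False))  # True: checked recently, skip it
print(may_skip(entry, reload=False, recheck=True))   # False: Pragma forces a recheck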
Old version (lines 95-111):
				old_keys = set(index[url].keys())
				old_keys.discard('__time__')
				old_keys.discard('Pragma')
				more_keys = set(new_headers.keys()) - old_keys
				more_keys.discard('Pragma')
				less_keys = old_keys - set(new_headers.keys())
				if len(more_keys) > 0:
					if len(old_keys) == 0:
						print('No data on that file yet.')
					else:
						print('More headers appear:', more_keys)
					reload = True
				elif len(less_keys) > 0:
					print('Less headers appear:', less_keys)
					reload = True
				else:
					for key in index[url].keys():
New version (lines 98-112):
				old_keys = set(index[url].keys())
				old_keys.discard('__time__')
				old_keys.discard('Pragma')
				more_keys = set(new_headers.keys()) - old_keys
				more_keys.discard('Pragma')
				less_keys = old_keys - set(new_headers.keys())
				if len(more_keys) > 0:
					if not len(old_keys) == 0:
						print('More headers appear:', more_keys)
					reload = True
				elif len(less_keys) > 0:
					print('Less headers appear:', less_keys)
					reload = True
				else:
					for key in index[url].keys():
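The only change in this hunk is dropping the 'No data on that file yet.' branch: a URL with no stored headers is now reported as new earlier in the loop, so the message became redundant. The header comparison itself still forces a re-download whenever headers appear or disappear; a hedged sketch with illustrative dictionaries standing in for index[url] and the filtered response headers:

# Illustrative only: 'stored' plays the role of index[url], 'fetched' the
# role of new_headers after unneeded fields have been stripped.
def headers_changed(stored, fetched):
	old_keys = set(stored) - {'__time__', 'Pragma'}
	more_keys = set(fetched) - {'Pragma'} - old_keys
	less_keys = old_keys - set(fetched)
	# Any header gained or lost means the cached copy can no longer be trusted.
	return bool(more_keys or less_keys)

stored = {'Content-Length': '4096', '__time__': 'placeholder'}
fetched = {'Content-Length': '4096', 'Last-Modified': 'Tue, 06 Jul 2010 15:02:07 GMT'}
print(headers_changed(stored, fetched))  # True: Last-Modified appeared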