Samesite - proxy that can cache partial transfers

Check-in [083ec707ea]

Overview
Comment: changed logic and some log messages
SHA3-256: 083ec707eae26f95b17a1a822c165f2e5d222cc48919d6b0720ffe967362a714
User & Date: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525 on 2010-07-06 15:02:07.000
Context
2010-07-26 09:38  option to skip ETag processing, X-Powered-By ignored  (check-in: 38b25713eb, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
2010-07-06 15:02  changed logic and some log messages  (check-in: 083ec707ea, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
2010-07-05 14:52  enchanced logging  (check-in: 53dcfdb8f7, user: c.kworr@b84a3442-36b4-a7b2-c7ad-07429f13c525, tags: master, trunk)
Changes
@@ -42,44 +42,47 @@
 		log_line = re.compile('^[^ ]+ - - \[.*] "(GET|HEAD) (.*?)(\?.*)? HTTP/1.1" (\d+) \d+ "(.*)" "(.*)"$')
 		for line in log_file:
 			this_line = log_line.match(line.strip())
 			if this_line:
 				unchecked_files.add(this_line.group(2))
 
 	for url in unchecked_files:
+		reload = False
+		recheck = False
+		print('Checking file:', url)
 
 		# creating empty placeholder in index
 		if not url in index:
+			print('This one is new.')
 			index[url] = {}
-		reload = False
+			reload = True
 
 		# creating file name from url
 		file_name = options.dir + re.compile('%20').sub(' ', url)
-		print('Checking file:', url)
 
 		# forcibly checking file if no file present
-		if not os.access(file_name, os.R_OK):
+		if not reload and not os.access(file_name, os.R_OK):
 			print('File not found or inaccessible.')
 			reload = True
 
 		# forcibly checking file if file size doesn't match with index data
-		elif 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
-			print('File size is', os.stat(file_name).st_size, 'and stored file size is', index[url]['Content-Length'])
+		elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
+			print('File size is ', os.stat(file_name).st_size, ' and stored file size is ', index[url]['Content-Length'], '.', sep='')
 			reload = True
 
 		# forcibly checking file if index hods Pragma header
-		if 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
-			print('Pragma on: recheck iminent.')
-			reload = True
+		if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
+			print('Pragma on: recheck imminent.')
+			recheck = True
 
 		# skipping file processing if there's no need to recheck it and we have checked it at least 4 hours ago
-		if not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
+		if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
			continue
+
 		try:
-			print('Contacting website.')
 			with urllib.request.urlopen(options.root + url) as source:
 				new_headers = {}
 				headers = source.info()
 
 				# stripping unneeded headers (XXX make this inplace?)
 				for header in headers:
 					if header in desc_fields:
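
The rewritten hunk changes the per-URL decision flow: reload now forces a full re-download (new URL, missing or unreadable file, size mismatch), while the new recheck flag, set when the stored index entry carries Pragma: no-cache, only bypasses the 4-hour freshness window instead of forcing a download. A minimal sketch of that flow under the same names, assuming index maps each URL to its stored headers plus a '__time__' timestamp; the helper needs_contact is hypothetical:

	import datetime
	import os

	def needs_contact(url, index, file_name, now=None):
		# Condensed, hypothetical version of the logic in the hunk above.
		now = now or datetime.datetime.now()
		reload, recheck = False, False

		# brand-new URL: create a placeholder and force a download
		if url not in index:
			index[url] = {}
			reload = True

		# cached file missing or unreadable: force a download
		if not reload and not os.access(file_name, os.R_OK):
			reload = True

		# size disagrees with the stored Content-Length: force a download
		elif not reload and 'Content-Length' in index[url] and \
				os.stat(file_name).st_size != int(index[url]['Content-Length']):
			reload = True

		# Pragma: no-cache only demands revalidation, not a download
		if not reload and index[url].get('Pragma') == 'no-cache':
			recheck = True

		# otherwise honour the 4-hour freshness window
		if not recheck and not reload and '__time__' in index[url] and \
				(now - datetime.timedelta(hours=4) - index[url]['__time__']).days < 0:
			return False
		return True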
@@ -95,17 +98,15 @@
 				old_keys = set(index[url].keys())
 				old_keys.discard('__time__')
 				old_keys.discard('Pragma')
 				more_keys = set(new_headers.keys()) - old_keys
 				more_keys.discard('Pragma')
 				less_keys = old_keys - set(new_headers.keys())
 				if len(more_keys) > 0:
-					if len(old_keys) == 0:
-						print('No data on that file yet.')
-					else:
+					if not len(old_keys) == 0:
 						print('More headers appear:', more_keys)
 					reload = True
 				elif len(less_keys) > 0:
 					print('Less headers appear:', less_keys)
 					reload = True
 				else:
 					for key in index[url].keys():
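
The second hunk tightens a log message: 'More headers appear' is now printed only when the index already held header data for the URL, so brand-new entries, which previously logged 'No data on that file yet.', are now silent here. A worked example of the set arithmetic this comparison relies on, with hypothetical header values:

	# hypothetical stored entry and freshly received headers for one URL
	index_entry = {'Content-Length': '1024',
		'Last-Modified': 'Tue, 06 Jul 2010 15:02:07 GMT', '__time__': None}
	new_headers = {'Content-Length': '1024',
		'Last-Modified': 'Tue, 06 Jul 2010 15:02:07 GMT', 'ETag': '"abc"'}

	old_keys = set(index_entry.keys())
	old_keys.discard('__time__')	# bookkeeping key, not an HTTP header
	old_keys.discard('Pragma')

	more_keys = set(new_headers.keys()) - old_keys	# headers the server started sending
	more_keys.discard('Pragma')
	less_keys = old_keys - set(new_headers.keys())	# headers it stopped sending

	print(more_keys)	# {'ETag'} -> header set changed, reload the file
	print(less_keys)	# set()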