Samesite - proxy that can cache partial transfers

Differences From Artifact [81ccd8c170] To Artifact [f084a96edb]:

			this_line = log_line.match(line.strip())
			if this_line:
				unchecked_files.add(this_line.group(2))
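			# NOTE: log_line is a regex compiled earlier in the script (not shown in
			# this hunk); it is assumed here to capture the requested URL in group 2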

	for url in unchecked_files:
		reload = False
		recheck = False
		print('Checking file:', url)
		info = 'Checking file: ' + url

		# creating empty placeholder in index
		if url not in index:
			print('This one is new.')
			info += '\nThis one is new.'
			index[url] = {}
			reload = True

		# creating file name from url
		file_name = options.dir + re.compile('%20').sub(' ', url)
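		# only '%20' is decoded here; any other percent-escapes in the url are kept
		# verbatim, so the on-disk name mirrors the url almost literally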

		# forcibly checking file if no file present
		if not reload and not os.access(file_name, os.R_OK):
			print('File not found or inaccessible.')
			info += '\nFile not found or inaccessible.'
			reload = True

		# forcibly checking file if file size doesn't match with index data
		elif not reload and 'Content-Length' in index[url] and os.stat(file_name).st_size != int(index[url]['Content-Length']):
			print('File size is ', os.stat(file_name).st_size, ' and stored file size is ', index[url]['Content-Length'], '.', sep='')
			info += '\nFile size is ' + str(os.stat(file_name).st_size) + ' and stored file size is ' + index[url]['Content-Length'] + '.'
			reload = True

		# forcibly checking file if index holds Pragma header
		if not reload and 'Pragma' in index[url] and index[url]['Pragma'] == 'no-cache':
			print('Pragma on: recheck imminent.')
			info += '\nPragma on: recheck imminent.'
			recheck = True

		if options.verbose:
			print(info)

		# skipping file processing if there's no need to recheck it and the last check
		# was less than 4 hours ago (the timedelta's .days is negative only in that case)
		if not recheck and not reload and '__time__' in index[url] and (datetime.datetime.now() - datetime.timedelta(hours = 4) - index[url]['__time__']).days < 0:
			continue

		try:
			with urllib.request.urlopen(options.root + url) as source:
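				# options.root is the upstream base URL that the relative url is
				# appended to; presumably the missing hunk below compares the fresh
				# response headers against what the index has stored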
				new_headers = {}
				headers = source.info()
				if not options.verbose:
					print(info)

				# stripping unneeded headers (XXX make this inplace?)
				for header in headers:
					if header in desc_fields:
						if header == 'Pragma' and headers[header] != 'no-cache':
							print('Pragma:', headers[header])
						new_headers[header] = headers[header]
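					# only headers whitelisted in desc_fields (defined earlier in the
					# script) are kept; everything else from the response is dropped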
				index.sync()
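				# index is assumed to be a persistent mapping (e.g. a shelve opened
				# elsewhere with writeback=True); sync() flushes pending writes to disk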

		except urllib.error.HTTPError as error:
			# in case of an error we don't actually need to do anything:
			# if the file download stalls or fails, the file is simply not moved to its location
			print(error)

	if options.verbose:
		print('[', len(unchecked_files), '/', checked_files, ']')

	# checking if there were any files downloaded; if yes, the sequence restarts
	if checked_files == 0:
		break