Index: samesite.py ================================================================== --- samesite.py +++ samesite.py @@ -10,10 +10,12 @@ }, '_other': { 'verbose': 'no', 'noetag': 'no', 'noparts': 'no', + 'strip': '', + 'sub': '', },} # function to read in config file def __init__(self): import configparser, optparse @@ -72,12 +74,20 @@ config = Config() #assert options.port or os.access(options.log, os.R_OK), 'Log file unreadable' -const_desc_fields = set(['Content-Length', 'Pragma', 'Last-Modified']) -const_ignore_fields = set(['Accept-Ranges', 'Age', 'Cache-Control', 'Connection', 'Content-Type', 'Date', 'Expires', 'Server', 'Via', 'X-Cache', 'X-Cache-Lookup', 'X-Powered-By']) +const_desc_fields = set(['Content-Length', 'Last-Modified', 'Pragma']) +const_ignore_fields = set([ + 'Accept-Ranges', 'Age', + 'Cache-Control', 'Connection', 'Content-Type', + 'Date', + 'Expires', + 'Server', + 'Via', + 'X-Cache', 'X-Cache-Lookup', 'X-Powered-By' +]) block_size = 4096 ''' # later, kqueue would be good but later @@ -193,19 +203,24 @@ file_stat = None # requested_ranges holds data about any range requested requested_ranges = None # records holds data from index locally, should be written back upon successfull completion record = None - info = 'Checking file: ' + self.path myPath = re.compile('^(.*?)(\?.*)$').match(self.path) if myPath: my_path = myPath.group(1) else: my_path = self.path config.section(self.headers['Host']) + + if config['sub'] != None and config['strip'] != None and len(config['strip']) > 0: + string = re.compile(config['strip']).sub(config['sub'], my_path) + my_path = string + + info = 'Checking file: ' + my_path if not os.access(config['dir'], os.X_OK): os.mkdir(config['dir']) # this is file index - everything is stored in this file # _parts - list of stored parts of file @@ -218,18 +233,20 @@ if not config['noetag']: desc_fields.add('ETag') else: ignore_fields.add('ETag') - proxy_ignored = ('Accept', 'Accept-Encoding', - 'Cache-Control', 'Connection', + proxy_ignored = set([ + 'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language', + 'Cache-Control', 'Connection', 'Content-Length', 'Cookie', 'Host', 'If-Modified-Since', 'If-Unmodified-Since', + 'Referer', 'User-Agent', 'Via', - 'X-Forwarded-For', - ) + 'X-Forwarded-For', 'X-REMOVED', + ]) print('===============[ {} request ]==='.format(self.command)) for header in self.headers: if header in proxy_ignored: @@ -271,12 +288,10 @@ info += '\nFile not found or inaccessible.' index[my_path]['_parts'] = None reload = True record = index[my_path] - print(record) - if not '_parts' in record: record['_parts'] = None if record['_parts'] == None: recheck = True @@ -300,11 +315,16 @@ print(info) if reload or recheck: try: - request = 'http://' + config['root'] + my_path + request = 'http://' + config['root'] + self.path + my_headers = {} + for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'): + if header in self.headers: + my_headers[header] = self.headers[header] + needed = None # XXX and if we specify full file we don't go partial? if requested_ranges != None: if '_parts' in record and record['_parts'] != None: if config['noparts']: @@ -320,11 +340,13 @@ while True: range = needed.pop() if range[0] == None: break ranges += '{}-{}'.format(range[0], range[1] - 1), - request = urllib.request.Request(request, headers = {'Range': 'bytes=' + ','.join(ranges)}) + my_headers['Range'] = 'bytes=' + ','.join(ranges) + + request = urllib.request.Request(request, headers = my_headers) with urllib.request.urlopen(request) as source: new_record = {} new_record['_parts'] = record['_parts'] headers = source.info() @@ -435,15 +457,20 @@ except urllib.error.HTTPError as error: # in case of error we don't need to do anything actually, # if file download stalls or fails the file would not be moved to it's location print(error) - if '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap(): + if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap(): # just moving # drop old dirs XXX print('Moving temporary file to new destination.') os.renames(temp_name, file_name) + + if not my_path in index: + self.send_response(502) + self.end_headers() + return if self.command == 'HEAD': self.send_response(200) if 'Content-Length' in index[my_path]: self.send_header('Content-Length', index[my_path]['Content-Length']) @@ -470,11 +497,12 @@ self.send_header('Content-Range', 'bytes {}/{}'.format(','.join(ranges), index[my_path]['Content-Length'])) else: self.send_response(200) self.send_header('Content-Length', str(file_stat.st_size)) requested_ranges = spacemap.SpaceMap({0: file_stat.st_size}) - self.send_header('Last-Modified', index[my_path]['Last-Modified']) + if 'Last-Modified' in index[my_path]: + self.send_header('Last-Modified', index[my_path]['Last-Modified']) self.send_header('Content-Type', 'application/octet-stream') self.end_headers() if self.command in ('GET'): if len(requested_ranges) > 0: requested_ranges.rewind()