Index: samesite.py ================================================================== --- samesite.py +++ samesite.py @@ -1,104 +1,68 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.2 from __future__ import unicode_literals, print_function #import gevent.monkey #gevent.monkey.patch_all() -import bsddb.dbshelve, copy, datetime, os, BaseHTTPServer, sys, spacemap, re, urllib2 - -class Config: - __slots__ = frozenset(['_config', '_default', '_section', 'options', 'root']) - _default = { - 'general': { - 'port': '8008', - }, - '_other': { - 'verbose': 'no', - 'noetag': 'no', - 'noparts': 'no', - 'strip': '', - 'sub': '', - 'proto': 'http', - },} - - # function to read in config file - def __init__(self): - import ConfigParser, optparse - - parser = optparse.OptionParser() - parser.add_option('-c', '--config', dest = 'config', help = 'config file location', metavar = 'FILE', default = 'samesite.conf') - (self.options, args) = parser.parse_args() - - assert os.access(self.options.config, os.R_OK), "Fatal error: can't read {}".format(self.options.config) - - configDir = re.compile('^(.*)/[^/]+$').match(self.options.config) - if configDir: - self.root = configDir.group(1) - else: - self.root = os.getcwd() - - self._config = ConfigParser.ConfigParser() - self._config.readfp(open(self.options.config)) - - for section in self._config.sections(): - if section != 'general': - if self._config.has_option(section, 'dir'): - if re.compile('^/$').match(self._config.get(section, 'dir')): - self._config.set(section, 'dir', self.root + os.sep + section) - thisDir = re.compile('^(.*)/$').match(self._config.get(section, 'dir')) - if thisDir: - self._config.set(section, 'dir', thisDir.group(1)) - if not re.compile('^/(.*)$').match(self._config.get(section, 'dir')): - self._config.set(section, 'dir', self.root + os.sep + self._config.get(section, 'dir')) - else: - self._config.set(section, 'dir', self.root + os.sep + section) - - if not self._config.has_option(section, 'root'): - self._config.set(section, 'root', section) - - # function to select config file section or create one - def section(self, section): - if not self._config.has_section(section): - self._config.add_section(section) - self._section = section - - # function to get config parameter, if parameter doesn't exists the default - # value or None is substituted - def __getitem__(self, name): - if not self._config.has_option(self._section, name): - if self._section in self._default: - if name in self._default[self._section]: - self._config.set(self._section, name, self._default[self._section][name]) - else: - self._config.set(self._section, name, None) - elif name in self._default['_other']: - self._config.set(self._section, name, self._default['_other'][name]) - else: - self._config.set(self._section, name, None) - return(self._config.get(self._section, name)) - -config = Config() +import argparse, os +parser = argparse.ArgumentParser() +parser.add_argument('-c', '--config', dest = 'config', help = 'config file location', metavar = 'FILE', default = 'samesite.conf') +args = parser.parse_args() +assert os.access(args.config, os.R_OK), "Fatal error: can't read {}".format(args.config) + +import configparser +config = configparser.ConfigParser({ + 'port': '8008', + 'verbose': 'no', + 'noetag': 'no', + 'noparts': 'no', + 'strip': '', + 'sub': '', + 'proto': 'http', +}) +config.read(args.config) + +cache_dir = os.path.realpath(os.path.dirname(args.config)) + +import re +for section in config.sections(): + if section != 'DEFAULT': + if 'dir' in config[section]: + if not re.compile('^/.*').match(config[section]['dir']): + config[section]['dir'] = cache_dir + os.sep + section + thisDir = re.compile('^(.*)/$').match(config[section]['dir']) + if thisDir: + config[section]['dir'] = thisDir.group(1) + if not re.compile('^/(.*)$').match(config[section]['dir']): + config[section]['dir'] = cache_dir + os.sep + config[section]['dir'] + else: + config[section]['dir'] = cache_dir + os.sep + section + + if not 'root' in config[section]: + config[section]['root'] = section #assert options.port or os.access(options.log, os.R_OK), 'Log file unreadable' -const_desc_fields = set(['content-length', 'last-modified', 'pragma']) +const_desc_fields = set(['Content-Length', 'Last-Modified', 'Pragma']) const_ignore_fields = set([ - 'accept-ranges', 'age', - 'cache-control', 'connection', 'content-type', - 'date', - 'expires', - 'referer', - 'server', - 'via', - 'x-cache', 'x-cache-lookup', 'x-livetool', 'x-powered-by', + 'Accept-Ranges', 'Age', + 'Cache-Control', 'Connection', 'Content-Type', + 'Date', + 'Expires', + 'Referer', + 'Server', + 'Via', + 'X-Cache', 'X-Cache-Lookup', 'X-Livetool', 'X-Powered-By', ]) block_size = 8192 -class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): +import bsddb3.dbshelve, copy, datetime, http.server, spacemap, urllib.request, urllib.error + +class MyRequestHandler(http.server.BaseHTTPRequestHandler): def __process(self): # reload means file needs to be reloaded to serve request reload = False # recheck means file needs to be checked, this also means that if file hav been modified we can serve older copy recheck = False @@ -113,83 +77,84 @@ if myPath: my_path = myPath.group(1) else: my_path = self.path - config.section(self.headers['host']) + config_host = config[self.headers['Host']] - if config['sub'] != None and config['strip'] != None and len(config['strip']) > 0: - string = re.compile(config['strip']).sub(config['sub'], my_path) + if config_host['sub'] != None and config_host['strip'] != None and len(config_host['strip']) > 0: + string = re.compile(config_host['strip']).sub(config_host['sub'], my_path) my_path = string + my_path_b = my_path.encode('utf-8') info = 'Checking file: ' + my_path - if not os.access(config['dir'], os.X_OK): - os.mkdir(config['dir']) + if not os.access(config_host['dir'], os.X_OK): + os.mkdir(config_host['dir']) # this is file index - everything is stored in this file # _parts - list of stored parts of file # _time - last time the file was checked # everything else is just the headers - index = bsddb.dbshelve.open(config['dir'] + os.sep + '.index') + index = bsddb3.dbshelve.open(config_host['dir'] + os.sep + '.index') desc_fields = const_desc_fields.copy() ignore_fields = const_ignore_fields.copy() - if config['noetag'] == 'no': + if config_host['noetag'] == 'no': desc_fields.add('etag') else: ignore_fields.add('etag') proxy_ignored = set([ - 'accept', 'accept-charset', 'accept-encoding', 'accept-language', - 'cache-control', 'connection', 'content-length', 'cookie', - 'host', - 'if-modified-since', 'if-unmodified-since', - 'referer', - 'ua-cpu', 'user-agent', - 'via', - 'x-forwarded-for', 'x-last-hr', 'x-last-http-status-code', 'x-removed', 'x-real-ip', 'x-retry-count', + 'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language', + 'Cache-Control', 'Connection', 'Content-Length', 'Cookie', + 'Host', + 'If-Modified-Since', 'If-Unmodified-Since', + 'Referer', + 'Ua-Cpu', 'User-Agent', + 'Via', + 'X-Forwarded-For', 'X-Last-Hr', 'X-Last-Http-Status-Code', 'X-Removed', 'X-Real-Ip', 'X-Retry-Count', ]) print('===============[ {} request ]==='.format(self.command)) for header in self.headers: if header in proxy_ignored: pass - elif header in ('range'): + elif header in ('Range'): isRange = re.compile('bytes=(\d+)-(\d+)').match(self.headers[header]) if isRange: requested_ranges = spacemap.SpaceMap({int(isRange.group(1)): int(isRange.group(2)) + 1}) else: return() - elif header in ('pragma'): - if my_path in index: - index[my_path][header] = self.headers[header] + elif header in ('Pragma'): + if my_path_b in index: + index[my_path_b][header] = self.headers[header] else: print('Unknown header - ', header, ': ', self.headers[header], sep='') return() print(header, self.headers[header]) # creating file name from my_path - file_name = config['dir'] + os.sep + re.compile('%20').sub(' ', my_path) + file_name = config_host['dir'] + os.sep + re.compile('%20').sub(' ', my_path) # partial file or unfinished download - temp_name = config['dir'] + os.sep + '.parts' + re.compile('%20').sub(' ', my_path) + temp_name = config_host['dir'] + os.sep + '.parts' + re.compile('%20').sub(' ', my_path) # creating empty placeholder in index # if there's no space map and there's no file in real directory - we have no file # if there's an empty space map - file is full # space map generally covers every bit of file we don't posess currently - if not my_path in index: + if not my_path_b in index: info += '\nThis one is new.' reload = True record = {} else: # forcibly checking file if no file present - record = index[my_path] + record = index[my_path_b] if os.access(file_name, os.R_OK): info += '\nFull file found.' file_stat = os.stat(file_name) - elif '_parts' in index[my_path] and os.access(temp_name, os.R_OK): + elif '_parts' in index[my_path_b] and os.access(temp_name, os.R_OK): info += '\nPartial file found.' file_stat = os.stat(temp_name) recheck = True else: info += '\nFile not found or inaccessible.' @@ -222,24 +187,24 @@ print(info) if reload or recheck: try: - request = config['proto'] + '://' + config['root'] + self.path + request = config_host['proto'] + '://' + config_host['root'] + self.path my_headers = {} - for header in ('cache-control', 'cookie', 'referer', 'user-agent'): + for header in ('Cache-Control', 'Cookie', 'Referer', 'User-Agent'): if header in self.headers: my_headers[header] = self.headers[header] needed = None if self.command not in ('HEAD'): if '_parts' in record and record['_parts'] != None: - if config['noparts'] != 'no' or requested_ranges == None or requested_ranges == spacemap.SpaceMap(): + if config_host['noparts'] != 'no' or requested_ranges == None or requested_ranges == spacemap.SpaceMap(): needed = record['_parts'] else: needed = record['_parts'] & requested_ranges - elif config['noparts'] =='no' and requested_ranges != None and requested_ranges != spacemap.SpaceMap(): + elif config_host['noparts'] =='no' and requested_ranges != None and requested_ranges != spacemap.SpaceMap(): needed = requested_ranges ranges = () print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed)) if needed != None and len(needed) > 0: needed.rewind() @@ -246,37 +211,37 @@ while True: range = needed.pop() if range[0] == None: break ranges += '{}-{}'.format(range[0], range[1] - 1), - my_headers['range'] = 'bytes=' + ','.join(ranges) + my_headers['Range'] = 'bytes=' + ','.join(ranges) my_headers['Accept-Encoding'] = 'gzip, compress, deflate, identity; q=0' - request = urllib2.Request(request, headers = my_headers) + request = urllib.request.Request(request, headers = my_headers) - source = urllib2.urlopen(request, timeout = 60) + source = urllib.request.urlopen(request, timeout = 60) new_record = {} new_record['_parts'] = record['_parts'] headers = source.info() - if 'content-encoding' in headers and headers['content-encoding'] == 'gzip': + if 'Content-Encoding' in headers and headers['Content-Encoding'] == 'gzip': import gzip source = gzip.GzipFile(fileobj=source) # stripping unneeded headers (XXX make this inplace?) for header in headers: if header in desc_fields: #if header == 'Pragma' and headers[header] != 'no-cache': - if header == 'content-length': - if 'content-range' not in headers: + if header == 'Content-Length': + if 'Content-Range' not in headers: new_record[header] = int(headers[header]) else: new_record[header] = headers[header] - elif header == 'content-range': + elif header == 'Content-Range': range = re.compile('^bytes (\d+)-(\d+)/(\d+)$').match(headers[header]) if range: - new_record['content-length'] = int(range.group(3)) + new_record['Content-Length'] = int(range.group(3)) else: assert False, 'Content-Range unrecognized.' elif not header in ignore_fields: print('Undefined header "', header, '": ', headers[header], sep='') @@ -283,23 +248,23 @@ # comparing headers with data found in index # if any header has changed (except Pragma) file is fully downloaded # same if we get more or less headers old_keys = set(record.keys()) old_keys.discard('_time') - old_keys.discard('pragma') + old_keys.discard('Pragma') more_keys = set(new_record.keys()) - old_keys - more_keys.discard('pragma') + more_keys.discard('Pragma') less_keys = old_keys - set(new_record.keys()) if len(more_keys) > 0: if len(old_keys) != 0: print('More headers appear:', more_keys) reload = True elif len(less_keys) > 0: print('Less headers appear:', less_keys) else: for key in record.keys(): - if key[0] != '_' and key != 'pragma' and record[key] != new_record[key]: + if key[0] != '_' and key != 'Pragma' and record[key] != new_record[key]: print('Header "', key, '" changed from [', record[key], '] to [', new_record[key], ']', sep='') print(type(record[key]), type(new_record[key])) reload = True if reload: @@ -306,18 +271,18 @@ print('Reloading.') if os.access(temp_name, os.R_OK): os.unlink(temp_name) if os.access(file_name, os.R_OK): os.unlink(file_name) - if 'content-length' in new_record: - new_record['_parts'] = spacemap.SpaceMap({0: int(new_record['content-length'])}) + if 'Content-Length' in new_record: + new_record['_parts'] = spacemap.SpaceMap({0: int(new_record['Content-Length'])}) if not new_record['_parts']: new_record['_parts'] = spacemap.SpaceMap() print(new_record) # downloading file or segment - if 'content-length' in new_record: + if 'Content-Length' in new_record: if needed == None: needed = new_record['_parts'] else: if len(needed) > 1: print("Multipart requests currently not supported.") @@ -327,11 +292,11 @@ new_record['_time'] = datetime.datetime.now() if self.command not in ('HEAD'): # file is created at temporary location and moved in place only when download completes if not os.access(temp_name, os.R_OK): - empty_name = config['dir'] + os.sep + '.tmp' + empty_name = config_host['dir'] + os.sep + '.tmp' with open(empty_name, 'w+b') as some_file: pass os.renames(empty_name, temp_name) temp_file = open(temp_name, 'r+b') if requested_ranges == None and needed == None: @@ -355,11 +320,11 @@ assert stream_pos <= end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end) temp_file.seek(stream_last) temp_file.write(buffer) x = new_record['_parts'] - spacemap.SpaceMap({stream_last: stream_pos}) new_record['_parts'] = new_record['_parts'] - spacemap.SpaceMap({stream_last: stream_pos}) - index[my_path] = old_record + index[my_path_b] = old_record index.sync() old_record = copy.copy(new_record) stream_last = stream_pos if end - stream_last < block_size: req_block_size = end - stream_last @@ -366,73 +331,73 @@ buffer = source.read(req_block_size) length = len(buffer) # moving downloaded data to real file temp_file.close() - index[my_path] = new_record + index[my_path_b] = new_record index.sync() - except urllib2.HTTPError as error: + except urllib.error.HTTPError as error: # in case of error we don't need to do anything actually, # if file download stalls or fails the file would not be moved to it's location print(error) - print(index[my_path]) + print(index[my_path_b]) - if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path] and index[my_path]['_parts'] == spacemap.SpaceMap(): + if not os.access(file_name, os.R_OK) and os.access(temp_name, os.R_OK) and '_parts' in index[my_path_b] and index[my_path_b]['_parts'] == spacemap.SpaceMap(): # just moving # drop old dirs XXX print('Moving temporary file to new destination.') os.renames(temp_name, file_name) - if not my_path in index: + if not my_path_b in index: self.send_response(502) self.end_headers() return if self.command == 'HEAD': self.send_response(200) - if 'content-length' in index[my_path]: - self.send_header('content-length', index[my_path]['content-length']) - self.send_header('accept-ranges', 'bytes') - self.send_header('content-type', 'application/octet-stream') - if 'last-modified' in index[my_path]: - self.send_header('last-modified', index[my_path]['last-modified']) + if 'Content-Length' in index[my_path_b]: + self.send_header('Content-Length', index[my_path_b]['Content-Length']) + self.send_header('Accept-Ranges', 'bytes') + self.send_header('Content-Type', 'application/octet-stream') + if 'Last-Modified' in index[my_path_b]: + self.send_header('Last-Modified', index[my_path_b]['Last-Modified']) self.end_headers() else: - if ('_parts' in index[my_path] and index[my_path]['_parts'] != spacemap.SpaceMap()) or not os.access(file_name, os.R_OK): + if ('_parts' in index[my_path_b] and index[my_path_b]['_parts'] != spacemap.SpaceMap()) or not os.access(file_name, os.R_OK): file_name = temp_name with open(file_name, 'rb') as real_file: file_stat = os.stat(file_name) - if 'range' in self.headers: + if 'Range' in self.headers: self.send_response(206) ranges = () requested_ranges.rewind() while True: pair = requested_ranges.pop() if pair[0] == None: break ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)), - self.send_header('content-range', 'bytes {}/{}'.format(','.join(ranges), index[my_path]['content-length'])) + self.send_header('Content-Range', 'bytes {}/{}'.format(','.join(ranges), index[my_path_b]['Content-Length'])) else: self.send_response(200) - self.send_header('content-length', str(file_stat.st_size)) + self.send_header('Content-Length', str(file_stat.st_size)) requested_ranges = spacemap.SpaceMap({0: file_stat.st_size}) - if 'last-modified' in index[my_path]: - self.send_header('last-modified', index[my_path]['last-modified']) - self.send_header('content-type', 'application/octet-stream') + if 'Last-Modified' in index[my_path_b]: + self.send_header('Last-Modified', index[my_path_b]['Last-Modified']) + self.send_header('Content-Type', 'application/octet-stream') self.end_headers() if self.command in ('GET'): if len(requested_ranges) > 0: requested_ranges.rewind() (start, end) = requested_ranges.pop() else: start = 0 # XXX ugly hack - if 'content-length' in index[my_path]: - end = index[my_path]['content-length'] + if 'Content-Length' in index[my_path_b]: + end = index[my_path_b]['Content-Length'] else: end = 0 real_file.seek(start) if block_size > end - start: req_block_size = end - start @@ -453,10 +418,9 @@ def do_HEAD(self): return self.__process() def do_GET(self): return self.__process() -config.section('general') -server = BaseHTTPServer.HTTPServer(('127.0.0.1', int(config['port'])), MyRequestHandler) +server = http.server.HTTPServer(('127.0.0.1', int(config['DEFAULT']['port'])), MyRequestHandler) server.serve_forever() #gevent.joinall()