ADDED LICENSE Index: LICENSE ================================================================== --- /dev/null +++ LICENSE @@ -0,0 +1,10 @@ +Copyright (c) 2009-2010, Volodymyr Kostyrko +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * No names of software contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ADDED TODO Index: TODO ================================================================== --- /dev/null +++ TODO @@ -0,0 +1,19 @@ +Some things I should take care of: + + * If-(Unm|M)odified-Since +This one is totally ignored. Code should be added to correctly return 304. + + * Multiple byte ranges +Totally unsupported, although requests with multiple byte ranges can be generated. 
+ + * Handling of files without Content-Length +Those ones wouldn't work now. + + * Get piece of file when issuing a HEAD request +Why not? This way we will silently complete all of the partial files. + + * File recheck +Currently when file is fully downloaded it gets stored in real file tree to be served by production-grade web-server. This way we will never see any requests to that file. Finding oldest ones and moving them to .parts would suffice. + + * File time +Currently when file is downloaded it gets current time, not the time in Last-Modified. Index: samesite.py ================================================================== --- samesite.py +++ samesite.py @@ -164,17 +164,17 @@ my_path = self.path proxy_ignored = ('Accept', 'Accept-Encoding', 'Cache-Control', 'Connection', 'Host', + 'If-Modified-Since', 'If-Unmodified-Since', 'User-Agent', 'Via', 'X-Forwarded-For', ) - print('===============[ Request ]===') - print('Command:', self.command) + print('===============[ {} request ]==='.format(self.command)) for header in self.headers: if header in proxy_ignored: pass elif header in ('Range'): @@ -185,11 +185,10 @@ return() else: print('Unknown header - ', header, ': ', self.headers[header], sep='') return() print(header, self.headers[header]) - print(my_path) # creating empty placeholder in index # if there's no space map and there's no file in real directory - we have no file # if there's an empty space map - file is full # space map generally covers every bit of file we don't posess currently @@ -198,11 +197,10 @@ reload = True record = {'_parts': None} else: record = index[my_path] if '_parts' in index[my_path]: - print(record['_parts']) if index[my_path]['_parts'] == {0: -1}: index[my_path]['_parts'] = None # creating file name from my_path file_name = options.dir + os.sep + re.compile('%20').sub(' ', my_path) @@ -214,10 +212,11 @@ file_stat = os.stat(file_name) elif '_parts' in record and os.access(temp_name, os.R_OK): file_stat = os.stat(temp_name) 
elif not reload: info += '\nFile not found or inaccessible.' + record = {'_parts': None} reload = True # forcibly checking file if file size doesn't match with index data if not reload: if '_parts' in record and record['_parts'] == SpaceMap(): @@ -237,27 +236,26 @@ print(info) if reload or recheck: try: request = options.root + my_path + needed = None if requested_ranges != None: if '_parts' in record and record['_parts'] != None: needed = record['_parts'] & requested_ranges else: needed = requested_ranges ranges = () - print('Requesting ranges:', ranges) - print('Not stored ranges:', record['_parts']) - print('Requested ranges:', requested_ranges) - print('Needed ranges:', needed) - needed.rewind() - while True: - range = needed.pop() - if range[0] == None: - break - ranges += '{}-{}'.format(range[0], range[1] - 1), - request = urllib.request.Request(request, headers = {'Range': 'bytes=' + ','.join(ranges)}) + print('Missing ranges: {}, requested ranges: {}, needed ranges: {}.'.format(record['_parts'], requested_ranges, needed)) + if len(needed) > 0: + needed.rewind() + while True: + range = needed.pop() + if range[0] == None: + break + ranges += '{}-{}'.format(range[0], range[1] - 1), + request = urllib.request.Request(request, headers = {'Range': 'bytes=' + ','.join(ranges)}) with urllib.request.urlopen(request) as source: new_record = {} new_record['_parts'] = record['_parts'] headers = source.info() @@ -264,29 +262,24 @@ # stripping unneeded headers (XXX make this inplace?) 
for header in headers: if header in desc_fields: #if header == 'Pragma' and headers[header] != 'no-cache': - print(header, headers[header]) if header == 'Content-Length': if 'Content-Range' not in headers: - new_record[header] = headers[header] + new_record[header] = int(headers[header]) else: new_record[header] = headers[header] elif header == 'Content-Range': range = re.compile('^bytes (\d+)-(\d+)/(\d+)$').match(headers[header]) if range: - new_record['Content-Length'] = range.group(3) + new_record['Content-Length'] = int(range.group(3)) else: assert False, 'Content-Range unrecognized.' elif not header in ignore_fields: print('Undefined header "', header, '": ', headers[header], sep='') - if new_record['_parts'] == None: - new_record['_parts'] = SpaceMap({0: int(new_record['Content-Length'])}) - print(new_record) - # comparing headers with data found in index # if any header has changed (except Pragma) file is fully downloaded # same if we get more or less headers old_keys = set(record.keys()) old_keys.discard('_time') @@ -302,91 +295,88 @@ print('Less headers appear:', less_keys) else: for key in record.keys(): if key[0] != '_' and key != 'Pragma' and not record[key] == new_record[key]: print('Header "', key, '" changed from [', record[key], '] to [', new_record[key], ']', sep='') + print(type(record[key]), type(new_record[key])) reload = True if reload: print('Reloading.') if os.access(temp_name, os.R_OK): os.unlink(temp_name) if os.access(file_name, os.R_OK): os.unlink(file_name) + if new_record['_parts'] == None or reload: + new_record['_parts'] = SpaceMap({0: int(new_record['Content-Length'])}) + print(new_record) # downloading file or segment if 'Content-Length' in new_record: - if requested_ranges == None: - requested_ranges = new_record['_parts'] + if needed == None: + needed = new_record['_parts'] else: - if len(requested_ranges) > 1: + if len(needed) > 1: print("Multipart requests currently not supported.") assert False, 'Skip this one for now.' 
else: assert False, 'No Content-Length or Content-Range header.' - if reload: - new_record['_time'] = datetime.datetime.now() - if self.command not in ('HEAD'): - # file is created at temporary location and moved in place only when download completes - if not os.access(temp_name, os.R_OK): - empty_name = options.dir + os.sep + '.tmp' - with open(empty_name, 'w+b') as some_file: - pass - os.renames(empty_name, temp_name) - temp_file = open(temp_name, 'r+b') - requested_ranges.rewind() - while True: - (start, end) = requested_ranges.pop() - if start == None: - break - stream_last = start - old_record = new_record - if end - start < block_size: - req_block_size = end - start - else: - req_block_size = block_size - buffer = source.read(req_block_size) - print(buffer) - length = len(buffer) - while length > 0 and stream_last < end: - stream_pos = stream_last + length - assert not stream_pos > end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end) - print('Writing', length, 'bytes to temp file at position', stream_last) - temp_file.seek(stream_last) - temp_file.write(buffer) - new_record['_parts'] = new_record['_parts'] - SpaceMap({stream_last: stream_pos}) - print(new_record) - index[my_path] = old_record - index.sync() - old_record = new_record - stream_last = stream_pos - if end - stream_last < block_size: - req_block_size = end - stream_last - buffer = source.read(req_block_size) - print(buffer) - length = len(buffer) - print(new_record) - index[my_path] = new_record - index.sync() - temp_file.close() - - # moving downloaded data to real file - if new_record['_parts'] == SpaceMap(): - # just moving - # drop old dirs XXX - print('Moving temporary file to new destination.') - os.renames(temp_name, file_name) + new_record['_time'] = datetime.datetime.now() + if self.command not in ('HEAD'): + # file is created at temporary location and moved in place only when download completes + if not os.access(temp_name, os.R_OK): + 
empty_name = options.dir + os.sep + '.tmp' + with open(empty_name, 'w+b') as some_file: + pass + os.renames(empty_name, temp_name) + temp_file = open(temp_name, 'r+b') + needed.rewind() + while True: + (start, end) = needed.pop() + if start == None: + break + stream_last = start + old_record = new_record + if end - start < block_size: + req_block_size = end - start + else: + req_block_size = block_size + buffer = source.read(req_block_size) + length = len(buffer) + while length > 0 and stream_last < end: + stream_pos = stream_last + length + assert not stream_pos > end, 'Received more data then requested: pos:{} start:{} end:{}.'.format(stream_pos, start, end) + temp_file.seek(stream_last) + temp_file.write(buffer) + new_record['_parts'] = new_record['_parts'] - SpaceMap({stream_last: stream_pos}) + index[my_path] = old_record + index.sync() + old_record = new_record + stream_last = stream_pos + if end - stream_last < block_size: + req_block_size = end - stream_last + buffer = source.read(req_block_size) + length = len(buffer) + # moving downloaded data to real file + temp_file.close() + if new_record['_parts'] == SpaceMap(): + # just moving + # drop old dirs XXX + print('Moving temporary file to new destination.') + os.renames(temp_name, file_name) + + print(new_record) + index[my_path] = new_record + index.sync() except urllib.error.HTTPError as error: # in case of error we don't need to do anything actually, # if file download stalls or fails the file would not be moved to it's location print(error) - print('Sending response.') if self.command == 'HEAD': - print('Sending HEAD response.') self.send_response(200) if 'Content-Length' in index[my_path]: self.send_header('Content-Length', index[my_path]['Content-Length']) self.send_header('Accept-Ranges', 'bytes') self.send_header('Content-Type', 'application/octet-stream') @@ -397,46 +387,48 @@ if index[my_path]['_parts'] != SpaceMap(): file_name = temp_name with open(file_name, 'rb') as real_file: file_stat = 
os.stat(file_name) - self.send_response(200) - self.send_header('Last-Modified', index[my_path]['Last-Modified']) - if requested_ranges != None: + if 'Range' in self.headers: + self.send_response(206) ranges = () requested_ranges.rewind() while True: pair = requested_ranges.pop() if pair[0] == None: break ranges += '{}-{}'.format(pair[0], str(pair[1] - 1)), - self.send_header('Content-Range', 'bytes ' + ','.join(ranges) + '/' + index[my_path]['Content-Length']) + self.send_header('Content-Range', 'bytes {}/{}'.format(','.join(ranges), index[my_path]['Content-Length'])) else: + self.send_response(200) self.send_header('Content-Length', str(file_stat.st_size)) requested_ranges = SpaceMap({0: file_stat.st_size}) + self.send_header('Last-Modified', index[my_path]['Last-Modified']) self.send_header('Content-Type', 'application/octet-stream') self.end_headers() if self.command in ('GET'): - requested_ranges.rewind() - (start, end) = requested_ranges.pop() - print('Seeking file to position', start) + if len(requested_ranges) > 0: + requested_ranges.rewind() + (start, end) = requested_ranges.pop() + else: + start = 0 + end = index[my_path]['Content-Length'] real_file.seek(start) if block_size > end - start: req_block_size = end - start else: req_block_size = block_size - print('block_size is', req_block_size) buffer = real_file.read(req_block_size) length = len(buffer) while length > 0: self.wfile.write(buffer) start += len(buffer) if req_block_size > end - start: req_block_size = end - start if req_block_size == 0: break - print('block_size is', req_block_size) buffer = real_file.read(req_block_size) length = len(buffer) def do_HEAD(self): return self.__process()