Index: squid-tagger.py ================================================================== --- squid-tagger.py +++ squid-tagger.py @@ -105,12 +105,10 @@ logging.handlers.SysLogHandler.__init__(self, '/dev/log') self._tail = gevent.queue.Queue() self._worker = None def emit(self, record): - # my syslog is broken and cannot into UTF-8 BOM - record.msg = record.msg.encode('utf-8') try: self._tail.put(record) except (KeyboardInterrupt, SystemExit): raise except: @@ -304,11 +302,11 @@ def __init__(self, queue, logger): self._db = tagDB() self._log = logger self._log.info('started') - self._request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.:]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$') + self._request = re.compile('^([0-9]+)\ ((http|ftp):\/\/)?([-\w.]+)(:[0-9]+)?(\/([^ ]*))?\ ([0-9.:]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST|CONNECT).*$') self._queue = queue self._stdout = FWritelineQueue(sys.stdout, False) def process(self, id, site, ip_address, url_path, line = None): #self._log.info('trying {}'.format(site)) @@ -315,19 +313,19 @@ result = self._db.check(site, ip_address) reply = None #self._log.info('got {} lines from database'.format(len(result))) for row in result: if row != None and row[0] != None: - if row[1] != None: + if row[1] != None and url_path != None: self._log.info('trying regexp "{}" versus "{}"'.format(row[1], url_path)) try: if re.compile(row[1]).match(url_path): reply = row[0].format(host = site, path = url_path) else: continue except: - self._log.info("can't compile regexp") + self._log.info("can't compile or execute regexp") else: reply = row[0].format(host = site, path = url_path) if reply != None: self.writeline('{} {}'.format(id, reply)) return(True) @@ -340,14 +338,14 @@ break #self._log.info('request: ' + line) request = self._request.match(line) if request: id = request.group(1) - #proto = request.group(2) - site = request.group(3) - url_path = request.group(4) - ip_address = request.group(5) + #proto = request.group(3) + site = request.group(4) + url_path = request.group(7) + ip_address = request.group(8) self.process(id, site, ip_address, url_path, line) else: self._log.info('bad request') self.writeline(line)