d500448801 2009-10-05 1: #!/usr/bin/env python3.1
d500448801 2009-10-05 2:
d500448801 2009-10-05 3: import configparser, optparse, os, postgresql.api, re, sys, _thread
d500448801 2009-10-05 4:
class Logger:
    """Thin syslog front end that can be switched off from the config file.

    When the ``silent`` option of the ``[log]`` section equals ``yes`` every
    logging call is a no-op; otherwise messages are sent through the standard
    ``syslog`` module under the ident ``squidTag``.
    """

    __slots__ = frozenset(['_syslog'])

    def __init__(self):
        """Read the ``[log]`` section of the module-level ``config``."""
        config.section('log')
        if config['silent'] == 'yes':
            # Silent mode: the syslog module is never imported.
            self._syslog = None
            return
        import syslog
        self._syslog = syslog
        syslog.openlog('squidTag')

    def info(self, message):
        """Send *message* at LOG_INFO priority unless logging is disabled."""
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_INFO, message)

    def notice(self, message):
        """Send *message* at LOG_NOTICE priority unless logging is disabled."""
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_NOTICE, message)
d500448801 2009-10-05 24:
class tagDB:
    """Database access object holding one lazily opened connection.

    The rule lookup statement is prepared once in ``__init__`` and reused for
    every ``check`` call.
    """

    __slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

    def __init__(self):
        """Open the connection and prepare the rule lookup statement."""
        self._prepared = set()
        # False means "not connected yet"; _curs() replaces it on first use.
        self._db = False
        connection = self._curs()
        self._check_stmt = connection.prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")

    def _curs(self):
        """Return the connection, opening it on first use.

        Connection parameters come from the ``[database]`` section of the
        module-level ``config`` (user, password, host, database).
        """
        if self._db:
            return self._db
        config.section('database')
        parts = [config[key] for key in ('user', 'password', 'host', 'database')]
        self._db = postgresql.open('pq://{}:{}@{}/{}'.format(*parts))
        return self._db

    def check(self, ip_address, site):
        """Run the prepared lookup for *site* and *ip_address*.

        Note the argument order: the statement takes the site as ``$1`` and
        the client address as ``$2``.
        """
        statement = self._check_stmt
        return statement(site, ip_address)
d500448801 2009-10-05 47:
class CheckerThread:
    """Background worker that answers queued redirector requests.

    ``check`` parses one request line and queues it.  A thread started from
    ``__init__`` pops the queued requests one by one, looks each of them up
    through ``db.check`` and writes exactly one reply line per request using
    the module-level ``writeline``.
    """

    __slots__ = frozenset(['_db', '_lock', '_lock_queue', '_log', '_queue'])

    # Request line pattern.  Groups: 1 request id, 2 protocol, 3 site,
    # 4 URL path, 5 client address; the remaining groups are matched but not
    # used.  Compiled once instead of on every call.
    _REQUEST_RE = re.compile(r'^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')

    def __init__(self, db, log):
        """Store collaborators, set up the locks and start the worker thread.

        db  -- object whose ``check(ip_address, site)`` returns an iterable
               of ``(redirect_url, regexp)`` rows.
        log -- object providing ``info(message)``.
        """
        self._db = db
        self._log = log
        # Wake-up lock.  The worker acquires it at the top of every iteration
        # and only releases it itself while more work is still queued, so it
        # runs without stopping while the queue has data and stalls when the
        # queue is empty.  The queue writer releases it after storing a
        # request.
        self._lock = _thread.allocate_lock()
        # Guards every access to self._queue.
        self._lock_queue = _thread.allocate_lock()
        self._lock.acquire()
        self._queue = []
        _thread.start_new_thread(self._start, ())

    def _start(self):
        """Worker loop: pop queued requests forever and reply to each one."""
        while True:
            self._lock.acquire()
            with self._lock_queue:
                # More than one request queued means another one remains
                # after the pop below, so the wake-up lock is released to let
                # the next iteration run without waiting for the writer.
                if len(self._queue) > 1 and self._lock.locked():
                    self._lock.release()
                req = self._queue.pop(0)
            writeline(self._reply_for(req))

    def _reply_for(self, req):
        """Return the single reply line for one queued request.

        req -- tuple ``(id, site, ip_address, url_path, proto)`` as queued by
               ``check``.

        The first row with a redirect URL whose regexp is absent or matches
        the URL path yields ``'<id> 302:<redirect_url>'``.  When no row
        applies, the original URL is echoed back as ``'<id> <proto>://<site>/
        <url_path>'``.  Only one of the two is ever produced.
        """
        request_id, site, ip_address, url_path, proto = req
        self._log.info('trying {}\n'.format(site))
        for row in self._db.check(ip_address, site):
            if row is None or row[0] is None:
                continue
            if row[1] is not None:
                self._log.info('trying regexp "{}" versus "{}"\n'.format(row[1], url_path))
                if not re.compile(row[1]).match(url_path):
                    continue
            return '{} 302:{}\n'.format(request_id, row[0])
        return '{} {}://{}/{}\n'.format(request_id, proto, site, url_path)

    def check(self, line):
        """Parse a request line and queue it for the worker thread.

        A line that does not match the request pattern is logged as a bad
        request and echoed back unchanged through ``writeline``.
        """
        request = self._REQUEST_RE.match(line)
        if not request:
            self._log.info('bad request\n')
            writeline(line)
            return
        request_id, proto, site, url_path, ip_address = request.group(1, 2, 3, 4, 5)
        with self._lock_queue:
            self._queue.append((request_id, site, ip_address, url_path, proto))
            # Wake the worker if it is stalled on (or still holding) the
            # wake-up lock.
            if self._lock.locked():
                self._lock.release()
        self._log.info('request {} queued ({})\n'.format(request_id, line))
d500448801 2009-10-05 106:
def writeline(string):
    """Log *string* through the module-level ``log`` and write it to stdout.

    Standard output is flushed right after the write so the line is not held
    back in the output buffer.
    """
    log.info('sending: ' + string)
    stream = sys.stdout
    stream.write(string)
    stream.flush()
d500448801 2009-10-05 111:
class Config:
    """Configuration file reader with a "current section" cursor.

    The file location is taken from the ``-c``/``--config`` command-line
    option (default ``/usr/local/etc/squid-tagger.conf``).  Select a section
    with ``section(name)`` and then read its options with ``config[option]``.
    """

    __slots__ = frozenset(['_config', '_section'])

    def __init__(self):
        """Parse the command line and load the configuration file.

        Prints a message and exits with status 2 when the file cannot be
        read.
        """
        parser = optparse.OptionParser()
        parser.add_option('-c', '--config', dest='config',
            help='config file location', metavar='FILE',
            default='/usr/local/etc/squid-tagger.conf')

        options, _ = parser.parse_args()

        self._config = configparser.ConfigParser()
        # read() opens and closes the file itself and returns the list of
        # files it managed to read, so an empty result means the file is
        # missing or unreadable.  This avoids a separate access check, a
        # leaked file object and the removed readfp() API.
        if not self._config.read(options.config):
            print("Can't read {}: exitting".format(options.config))
            sys.exit(2)

    def section(self, section):
        """Select the section used by subsequent ``config[option]`` lookups."""
        self._section = section

    def __getitem__(self, name):
        """Return option *name* from the currently selected section."""
        return self._config.get(self._section, name)
d500448801 2009-10-05 135:
# Module-level singletons.  Order matters: Logger and tagDB read the global
# ``config``, and CheckerThread starts its worker thread on construction.
config = Config()

log = Logger()
db = tagDB()
checker = CheckerThread(db, log)

# Hand every line from standard input to the checker; readline() returns an
# empty string at end of file, which ends the loop.
for line in iter(sys.stdin.readline, ''):
    checker.check(line)