d500448801 2009-10-05 1: #!/usr/bin/env python3.1
d500448801 2009-10-05 2:
d500448801 2009-10-05 3: import configparser, optparse, os, postgresql.api, re, sys, _thread
d500448801 2009-10-05 4:
b93dc49210 2009-10-13 5: # wrapper around syslog, can be muted
class Logger:
    """Thin front end to syslog that turns into a no-op when muted."""

    __slots__ = frozenset(['_syslog'])

    def __init__(self):
        """Choose the logging backend from the 'log' section of the global config.

        When the 'silent' option equals 'yes' no backend is kept, so every
        message is dropped.  Otherwise the syslog module is imported on demand
        and opened under the 'squidTag' identity.
        """
        config.section('log')
        if config['silent'] == 'yes':
            self._syslog = None
            return
        import syslog
        self._syslog = syslog
        self._syslog.openlog('squidTag')

    def info(self, message):
        """Emit message at LOG_INFO priority; does nothing when muted."""
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_INFO, message)

    def notice(self, message):
        """Emit message at LOG_NOTICE priority; does nothing when muted."""
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_NOTICE, message)
d500448801 2009-10-05 25:
b93dc49210 2009-10-13 26: # wrapper around database
class tagDB:
    """Database wrapper: one connection plus one prepared rule lookup."""

    __slots__ = frozenset(('_check_stmt', '_db'))

    def __init__(self):
        """Connect using the 'database' config section and prepare the lookup.

        The connection URI is assembled from the 'user', 'password', 'host'
        and 'database' options, read in that order.
        """
        config.section('database')
        uri_parts = [config[option] for option in ('user', 'password', 'host', 'database')]
        self._db = postgresql.open('pq://{}:{}@{}/{}'.format(*uri_parts))
        # The statement yields (redirect_url, regexp) pairs from site_rule,
        # ordered by the length of the stored site array, longest first.
        self._check_stmt = self._db.prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")

    def check(self, site, ip_address):
        """Execute the prepared lookup for site and ip_address; return its result."""
        lookup = self._check_stmt
        return lookup(site, ip_address)
b93dc49210 2009-10-13 43:
b93dc49210 2009-10-13 44: # abstract class with basic checking functionality
class Checker:
    """Base checker: parses request lines from stdin and answers them inline.

    Each accepted line is looked up in the database and answered on stdout as
    '<id> <reply>'.  Subclasses override process() and loop() to change when
    and where the lookup happens.
    """

    __slots__ = frozenset(['_db', '_log'])

    # Request line pattern, compiled once instead of on every incoming line.
    # Groups used by check(): 1 = id, 3 = site, 4 = url path, 5 = ip address
    # (2 is the protocol; the remaining groups are matched but not used).
    _request_re = re.compile(
        r'^([0-9]+) (http|ftp)://([-\w.:]+)/([^ ]*) ([0-9.]+)/(-|[\w.]+) (-|\w+) (-|GET|HEAD|POST).*$'
    )

    def __init__(self):
        """Open the database wrapper and the logger, then log the start."""
        self._db = tagDB()
        self._log = Logger()
        self._log.info('started\n')

    def process(self, id, site, ip_address, url_path, line = None):
        """Look up one request and write its reply.

        id          request id echoed back in the reply
        site        host part of the requested URL
        ip_address  address of the requesting client
        url_path    path part of the requested URL
        line        original request line; unused here, accepted so that
                    subclasses can share the call signature

        Rows returned by the database are tried in order.  row[0] is the
        redirect template (formatted with url_path) and row[1] an optional
        regular expression that url_path has to match.  The first usable row
        wins; when none applies the reply is '-'.
        """
        self._log.info('trying {}\n'.format(site))
        reply = '-'
        for row in self._db.check(site, ip_address):
            if row is None or row[0] is None:
                continue
            if row[1] is None:
                # unconditional rule
                reply = row[0].format(url_path)
                break
            self._log.info('trying regexp "{}" versus "{}"\n'.format(row[1], url_path))
            try:
                if not re.compile(row[1]).match(url_path):
                    continue
                reply = row[0].format(url_path)
                break
            except Exception:
                # Best effort: a broken rule (bad regexp or bad template) is
                # logged and skipped.  Exception rather than a bare except so
                # that KeyboardInterrupt and SystemExit still propagate.
                self._log.info("can't compile regexp")
        self.writeline('{} {}\n'.format(id, reply))

    def check(self, line):
        """Parse one request line and hand it to process().

        Returns True when the line was recognised.  Unrecognised lines are
        logged, echoed back unchanged and reported as False.
        """
        request = self._request_re.match(line)
        if not request:
            self._log.info('bad request\n')
            self.writeline(line)
            return False
        id, site, url_path, ip_address = request.group(1, 3, 4, 5)
        self.process(id, site, ip_address, url_path, line)
        return True

    def writeline(self, string):
        """Log string, write it to stdout and flush immediately."""
        self._log.info('sending: ' + string)
        sys.stdout.write(string)
        sys.stdout.flush()

    def loop(self):
        """Read stdin line by line until end of file, checking every line."""
        while True:
            line = sys.stdin.readline()
            if not line:
                break
            self.check(line)
ed7808827d 2009-10-14 101:
b93dc49210 2009-10-13 102: # threaded checking facility
class CheckerThread(Checker):
    """Checker variant that queues requests and answers them from a worker thread."""

    __slots__ = frozenset(['_lock', '_lock_exit', '_lock_queue', '_queue'])

    def __init__(self):
        """Run the base initialisation, create the locks and start the worker."""
        Checker.__init__(self)

        # _lock gates the worker.  It starts out held, so the worker stalls
        # until process() has queued something and released it.  While more
        # than one request is pending the worker releases it again itself and
        # therefore keeps running without stalls.
        self._lock = _thread.allocate_lock()
        # _lock_exit is held while requests are outstanding; loop() acquires
        # it after end of input to wait for the worker to finish.
        self._lock_exit = _thread.allocate_lock()
        # _lock_queue protects the request list itself.
        self._lock_queue = _thread.allocate_lock()
        self._lock.acquire()
        self._queue = []
        _thread.start_new_thread(self._start, ())

    def _start(self):
        """Worker body: take queued requests one at a time and answer them."""
        while True:
            self._lock.acquire()
            with self._lock_queue:
                # Re-open the gate only when another request stays queued
                # after this one; the locked() test avoids releasing a lock
                # that process() has already released.
                if len(self._queue) > 1 and self._lock.locked():
                    self._lock.release()
                request = self._queue.pop(0)
            req_id, site, ip_address, url_path = request
            # the database lookup runs outside the queue lock
            Checker.process(self, req_id, site, ip_address, url_path)
            with self._lock_queue:
                # nothing left: let a waiting loop() return
                if not self._queue and self._lock_exit.locked():
                    self._lock_exit.release()

    def process(self, id, site, ip_address, url_path, line):
        """Queue the request for the worker instead of answering it here."""
        with self._lock_queue:
            self._queue.append((id, site, ip_address, url_path))
            self._log.info('request {} queued ({})\n'.format(id, line))
            # lock bookkeeping is done while the queue lock is held
            if not self._lock_exit.locked():
                self._lock_exit.acquire()
            if self._lock.locked():
                self._lock.release()

    def loop(self):
        """Feed stdin lines to check() until end of file, then wait for the worker."""
        Checker.loop(self)
        # blocks while the worker still has outstanding requests
        self._lock_exit.acquire()
ed7808827d 2009-10-14 152:
26fc9b34d9 2010-08-07 153: # kqueue-enabled class for BSDs
class CheckerKqueue(Checker):
    """Single-threaded checker that uses kqueue to interleave reading and answering."""

    __slots__ = frozenset(['_kq', '_select', '_queue'])

    def __init__(self):
        """Run the base initialisation, create the kqueue and watch stdin."""
        Checker.__init__(self)

        # the select module is imported here and kept on the instance
        import select
        self._select = select

        # creating the kqueue
        self._kq = self._select.kqueue()
        assert self._kq.fileno() != -1, "Fatal error: can't initialise kqueue."

        # register interest in readable data on sys.stdin
        watch_stdin = self._select.kevent(sys.stdin, self._select.KQ_FILTER_READ, self._select.KQ_EV_ADD)
        self._kq.control([watch_stdin], 0)

        # requests accepted by check() and not yet answered
        self._queue = []

    def loop(self):
        """Alternate between reading request lines and answering queued requests.

        Incoming data is split into lines and queued through check().  Queued
        requests are answered one per iteration whenever no data is waiting.
        The loop ends once end of stream was seen and the queue is empty.
        """
        # block in kqueue until data arrives, unless there is queued work
        timeout = None
        eof = False
        buffer = ''
        while True:
            # check whether data is available, or wait for it to arrive
            kevs = self._kq.control(None, 1, timeout)
            event = kevs[0] if len(kevs) > 0 else None

            # end-of-stream detection, as the original test on bit 15 of flags
            if event is not None and event.flags >> 15 == 1:
                eof = True

            if event is not None and event.filter == self._select.KQ_FILTER_READ and event.data > 0:
                # read as much as the event reports
                new_buffer = sys.stdin.read(event.data)
                if len(new_buffer) == 0:
                    # nothing came back: end of file
                    eof = True
                else:
                    # prepend what was left over from the previous read; the
                    # unterminated tail goes back into the buffer
                    *complete, buffer = (buffer + new_buffer).split('\n')
                    for line in complete:
                        if self.check(line + '\n'):
                            # something was queued: stop blocking in kqueue
                            timeout = 0
            else:
                if len(self._queue) > 0:
                    # answer exactly one queued request
                    req_id, site, ip_address, url_path = self._queue.pop(0)
                    Checker.process(self, req_id, site, ip_address, url_path)
                if len(self._queue) == 0:
                    # nothing left to answer: go back to waiting for data
                    timeout = None

            # queue drained and end of stream reached: done
            if len(self._queue) == 0 and eof:
                break

    def process(self, id, site, ip_address, url_path, line):
        """Queue the request; loop() answers it later."""
        self._queue.append((id, site, ip_address, url_path))
        self._log.info('request {} queued ({})\n'.format(id, line))
7e3418d94f 2009-10-12 225:
fc934cead1 2009-10-13 226: # this class processes the config file and substitutes default values
class Config:
    """Config file reader with a current section and built-in default values.

    Usage: call section(name) to select a section, then read options with
    config[option].  Options missing from the file fall back to the _default
    table and finally to None.
    """

    # '_default' must not be listed here: it is a class-level table, and a
    # name in __slots__ that is also bound as a class variable makes class
    # creation fail with ValueError.
    __slots__ = frozenset(['_config', '_section'])

    # built-in defaults, keyed by section and then by option
    _default = {
        'reactor': {
            'reactor': 'thread',
        },
        'log': {
            'silent': 'no',
        },
        'database': {
            'host': 'localhost',
            'database': 'squidTag',
        },
    }

    def __init__(self):
        """Parse the command line and read the config file it points to.

        The file location comes from -c/--config and defaults to
        /usr/local/etc/squid-tagger.conf.  An unreadable file terminates the
        program with a fatal error message.
        """
        parser = optparse.OptionParser()
        parser.add_option('-c', '--config', dest = 'config',
            help = 'config file location', metavar = 'FILE',
            default = '/usr/local/etc/squid-tagger.conf')

        (options, args) = parser.parse_args()

        # explicit check instead of assert, which is stripped under -O
        if not os.access(options.config, os.R_OK):
            sys.exit("Fatal error: can't read {}".format(options.config))

        self._config = configparser.ConfigParser()
        # read_file() superseded readfp(); fall back for interpreters that
        # only have the old name
        read_file = getattr(self._config, 'read_file', None) or self._config.readfp
        with open(options.config) as stream:
            read_file(stream)

    def section(self, section):
        """Select section as the current one, creating it when it is missing."""
        if not self._config.has_section(section):
            self._config.add_section(section)
        self._section = section

    def __getitem__(self, name):
        """Return option name from the current section.

        A value present in the file wins; otherwise the built-in default for
        the current section is returned, or None when there is no default.
        The fallback is returned directly rather than stored in the parser,
        because the parser only accepts string values.
        """
        if self._config.has_option(self._section, name):
            return self._config.get(self._section, name)
        return self._default.get(self._section, {}).get(name)
d500448801 2009-10-05 273:
fc934cead1 2009-10-13 274: # initializing and reading in config file
config = Config()

# Select the request-processing strategy named by the 'reactor' option.
config.section('reactor')
reactor = config['reactor']
reactors = {
    'thread': CheckerThread,
    'plain': Checker,
    'kqueue': CheckerKqueue,
}
if reactor not in reactors:
    # An unrecognised name has to stop here; otherwise `checker` would be
    # left unbound and the call below would fail with a NameError.
    sys.exit("Fatal error: unknown reactor '{}'".format(reactor))
checker = reactors[reactor]()

# Serve requests until standard input is exhausted.
checker.loop()