0ef24b1937 2011-04-06 c.kworr@d4daf: #!/usr/bin/env python
0ef24b1937 2011-04-06 c.kworr@d4daf:
0ef24b1937 2011-04-06 c.kworr@d4daf: from __future__ import division, print_function, unicode_literals
0ef24b1937 2011-04-06 c.kworr@d4daf:
0ef24b1937 2011-04-06 c.kworr@d4daf: import gevent.monkey
0ef24b1937 2011-04-06 c.kworr@d4daf: gevent.monkey.patch_all()
0ef24b1937 2011-04-06 c.kworr@d4daf:
0ef24b1937 2011-04-06 c.kworr@d4daf: import fcntl, gevent.core, gevent.pool, gevent.queue, gevent.socket, os, psycopg2, re, sys
0ef24b1937 2011-04-06 c.kworr@d4daf:
0ef24b1937 2011-04-06 c.kworr@d4daf: # //inclusion start
0ef24b1937 2011-04-06 c.kworr@d4daf: # Copyright (C) 2010 Daniele Varrazzo <daniele.varrazzo@gmail.com>
0ef24b1937 2011-04-06 c.kworr@d4daf: # and licensed under the MIT license:
0ef24b1937 2011-04-06 c.kworr@d4daf:
def gevent_wait_callback(conn, timeout=None):
    """Psycopg wait callback that waits through gevent instead of blocking.

    Polls *conn* repeatedly; whenever the poll asks for a read or a write, the
    matching ``gevent.socket.wait_*`` call is made on the connection's file
    descriptor (with *timeout* passed through) before polling again.  Returns
    once the poll reports ``POLL_OK`` and raises ``psycopg2.OperationalError``
    for any other poll result.
    """
    ext = psycopg2.extensions
    while True:
        status = conn.poll()
        if status == ext.POLL_OK:
            return
        if status == ext.POLL_READ:
            gevent.socket.wait_read(conn.fileno(), timeout=timeout)
        elif status == ext.POLL_WRITE:
            gevent.socket.wait_write(conn.fileno(), timeout=timeout)
        else:
            raise psycopg2.OperationalError("Bad result from poll: %r" % status)
0ef24b1937 2011-04-06 c.kworr@d4daf:
# Install the gevent-aware wait callback, refusing to start when this Psycopg
# build does not expose the hook.
if hasattr(psycopg2.extensions, 'set_wait_callback'):
    psycopg2.extensions.set_wait_callback(gevent_wait_callback)
else:
    raise ImportError(
        "support for coroutines not available in this Psycopg version (%s)"
        % psycopg2.__version__)
0ef24b1937 2011-04-06 c.kworr@d4daf:
0ef24b1937 2011-04-06 c.kworr@d4daf: # //inclusion end
fc934cead1 2009-10-13 c.kworr@d4daf:
# this class processes the config file and substitutes default values
class Config(object):
    """Command-line options plus config-file values with built-in defaults.

    ``options`` holds the parsed command-line flags.  Config-file values are
    read by first choosing a section with :meth:`section` and then indexing
    the object (``config['name']``); a missing value falls back to
    ``_default`` and finally to ``None``.
    """

    # '_default' is a class attribute and therefore must not be a slot as
    # well: a new-style class refuses to be created when both are present.
    __slots__ = frozenset(['_config', '_section', 'options'])

    # fallback values, keyed by section and then by option name
    _default = {
        'log': {
            'silent': 'no',
        },
        'database': {
            'database': 'squidTag',
        },
    }

    def __init__(self):
        """Parse ``sys.argv`` and read the selected config file.

        Raises:
            IOError: the config file is not readable.
        """
        import ConfigParser, optparse, os

        parser = optparse.OptionParser()
        parser.add_option('-c', '--config', dest = 'config',
            help = 'config file location', metavar = 'FILE',
            default = '/usr/local/etc/squid-tagger.conf')
        parser.add_option('-d', '--dump', dest = 'dump',
            help = 'dump database', action = 'store_true', metavar = 'bool',
            default = False)
        parser.add_option('-f', '--flush-database', dest = 'flush_db',
            help = 'flush previous database on load', default = False,
            action = 'store_true', metavar = 'bool')
        parser.add_option('-l', '--load', dest = 'load',
            help = 'load database', action = 'store_true', metavar = 'bool',
            default = False)
        parser.add_option('-D', '--dump-conf', dest = 'dump_conf',
            help = 'dump filtering rules', default = False, metavar = 'bool',
            action = 'store_true')
        parser.add_option('-L', '--load-conf', dest = 'load_conf',
            help = 'load filtering rules', default = False, metavar = 'bool',
            action = 'store_true')

        (self.options, args) = parser.parse_args()

        # explicit check instead of ``assert`` so it survives ``python -O``
        if not os.access(self.options.config, os.R_OK):
            raise IOError("Fatal error: can't read {}".format(self.options.config))

        self._config = ConfigParser.ConfigParser()
        # close the file as soon as it has been parsed
        with open(self.options.config) as config_file:
            self._config.readfp(config_file)

    def section(self, section):
        """Select *section* for later lookups, creating it when it is absent."""
        if not self._config.has_section(section):
            self._config.add_section(section)
        self._section = section

    def __getitem__(self, name):
        """Return option *name* from the selected section.

        When the option is missing, the default for this section (or ``None``
        when there is none) is stored in the parser first, so the lookup
        always succeeds.
        """
        if not self._config.has_option(self._section, name):
            fallback = self._default.get(self._section, {}).get(name)
            self._config.set(self._section, name, fallback)
        return self._config.get(self._section, name)
d500448801 2009-10-05 c.kworr@d4daf:
# Shared configuration object: creating it parses the command line and reads
# the config file, so this runs once when the script starts.
config = Config()
d500448801 2009-10-05 c.kworr@d4daf:
fad48b740c 2012-07-07 arcade@b1t.na: import logging, logging.handlers
2654b86697 2012-07-09 arcade@b1t.na:
# wrapper around the logging handler that queues records so that sending them doesn't stall the caller
2654b86697 2012-07-09 arcade@b1t.na:
class SysLogHandlerQueue(logging.handlers.SysLogHandler):
    """Syslog handler that queues records and sends them from a greenlet.

    ``emit`` only appends the record to an internal gevent queue; a single
    worker greenlet, spawned on demand, passes queued records to the stock
    ``SysLogHandler.emit``.
    """
    __slots__ = frozenset(['_running', '_tail', '_worker'])

    def __init__(self, address='/dev/log'):
        """Open the syslog socket at *address* (defaults to ``/dev/log``)."""
        logging.handlers.SysLogHandler.__init__(self, address)
        self._tail = gevent.queue.Queue()
        self._worker = None

    def emit(self, record):
        """Queue *record* and make sure a worker greenlet exists."""
        try:
            self._tail.put(record)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            # only ordinary errors are reported; a greenlet kill must pass through
            self.handleError(record)
        if self._worker is None:
            # no worker is draining the queue right now, start one
            self._worker = gevent.spawn(self._writer)

    def _writer(self):
        """Send queued records until the queue is empty."""
        try:
            while not self._tail.empty():
                logging.handlers.SysLogHandler.emit(self, self._tail.get())
        finally:
            # always release the slot, otherwise a failed worker would
            # prevent any later record from being sent
            self._worker = None

    def close(self):
        """Kill the worker greenlet, if any, and close the syslog socket."""
        worker, self._worker = self._worker, None
        if worker is not None:
            gevent.kill(worker)
        logging.handlers.SysLogHandler.close(self)
2654b86697 2012-07-09 arcade@b1t.na:
# Module-wide logger: INFO and above go to syslog through the queueing handler,
# each message tagged with the program name and process id.
handler = SysLogHandlerQueue()
handler.setFormatter(logging.Formatter(str('squidTag[%(process)s]: %(message)s')))

logger = logging.getLogger('squidTag')
logger.setLevel(logging.INFO)
logger.addHandler(handler)
39b97ced92 2011-06-05 c.kworr@d4daf:
39b97ced92 2011-06-05 c.kworr@d4daf: # tiny wrapper around a file to make reads from it geventable
39b97ced92 2011-06-05 c.kworr@d4daf: # or should i move this somewhere?
39b97ced92 2011-06-05 c.kworr@d4daf:
class FReadlineQueue(gevent.queue.Queue):
    """Queue of text lines read from a file object by a background greenlet.

    Each complete line (without its trailing newline) is queued as soon as it
    has been read; ``None`` is queued once when reading stops.
    """
    # file descriptor number and the not yet terminated leftover
    __slots__ = frozenset(['_fn', '_tail'])

    def __init__(self, fd):
        """Switch *fd* to non-blocking mode and start the reader greenlet."""
        gevent.queue.Queue.__init__(self)
        self._fn = fd.fileno()
        # nothing left over yet
        self._tail = ''
        gevent.os.make_nonblocking(fd)
        gevent.spawn(self._frobber)

    def _frobber(self):
        """Read 4096-byte chunks until EOF and queue every line found."""
        try:
            while True:
                buf = gevent.os.nb_read(self._fn, 4096)
                if not buf:
                    # EOF
                    break
                # the leftover of the previous chunk starts the first line
                rows = (self._tail + buf.decode('l1')).split('\n')
                # the last element is incomplete (or empty), keep it for later
                self._tail = rows.pop()
                for row in rows:
                    self.put_nowait(row)
                    logger.info('< ' + row[0:1024])
            if self._tail:
                # the input ended without a newline: deliver the last line
                # instead of silently dropping it
                row, self._tail = self._tail, ''
                self.put_nowait(row)
                logger.info('< ' + row[0:1024])
        finally:
            # always signal EOF, even when reading failed, so the consumer
            # does not wait forever
            self.put_nowait(None)
9d7d80e594 2013-11-28 arcade@b1t.na:
# Line queue fed from standard input; creating it switches the descriptor to
# non-blocking mode and starts the reader greenlet immediately.
# NOTE(review): this also happens for the CSV load modes, which read sys.stdin
# directly - confirm that the two readers do not compete for the input.
stdin = FReadlineQueue(sys.stdin)
d823fa83dd 2012-07-07 c.kworr@d4daf:
# wrapper around a file handle that makes it possible to queue writes without stalling
2654b86697 2012-07-09 arcade@b1t.na:
class FWritelineQueue(gevent.queue.JoinableQueue):
    """Queue of lines written to a file descriptor by a background greenlet.

    ``put`` stores the item and makes sure exactly one writer greenlet is
    running.  The writer encodes each item as one UTF-8 line, writes it with
    non-blocking writes and marks the item done, so ``join()`` returns once
    everything queued has been written.
    """
    # file descriptor number, unwritten leftover, running writer greenlet
    __slots__ = frozenset(['_fn', '_tail', '_worker'])

    def __init__(self, fd):
        """Switch *fd* to non-blocking mode; nothing is written yet."""
        gevent.queue.JoinableQueue.__init__(self)
        self._fn = fd.fileno()
        gevent.os.make_nonblocking(fd)
        # no partially written line
        self._tail = None
        # no writer greenlet running
        self._worker = None

    def __del__(self):
        # purge queue before deleting
        if not self.empty():
            self.join()

    def put(self, item, block=True, timeout=None):
        """Queue *item* and start the writer unless one is already running."""
        gevent.queue.JoinableQueue.put(self, item, block, timeout)
        # a single writer only: several writers sharing the leftover could
        # write the same bytes twice or interleave lines
        if self._worker is None:
            self._worker = gevent.spawn(self._frobber)

    @staticmethod
    def _encode_line(item):
        """Return *item* as UTF-8 bytes followed by a newline."""
        if isinstance(item, bytes):
            data = item
        else:
            data = u'{}'.format(item).encode('utf-8')
        return data + b'\n'

    def _frobber(self):
        """Write queued lines until the queue is empty."""
        try:
            while True:
                if self._tail is None:
                    try:
                        item = self.get_nowait()
                    except gevent.queue.Empty:
                        return
                    self._tail = self._encode_line(item)
                written = gevent.os.nb_write(self._fn, self._tail)
                # keep whatever did not fit for the next round
                self._tail = self._tail[written:]
                if not self._tail:
                    self._tail = None
                    # the line is completely written: let join() make progress
                    self.task_done()
        finally:
            # allow put() to start a new writer, also after a failure
            self._worker = None
39b97ced92 2011-06-05 c.kworr@d4daf:
39b97ced92 2011-06-05 c.kworr@d4daf: # wrapper around database
class tagDB(object):
    """Thin wrapper around the PostgreSQL connection used by the tagger.

    One connection and one cursor are created in ``__init__`` and reused by
    every method.
    """
    __slots__ = frozenset(['_cursor', '_db'])

    def __init__(self):
        """Connect using the ``database`` section of the global config.

        ``host`` is only passed to ``psycopg2.connect`` when it is configured.
        """
        config.section('database')
        params = dict(
            database = config['database'],
            user = config['user'],
            password = config['password'],
        )
        host = config['host']
        if host is not None:
            params.update(host = host)
        self._db = psycopg2.connect(**params)
        self._cursor = self._db.cursor()

    def _field_names(self):
        """Return the column names of the last executed query."""
        return [column.name for column in self._cursor.description]

    def check(self, site, ip_address):
        """Return the ``(redirect_url, regexp)`` rows matching *site* and *ip_address*."""
        self._cursor.execute("select * from (select redirect_url, regexp from site_rule where site <@ tripdomain(%s) and netmask >>= %s order by array_length(site, 1) desc) a group by redirect_url, regexp", [site, ip_address])
        return self._cursor.fetchall()

    def dump(self):
        """Return ``(column names, rows)`` for the whole ``urls`` table."""
        self._cursor.execute("select untrip(site) as site, tag::text, regexp from urls order by site, tag")
        return (self._field_names(), self._cursor.fetchall())

    def load(self, data):
        """Insert *data* rows (``site, tag[, regexp]``) into ``urls`` and commit.

        The table is emptied first when the ``flush_db`` option is set.  Empty
        rows, such as blank lines in the CSV input, are skipped.
        """
        if config.options.flush_db:
            self._cursor.execute('delete from urls;')
        # two-column rows carry no regexp
        bundle = [
            [row[0], row[1], None if len(row) == 2 else row[2]]
            for row in data if row
        ]
        self._cursor.executemany("insert into urls (site, tag, regexp) values (tripdomain(%s), %s, %s)", bundle)
        self._cursor.execute("update urls set regexp = NULL where regexp = ''")
        self._db.commit()

    def load_conf(self, csv_data):
        """Replace the contents of ``rules`` with *csv_data* and commit.

        Every row must have seven columns; the weekday columns are converted
        to ``int``.  Empty rows are skipped.
        """
        self._cursor.execute('delete from rules;')
        bundle = [
            [row[0], row[1], int(row[2]), int(row[3]), row[4], row[5], row[6]]
            for row in csv_data if row
        ]
        self._cursor.executemany("insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag) values (%s::text::cidr, %s, %s, %s, %s::text::time, %s::text::time, %s::text::text[])", bundle)
        self._db.commit()

    def dump_conf(self):
        """Return ``(column names, rows)`` for the whole ``rules`` table."""
        self._cursor.execute("select netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag::text from rules")
        return (self._field_names(), self._cursor.fetchall())
39b97ced92 2011-06-05 c.kworr@d4daf:
39b97ced92 2011-06-05 c.kworr@d4daf: # abstract class with basic checking functionality
class Checker(object):
    """Reads request lines from a queue and answers each one on stdout.

    Every line is matched against the request pattern; matching requests are
    looked up in the database and answered with ``<id> <redirect>`` or just
    ``<id>``, everything else is echoed back unchanged.
    """
    __slots__ = frozenset(['_db', '_log', '_queue', '_request', '_stdout'])

    # request line pattern, compiled once; groups used by loop():
    # 1 - request id, 4 - host, 7 - path without the leading slash, 8 - client address
    _REQUEST_RE = re.compile(r'^([0-9]+)\ ((http|ftp):\/\/)?([-\w.]+)(:[0-9]+)?(\/([^ ]*))?\ ([0-9.:]+)\/(-|[\w.-]+)\ (-|\w+)\ (-|GET|HEAD|POST|CONNECT|OPTIONS).*$')

    def __init__(self, queue, logger):
        """Open the database and attach to *queue* (input) and stdout (output)."""
        self._db = tagDB()
        self._log = logger
        self._log.info('started')
        self._request = self._REQUEST_RE
        self._queue = queue
        self._stdout = FWritelineQueue(sys.stdout)

    def process(self, id, site, ip_address, url_path, line = None):
        """Answer one request.

        The rows returned by the database for *site* and *ip_address* are
        tried in order.  A row with a regexp applies only when the regexp
        matches *url_path*; a row without one (or a request without a path)
        applies unconditionally.  The first applicable row's redirect
        template is formatted with ``host`` and ``path``, written as
        ``<id> <redirect>`` and ``True`` is returned.  When no row applies
        only ``<id>`` is written and ``None`` is returned.
        """
        for row in self._db.check(site, ip_address):
            if row is None or row[0] is None:
                continue
            template, pattern = row[0], row[1]
            if pattern is not None and url_path is not None:
                self._log.info('trying regexp "{}" versus "{}"'.format(pattern, url_path))
                try:
                    if not re.compile(pattern).match(url_path):
                        continue
                    reply = template.format(host = site, path = url_path)
                except Exception:
                    # a broken rule must not stop the helper, try the next row
                    self._log.info("can't compile or execute regexp")
                    continue
            else:
                reply = template.format(host = site, path = url_path)
            self.writeline('{} {}'.format(id, reply))
            return True
        self.writeline('{}'.format(id))

    def loop(self):
        """Process queued lines until the ``None`` end-of-input marker arrives."""
        while True:
            line = self._queue.get()
            if line is None:
                break
            request = self._request.match(line)
            if request is None:
                self._log.info('bad request')
                self.writeline(line)
                continue
            self.process(request.group(1), request.group(4), request.group(8), request.group(7), line)

    def writeline(self, string):
        """Log *string* and queue it for output."""
        self._log.info('> ' + string)
        self._stdout.put(string)
39b97ced92 2011-06-05 c.kworr@d4daf:
# Entry point: with one of the dump/load options the script moves CSV data
# between the database and stdin/stdout; without them it answers requests
# until the input ends.
if config.options.dump or config.options.load or config.options.dump_conf or config.options.load_conf:
    import csv

    tagdb = tagDB()
    # expected CSV header for the urls table and for the rules table
    data_fields = ['site', 'tag', 'regexp']
    conf_fields = ['netmask', 'redirect_url', 'from_weekday', 'to_weekday', 'from_time', 'to_time', 'tag']

    if config.options.dump or config.options.dump_conf:
        csv_writer = csv.writer(sys.stdout)
        if config.options.dump:
            dump = tagdb.dump()
        elif config.options.dump_conf:
            dump = tagdb.dump_conf()

        # header row first, then the data
        csv_writer.writerow(dump[0])
        for line in dump[1]:
            csv_writer.writerow(line)

    elif config.options.load or config.options.load_conf:
        # NOTE(review): by this point stdin has already been put into
        # non-blocking mode and handed to a reader greenlet by the module-level
        # FReadlineQueue - confirm that the CSV reader still sees the whole input.
        csv_reader = csv.reader(sys.stdin)
        # empty input gives None, which is then reported as a wrong header
        # instead of failing with a bare StopIteration
        first_row = next(csv_reader, None)

        if config.options.load:
            fields = data_fields
            load = tagdb.load
        elif config.options.load_conf:
            fields = conf_fields
            load = tagdb.load_conf

        # explicit check instead of ``assert`` so it survives ``python -O``
        if first_row != fields:
            raise ValueError('File must contain csv data with theese columns: ' + repr(fields))
        load(csv_reader)

else:
    # main loop
    Checker(stdin, logger).loop()