Squid url redirector

Annotation For squid-tagger.py
anonymous

Annotation For squid-tagger.py

Origin for each line in squid-tagger.py from check-in 2654b86697:

0ef24b1937 2011-04-06    1: #!/usr/bin/env python
0ef24b1937 2011-04-06    2: 
0ef24b1937 2011-04-06    3: from __future__ import division, print_function, unicode_literals
0ef24b1937 2011-04-06    4: 
0ef24b1937 2011-04-06    5: import gevent.monkey
0ef24b1937 2011-04-06    6: gevent.monkey.patch_all()
0ef24b1937 2011-04-06    7: 
0ef24b1937 2011-04-06    8: import fcntl, gevent.core, gevent.pool, gevent.queue, gevent.socket, os, psycopg2, re, sys
0ef24b1937 2011-04-06    9: 
0ef24b1937 2011-04-06   10: # //inclusion start
0ef24b1937 2011-04-06   11: # Copyright (C) 2010 Daniele Varrazzo <daniele.varrazzo@gmail.com>
0ef24b1937 2011-04-06   12: # and licensed under the MIT license:
0ef24b1937 2011-04-06   13: 
0ef24b1937 2011-04-06   14: def gevent_wait_callback(conn, timeout=None):
0ef24b1937 2011-04-06   15: 	"""A wait callback useful to allow gevent to work with Psycopg."""
0ef24b1937 2011-04-06   16: 	while 1:
0ef24b1937 2011-04-06   17: 		state = conn.poll()
0ef24b1937 2011-04-06   18: 		if state == psycopg2.extensions.POLL_OK:
0ef24b1937 2011-04-06   19: 			break
0ef24b1937 2011-04-06   20: 		elif state == psycopg2.extensions.POLL_READ:
0ef24b1937 2011-04-06   21: 			gevent.socket.wait_read(conn.fileno(), timeout=timeout)
0ef24b1937 2011-04-06   22: 		elif state == psycopg2.extensions.POLL_WRITE:
0ef24b1937 2011-04-06   23: 			gevent.socket.wait_write(conn.fileno(), timeout=timeout)
0ef24b1937 2011-04-06   24: 		else:
0ef24b1937 2011-04-06   25: 			raise psycopg2.OperationalError("Bad result from poll: %r" % state)
0ef24b1937 2011-04-06   26: 
0ef24b1937 2011-04-06   27: if not hasattr(psycopg2.extensions, 'set_wait_callback'):
0ef24b1937 2011-04-06   28: 	raise ImportError("support for coroutines not available in this Psycopg version (%s)" % psycopg2.__version__)
0ef24b1937 2011-04-06   29: 	psycopg2.extensions.set_wait_callback(gevent_wait_callback)
0ef24b1937 2011-04-06   30: 
0ef24b1937 2011-04-06   31: # //inclusion end
fc934cead1 2009-10-13   32: 
fc934cead1 2009-10-13   33: # this classes processes config file and substitutes default values
fc934cead1 2009-10-13   34: class Config:
ae30851739 2010-08-12   35: 	__slots__ = frozenset(['_config', '_default', '_section', 'options'])
b93dc49210 2009-10-13   36: 	_default = {
fc934cead1 2009-10-13   37: 		'log': {
fc934cead1 2009-10-13   38: 			'silent': 'no',
fc934cead1 2009-10-13   39: 		},
fc934cead1 2009-10-13   40: 		'database': {
fc934cead1 2009-10-13   41: 			'host': 'localhost',
fc934cead1 2009-10-13   42: 			'database': 'squidTag',
fc934cead1 2009-10-13   43: 	},}
fc934cead1 2009-10-13   44: 
fc934cead1 2009-10-13   45: 	# function to read in config file
fc934cead1 2009-10-13   46: 	def __init__(self):
0ef24b1937 2011-04-06   47: 		import ConfigParser, optparse, os
ae30851739 2010-08-12   48: 
d500448801 2009-10-05   49: 		parser = optparse.OptionParser()
d500448801 2009-10-05   50: 		parser.add_option('-c', '--config', dest = 'config',
d500448801 2009-10-05   51: 			help = 'config file location', metavar = 'FILE',
d500448801 2009-10-05   52: 			default = '/usr/local/etc/squid-tagger.conf')
ae30851739 2010-08-12   53: 		parser.add_option('-d', '--dump', dest = 'dump',
ae30851739 2010-08-12   54: 			help = 'dump database', action = 'store_true', metavar = 'bool',
ae30851739 2010-08-12   55: 			default = False)
31e69c4237 2010-08-12   56: 		parser.add_option('-f', '--flush-database', dest = 'flush_db',
31e69c4237 2010-08-12   57: 			help = 'flush previous database on load', default = False,
31e69c4237 2010-08-12   58: 			action = 'store_true', metavar = 'bool')
31e69c4237 2010-08-12   59: 		parser.add_option('-l', '--load', dest = 'load',
31e69c4237 2010-08-12   60: 			help = 'load database', action = 'store_true', metavar = 'bool',
31e69c4237 2010-08-12   61: 			default = False)
d301d9adc6 2010-08-13   62: 		parser.add_option('-D', '--dump-conf', dest = 'dump_conf',
d301d9adc6 2010-08-13   63: 			help = 'dump filtering rules', default = False, metavar = 'bool',
d301d9adc6 2010-08-13   64: 			action = 'store_true')
d301d9adc6 2010-08-13   65: 		parser.add_option('-L', '--load-conf', dest = 'load_conf',
d301d9adc6 2010-08-13   66: 			help = 'load filtering rules', default = False, metavar = 'bool',
d301d9adc6 2010-08-13   67: 			action = 'store_true')
7c13294e9f 2010-08-07   68: 
ae30851739 2010-08-12   69: 		(self.options, args) = parser.parse_args()
7c13294e9f 2010-08-07   70: 
ae30851739 2010-08-12   71: 		assert os.access(self.options.config, os.R_OK), "Fatal error: can't read {}".format(self.options.config)
7c13294e9f 2010-08-07   72: 
0ef24b1937 2011-04-06   73: 		self._config = ConfigParser.ConfigParser()
ae30851739 2010-08-12   74: 		self._config.readfp(open(self.options.config))
fc934cead1 2009-10-13   75: 
fc934cead1 2009-10-13   76: 	# function to select config file section or create one
d500448801 2009-10-05   77: 	def section(self, section):
fc934cead1 2009-10-13   78: 		if not self._config.has_section(section):
fc934cead1 2009-10-13   79: 			self._config.add_section(section)
d500448801 2009-10-05   80: 		self._section = section
d500448801 2009-10-05   81: 
fc934cead1 2009-10-13   82: 	# function to get config parameter, if parameter doesn't exists the default
fc934cead1 2009-10-13   83: 	# value or None is substituted
d500448801 2009-10-05   84: 	def __getitem__(self, name):
fc934cead1 2009-10-13   85: 		if not self._config.has_option(self._section, name):
b93dc49210 2009-10-13   86: 			if self._section in self._default:
b93dc49210 2009-10-13   87: 				if name in self._default[self._section]:
fc934cead1 2009-10-13   88: 					self._config.set(self._section, name, self._default[self._section][name])
fc934cead1 2009-10-13   89: 				else:
fc934cead1 2009-10-13   90: 					self._config.set(self._section, name, None)
fc934cead1 2009-10-13   91: 			else:
fc934cead1 2009-10-13   92: 				self._config.set(self._section, name, None)
b93dc49210 2009-10-13   93: 		return(self._config.get(self._section, name))
d500448801 2009-10-05   94: 
fc934cead1 2009-10-13   95: # initializing and reading in config file
d500448801 2009-10-05   96: config = Config()
d500448801 2009-10-05   97: 
fad48b740c 2012-07-07   98: import logging, logging.handlers
2654b86697 2012-07-09   99: 
2654b86697 2012-07-09  100: # wrapper around logging handler to make it queue records and don't stall when sending them
2654b86697 2012-07-09  101: 
2654b86697 2012-07-09  102: class SysLogHandlerQueue(logging.handlers.SysLogHandler):
2654b86697 2012-07-09  103: 	__slots__ = frozenset(['_event', '_tail', '_workers'])
2654b86697 2012-07-09  104: 
2654b86697 2012-07-09  105: 	def __init__(self):
2654b86697 2012-07-09  106: 		logging.handlers.SysLogHandler.__init__(self, '/dev/log')
2654b86697 2012-07-09  107: 		self._event = gevent.event.Event()
2654b86697 2012-07-09  108: 		self._event.set()
2654b86697 2012-07-09  109: 		self._tail = gevent.queue.Queue()
2654b86697 2012-07-09  110: 		self._workers = set()
2654b86697 2012-07-09  111: 
2654b86697 2012-07-09  112: 	def emit(self, record):
2654b86697 2012-07-09  113: 		# my syslog is broken and cannot into UTF-8 BOM
2654b86697 2012-07-09  114: 		record.msg = str(record.msg)
2654b86697 2012-07-09  115: 		self._tail.put(record)
2654b86697 2012-07-09  116: 		if self._tail.qsize() != 0:
2654b86697 2012-07-09  117: 			# in case queue is empty we will spawn new worker
2654b86697 2012-07-09  118: 			# all workers are logged so we can kill them on close()
2654b86697 2012-07-09  119: 			self._workers.add(gevent.spawn(self._writer))
2654b86697 2012-07-09  120: 
2654b86697 2012-07-09  121: 	def _writer(self):
2654b86697 2012-07-09  122: 		# here we are locking the queue so we can be sure we are the only one
2654b86697 2012-07-09  123: 		self._event.wait()
2654b86697 2012-07-09  124: 		self._event.clear()
2654b86697 2012-07-09  125: 		while not self._tail.empty():
2654b86697 2012-07-09  126: 			logging.handlers.SysLogHandler.emit(self, self._tail.get())
2654b86697 2012-07-09  127: 		self._event.set()
2654b86697 2012-07-09  128: 		self._workers.remove(gevent.getcurrent())
2654b86697 2012-07-09  129: 
2654b86697 2012-07-09  130: 	def close(self):
2654b86697 2012-07-09  131: 		for worker in self._workers:
2654b86697 2012-07-09  132: 			gevent.kill(worker)
2654b86697 2012-07-09  133: 		logging.handlers.SysLogHandler.close(self)
2654b86697 2012-07-09  134: 
fad48b740c 2012-07-07  135: logger = logging.getLogger('squidTag')
fad48b740c 2012-07-07  136: logger.setLevel(logging.INFO)
2654b86697 2012-07-09  137: handler = SysLogHandlerQueue()
fad48b740c 2012-07-07  138: handler.setFormatter(logging.Formatter(str('squidTag[%(process)s]: %(message)s')))
fad48b740c 2012-07-07  139: logger.addHandler(handler)
39b97ced92 2011-06-05  140: 
39b97ced92 2011-06-05  141: # tiny wrapper around a file to make reads from it geventable
39b97ced92 2011-06-05  142: # or should i move this somewhere?
39b97ced92 2011-06-05  143: 
39b97ced92 2011-06-05  144: class FReadlineQueue(gevent.queue.Queue):
39b97ced92 2011-06-05  145: 	# storing file descriptor, leftover
39b97ced92 2011-06-05  146: 	__slots__ = frozenset(['_fd', '_tail'])
39b97ced92 2011-06-05  147: 
39b97ced92 2011-06-05  148: 	def __init__(self, fd):
39b97ced92 2011-06-05  149: 		# initialising class
39b97ced92 2011-06-05  150: 		gevent.queue.Queue.__init__(self)
39b97ced92 2011-06-05  151: 		# storing file descriptor
39b97ced92 2011-06-05  152: 		self._fd = fd
39b97ced92 2011-06-05  153: 		# using empty tail
39b97ced92 2011-06-05  154: 		self._tail = ''
39b97ced92 2011-06-05  155: 		# setting up event
39b97ced92 2011-06-05  156: 		self._install_wait()
39b97ced92 2011-06-05  157: 
39b97ced92 2011-06-05  158: 	def _install_wait(self):
39b97ced92 2011-06-05  159: 		fileno = self._fd.fileno()
39b97ced92 2011-06-05  160: 		# putting file to nonblocking mode
39b97ced92 2011-06-05  161: 		fcntl.fcntl(fileno, fcntl.F_SETFL, fcntl.fcntl(fileno, fcntl.F_GETFL)  | os.O_NONBLOCK)
39b97ced92 2011-06-05  162: 		# installing event handler
39b97ced92 2011-06-05  163: 		gevent.core.read_event(fileno, self._wait_helper)
39b97ced92 2011-06-05  164: 
39b97ced92 2011-06-05  165: 	def _wait_helper(self, ev, evtype):
39b97ced92 2011-06-05  166: 		# reading one buffer from stream
39b97ced92 2011-06-05  167: 		buf = self._fd.read(4096)
39b97ced92 2011-06-05  168: 		# splitting stream by line ends
39b97ced92 2011-06-05  169: 		rows = buf.decode('l1').split('\n')
39b97ced92 2011-06-05  170: 		# adding tail to the first element if there is some tail
39b97ced92 2011-06-05  171: 		if len(self._tail) > 0:
39b97ced92 2011-06-05  172: 			rows[0] = self._tail + rows[0]
39b97ced92 2011-06-05  173: 		# popping out last (incomplete) element
39b97ced92 2011-06-05  174: 		self._tail = rows.pop(-1)
39b97ced92 2011-06-05  175: 		# dropping all complete elements to the queue
39b97ced92 2011-06-05  176: 		for row in rows:
39b97ced92 2011-06-05  177: 			self.put_nowait(row)
2654b86697 2012-07-09  178: 			logger.info('< ' + row)
39b97ced92 2011-06-05  179: 		if len(buf) > 0:
39b97ced92 2011-06-05  180: 			# no EOF, reinstalling event handler
39b97ced92 2011-06-05  181: 			gevent.core.read_event(self._fd.fileno(), self._wait_helper)
39b97ced92 2011-06-05  182: 		else:
39b97ced92 2011-06-05  183: 			# EOF found, sending EOF to queue
39b97ced92 2011-06-05  184: 			self.put_nowait(None)
39b97ced92 2011-06-05  185: 
39b97ced92 2011-06-05  186: stdin = FReadlineQueue(sys.stdin)
2654b86697 2012-07-09  187: 
2654b86697 2012-07-09  188: # wrapper against file handler that makes possible to queue some writes without stalling
d823fa83dd 2012-07-07  189: 
d823fa83dd 2012-07-07  190: class FWritelineQueue(gevent.queue.JoinableQueue):
d823fa83dd 2012-07-07  191: 	# storing fileno, io interface, leftover
d823fa83dd 2012-07-07  192: 	__slots__ = frozenset(['_fileno', '_io', '_tail'])
d823fa83dd 2012-07-07  193: 
d823fa83dd 2012-07-07  194: 	def __init__(self, fd, closefd = True):
d823fa83dd 2012-07-07  195: 		import io
d823fa83dd 2012-07-07  196: 		# initialising class
d823fa83dd 2012-07-07  197: 		gevent.queue.JoinableQueue.__init__(self)
d823fa83dd 2012-07-07  198: 		# storing fileno
d823fa83dd 2012-07-07  199: 		self._fileno = fd.fileno()
d823fa83dd 2012-07-07  200: 		# creating interface
d823fa83dd 2012-07-07  201: 		self._io = io.FileIO(self._fileno, 'w', closefd)
d823fa83dd 2012-07-07  202: 		# using empty tail
d823fa83dd 2012-07-07  203: 		self._tail = None
d823fa83dd 2012-07-07  204: 		# putting file to nonblocking mode
d823fa83dd 2012-07-07  205: 		fcntl.fcntl(self._fileno, fcntl.F_SETFL, fcntl.fcntl(self._fileno, fcntl.F_GETFL)  | os.O_NONBLOCK)
d823fa83dd 2012-07-07  206: 
d823fa83dd 2012-07-07  207: 	def __del__(self):
d823fa83dd 2012-07-07  208: 		# purge queue before deleting
d823fa83dd 2012-07-07  209: 		if not self.empty():
d823fa83dd 2012-07-07  210: 			self.join()
d823fa83dd 2012-07-07  211: 
d823fa83dd 2012-07-07  212: 	def put(self, item, block=True, timeout=None):
d823fa83dd 2012-07-07  213: 		# calling real put
d823fa83dd 2012-07-07  214: 		gevent.queue.JoinableQueue.put(self, item, block, timeout)
d823fa83dd 2012-07-07  215: 		# installing event handler
d823fa83dd 2012-07-07  216: 		gevent.core.write_event(self._fileno, self._wait_helper)
d823fa83dd 2012-07-07  217: 
d823fa83dd 2012-07-07  218: 	def _wait_helper(self, ev, evtype):
d823fa83dd 2012-07-07  219: 		# XXX ev, evtype checking?
d823fa83dd 2012-07-07  220: 		# checking leftover
d823fa83dd 2012-07-07  221: 		while True:
d823fa83dd 2012-07-07  222: 			if self._tail == None:
d823fa83dd 2012-07-07  223: 				try:
d823fa83dd 2012-07-07  224: 					self._tail = str(self.get_nowait()).encode('utf-8') + '\n'
d823fa83dd 2012-07-07  225: 				except gevent.queue.Empty:
d823fa83dd 2012-07-07  226: 					self._tail = None
d823fa83dd 2012-07-07  227: 					return
d823fa83dd 2012-07-07  228: 			# writing tail
d823fa83dd 2012-07-07  229: 			written = self._io.write(self._tail)
d823fa83dd 2012-07-07  230: 			length = len(self._tail)
d823fa83dd 2012-07-07  231: 			if written == length:
d823fa83dd 2012-07-07  232: 				self._tail = None
d823fa83dd 2012-07-07  233: 			elif written < length:
d823fa83dd 2012-07-07  234: 				self._tail = self._tail[written:]
d823fa83dd 2012-07-07  235: 				break
d823fa83dd 2012-07-07  236: 			else:
d823fa83dd 2012-07-07  237: 				break
d823fa83dd 2012-07-07  238: 		# reinstalling event handler
d823fa83dd 2012-07-07  239: 		gevent.core.write_event(self._fileno, self._wait_helper)
d2c7ba18a4 2011-09-14  240: 
39b97ced92 2011-06-05  241: # wrapper around database
39b97ced92 2011-06-05  242: class tagDB(object):
39b97ced92 2011-06-05  243: 	__slots__ = frozenset(['_cursor', '_db'])
39b97ced92 2011-06-05  244: 
39b97ced92 2011-06-05  245: 	def __init__(self):
39b97ced92 2011-06-05  246: 		config.section('database')
d2c7ba18a4 2011-09-14  247: 		if config['host'] == None:
d2c7ba18a4 2011-09-14  248: 			self._db = psycopg2.connect(
d2c7ba18a4 2011-09-14  249: 				database = config['database'],
d2c7ba18a4 2011-09-14  250: 				user = config['user'],
d2c7ba18a4 2011-09-14  251: 				password = config['password']
d2c7ba18a4 2011-09-14  252: 			)
d2c7ba18a4 2011-09-14  253: 		else:
d2c7ba18a4 2011-09-14  254: 			self._db = psycopg2.connect(
d2c7ba18a4 2011-09-14  255: 				database = config['database'],
d2c7ba18a4 2011-09-14  256: 				host = config['host'],
d2c7ba18a4 2011-09-14  257: 				user = config['user'],
d2c7ba18a4 2011-09-14  258: 				password = config['password']
d2c7ba18a4 2011-09-14  259: 			)
39b97ced92 2011-06-05  260: 		self._cursor = self._db.cursor()
39b97ced92 2011-06-05  261: 
39b97ced92 2011-06-05  262: 	def _field_names(self):
39b97ced92 2011-06-05  263: 		names = []
39b97ced92 2011-06-05  264: 		for record in self._cursor.description:
39b97ced92 2011-06-05  265: 			names.append(record.name)
39b97ced92 2011-06-05  266: 		return(names)
39b97ced92 2011-06-05  267: 
39b97ced92 2011-06-05  268: 	def check(self, site, ip_address):
39b97ced92 2011-06-05  269: 		self._cursor.execute("select * from (select redirect_url, regexp from site_rule where site <@ tripdomain(%s) and netmask >>= %s order by array_length(site, 1) desc) a group by redirect_url, regexp", [site, ip_address])
39b97ced92 2011-06-05  270: 		return(self._cursor.fetchall())
39b97ced92 2011-06-05  271: 
39b97ced92 2011-06-05  272: 	def dump(self):
39b97ced92 2011-06-05  273: 		self._cursor.execute("select untrip(site) as site, tag::text, regexp from urls order by site, tag")
39b97ced92 2011-06-05  274: 		return(self._field_names(), self._cursor.fetchall())
39b97ced92 2011-06-05  275: 
39b97ced92 2011-06-05  276: 	def load(self, data):
39b97ced92 2011-06-05  277: 		if config.options.flush_db:
39b97ced92 2011-06-05  278: 			self._cursor.execute('delete from urls;')
39b97ced92 2011-06-05  279: 		bundle = []
39b97ced92 2011-06-05  280: 		for row in data:
39b97ced92 2011-06-05  281: 			if len(row) == 2:
39b97ced92 2011-06-05  282: 				bundle.append([row[0], row[1], None])
39b97ced92 2011-06-05  283: 			else:
39b97ced92 2011-06-05  284: 				bundle.append([row[0], row[1], row[2]])
39b97ced92 2011-06-05  285: 		self._cursor.executemany("insert into urls (site, tag, regexp) values (tripdomain(%s), %s, %s)", bundle)
39b97ced92 2011-06-05  286: 		self._cursor.execute("update urls set regexp = NULL where regexp = ''")
39b97ced92 2011-06-05  287: 		self._db.commit()
39b97ced92 2011-06-05  288: 
39b97ced92 2011-06-05  289: 	def load_conf(self, csv_data):
39b97ced92 2011-06-05  290: 		self._cursor.execute('delete from rules;')
39b97ced92 2011-06-05  291: 		bundle = []
39b97ced92 2011-06-05  292: 		for row in csv_data:
39b97ced92 2011-06-05  293: 			bundle.append([row[0], row[1], int(row[2]), int(row[3]), row[4], row[5], row[6]])
39b97ced92 2011-06-05  294: 		self._cursor.executemany("insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag) values (%s::text::cidr, %s, %s, %s, %s::text::time, %s::text::time, %s::text::text[])", bundle)
39b97ced92 2011-06-05  295: 		self._db.commit()
39b97ced92 2011-06-05  296: 
39b97ced92 2011-06-05  297: 	def dump_conf(self):
39b97ced92 2011-06-05  298: 		self._cursor.execute("select netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag::text from rules")
39b97ced92 2011-06-05  299: 		return(self._field_names(), self._cursor.fetchall())
39b97ced92 2011-06-05  300: 
39b97ced92 2011-06-05  301: # abstract class with basic checking functionality
39b97ced92 2011-06-05  302: class Checker(object):
d823fa83dd 2012-07-07  303: 	__slots__ = frozenset(['_db', '_log', '_queue', '_request', '_stdout'])
39b97ced92 2011-06-05  304: 
39b97ced92 2011-06-05  305: 	def __init__(self, queue, logger):
39b97ced92 2011-06-05  306: 		self._db = tagDB()
39b97ced92 2011-06-05  307: 		self._log = logger
2654b86697 2012-07-09  308: 		self._log.info('started')
39b97ced92 2011-06-05  309: 		self._request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')
39b97ced92 2011-06-05  310: 		self._queue = queue
d823fa83dd 2012-07-07  311: 		self._stdout = FWritelineQueue(sys.stdout, False)
39b97ced92 2011-06-05  312: 
39b97ced92 2011-06-05  313: 	def process(self, id, site, ip_address, url_path, line = None):
2654b86697 2012-07-09  314: 		#self._log.info('trying {}'.format(site))
39b97ced92 2011-06-05  315: 		result = self._db.check(site, ip_address)
39b97ced92 2011-06-05  316: 		reply = None
2654b86697 2012-07-09  317: 		#self._log.info('got {} lines from database'.format(len(result)))
39b97ced92 2011-06-05  318: 		for row in result:
39b97ced92 2011-06-05  319: 			if row != None and row[0] != None:
39b97ced92 2011-06-05  320: 				if row[1] != None:
2654b86697 2012-07-09  321: 					self._log.info('trying regexp "{}" versus "{}"'.format(row[1], url_path))
39b97ced92 2011-06-05  322: 					try:
39b97ced92 2011-06-05  323: 						if re.compile(row[1]).match(url_path):
39b97ced92 2011-06-05  324: 							reply = row[0].format(url_path)
39b97ced92 2011-06-05  325: 						else:
39b97ced92 2011-06-05  326: 							continue
39b97ced92 2011-06-05  327: 					except:
2654b86697 2012-07-09  328: 						self._log.info("can't compile regexp")
39b97ced92 2011-06-05  329: 				else:
39b97ced92 2011-06-05  330: 					reply = row[0].format(url_path)
39b97ced92 2011-06-05  331: 			if reply != None:
d823fa83dd 2012-07-07  332: 				self.writeline('{} {}'.format(id, reply))
39b97ced92 2011-06-05  333: 				return(True)
d823fa83dd 2012-07-07  334: 		self.writeline('{}'.format(id))
39b97ced92 2011-06-05  335: 
2654b86697 2012-07-09  336: 	def loop(self):
39b97ced92 2011-06-05  337: 		while True:
39b97ced92 2011-06-05  338: 			line = self._queue.get()
39b97ced92 2011-06-05  339: 			if line == None:
39b97ced92 2011-06-05  340: 				break
2654b86697 2012-07-09  341: 			#self._log.info('request: ' + line)
39b97ced92 2011-06-05  342: 			request = self._request.match(line)
39b97ced92 2011-06-05  343: 			if request:
39b97ced92 2011-06-05  344: 				id = request.group(1)
39b97ced92 2011-06-05  345: 				#proto = request.group(2)
39b97ced92 2011-06-05  346: 				site = request.group(3)
39b97ced92 2011-06-05  347: 				url_path = request.group(4)
39b97ced92 2011-06-05  348: 				ip_address = request.group(5)
39b97ced92 2011-06-05  349: 				self.process(id, site, ip_address, url_path, line)
39b97ced92 2011-06-05  350: 			else:
2654b86697 2012-07-09  351: 				self._log.info('bad request')
d823fa83dd 2012-07-07  352: 				self.writeline(line)
39b97ced92 2011-06-05  353: 
39b97ced92 2011-06-05  354: 	def writeline(self, string):
2654b86697 2012-07-09  355: 		self._log.info('> ' + string)
d823fa83dd 2012-07-07  356: 		self._stdout.put(string)
39b97ced92 2011-06-05  357: 
d301d9adc6 2010-08-13  358: if config.options.dump or config.options.load or config.options.dump_conf or config.options.load_conf:
d301d9adc6 2010-08-13  359: 	import csv
d301d9adc6 2010-08-13  360: 
d301d9adc6 2010-08-13  361: 	tagdb = tagDB()
bde51dc0c7 2010-08-26  362: 	data_fields = ['site', 'tag', 'regexp']
d301d9adc6 2010-08-13  363: 	conf_fields = ['netmask', 'redirect_url', 'from_weekday', 'to_weekday', 'from_time', 'to_time', 'tag']
d301d9adc6 2010-08-13  364: 
d301d9adc6 2010-08-13  365: 	if config.options.dump or config.options.dump_conf:
0ef24b1937 2011-04-06  366: 		csv_writer = csv.writer(sys.stdout)
d301d9adc6 2010-08-13  367: 		if config.options.dump:
bde51dc0c7 2010-08-26  368: 			dump = tagdb.dump()
bde51dc0c7 2010-08-26  369: 		elif config.options.dump_conf:
bde51dc0c7 2010-08-26  370: 			dump = tagdb.dump_conf()
bde51dc0c7 2010-08-26  371: 
0ef24b1937 2011-04-06  372: 		csv_writer.writerow(dump[0])
0ef24b1937 2011-04-06  373: 		for line in dump[1]:
0ef24b1937 2011-04-06  374: 			csv_writer.writerow(line)
d301d9adc6 2010-08-13  375: 
d301d9adc6 2010-08-13  376: 	elif config.options.load or config.options.load_conf:
d301d9adc6 2010-08-13  377: 		csv_reader = csv.reader(sys.stdin)
d301d9adc6 2010-08-13  378: 		first_row = next(csv_reader)
d301d9adc6 2010-08-13  379: 
d301d9adc6 2010-08-13  380: 		if config.options.load:
bde51dc0c7 2010-08-26  381: 			fields = data_fields
bde51dc0c7 2010-08-26  382: 			load = tagdb.load
bde51dc0c7 2010-08-26  383: 		elif config.options.load_conf:
bde51dc0c7 2010-08-26  384: 			fields = conf_fields
bde51dc0c7 2010-08-26  385: 			load = tagdb.load_conf
bde51dc0c7 2010-08-26  386: 
bde51dc0c7 2010-08-26  387: 		assert first_row == fields, 'File must contain csv data with theese columns: ' + repr(fields)
bde51dc0c7 2010-08-26  388: 		load(csv_reader)
d301d9adc6 2010-08-13  389: 
d301d9adc6 2010-08-13  390: else:
d301d9adc6 2010-08-13  391: 	# main loop
39b97ced92 2011-06-05  392: 	Checker(stdin, logger).loop()