Squid url redirector

Annotation For squid-tagger.py
anonymous

Annotation For squid-tagger.py

Origin for each line in squid-tagger.py from check-in ed7808827d:

d500448801 2009-10-05    1: #!/usr/bin/env python3.1
d500448801 2009-10-05    2: 
d500448801 2009-10-05    3: import configparser, optparse, os, postgresql.api, re, sys, _thread
d500448801 2009-10-05    4: 
b93dc49210 2009-10-13    5: # wrapper around syslog, can be muted
class Logger:
	"""Small front end to syslog that can be switched off from the config.

	The 'log' section of the global config is consulted once, at construction
	time: when its 'silent' option equals 'yes' every logging call becomes a
	no-op, otherwise messages are sent to syslog under the 'squidTag' ident.
	"""
	__slots__ = frozenset(['_syslog'])

	def __init__(self):
		config.section('log')
		muted = config['silent'] == 'yes'
		if muted:
			# no backend at all: info() and notice() check for this
			self._syslog = None
			return
		# syslog is only imported when logging is actually wanted
		import syslog
		self._syslog = syslog
		self._syslog.openlog('squidTag')

	def info(self, message):
		"""Send message to syslog with LOG_INFO priority unless muted."""
		backend = self._syslog
		if not backend:
			return
		backend.syslog(backend.LOG_INFO, message)

	def notice(self, message):
		"""Send message to syslog with LOG_NOTICE priority unless muted."""
		backend = self._syslog
		if not backend:
			return
		backend.syslog(backend.LOG_NOTICE, message)
d500448801 2009-10-05   25: 
b93dc49210 2009-10-13   26: # wrapper around database
class tagDB:
	"""Database access object holding the connection and the rule lookup statement.

	The connection is opened on first use from the 'database' section of the
	global config (user, password, host, database), and the site_rule lookup
	is prepared once in the constructor.
	"""
	__slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

	def __init__(self):
		self._prepared = set()
		# False means "not connected yet"; _curs() replaces it with the connection
		self._db = False
		connection = self._curs()
		self._check_stmt = connection.prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")

	def _curs(self):
		"""Return the database connection, opening it on the first call."""
		if self._db:
			return self._db
		config.section('database')
		credentials = (
			config['user'],
			config['password'],
			config['host'],
			config['database'],
		)
		location = 'pq://{}:{}@{}/{}'.format(*credentials)
		self._db = postgresql.open(location)
		return self._db

	def check(self, site, ip_address):
		"""Run the prepared rule lookup for site and ip_address and return its result."""
		rows = self._check_stmt(site, ip_address)
		return rows
b93dc49210 2009-10-13   49: 
b93dc49210 2009-10-13   50: # abstract class with basic checking functionality
class Checker:
	"""Basic checker: reads request lines from stdin and answers them one by one.

	Each input line is parsed by check(); parsed requests are handed to
	process(), which looks the site up through the database wrapper and writes
	'<id> <reply>' to stdout. Subclasses override process() and loop() to change
	how and when requests are answered.
	"""
	__slots__ = frozenset(['_db', '_log'])

	# Layout of one request line as matched below:
	# id, scheme://site/path, ip/fqdn, ident, method.
	# Raw string so the regex escapes are not read as (invalid) string escapes;
	# compiled once instead of on every input line.
	_request_re = re.compile(r'^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')

	def __init__(self):
		self._db = tagDB()
		self._log = Logger()

	def process(self, id, site, ip_address, url_path, line = None):
		"""Look up the rules for one request and write the reply line.

		id          -- request id, echoed back as the first word of the reply
		site        -- host part of the requested URL
		ip_address  -- client address, passed to the database lookup
		url_path    -- path part of the URL, matched against rule regexps
		line        -- original input line; not used here, subclasses log it

		The first row that has a redirect URL and either no regexp or a regexp
		matching url_path wins and produces '302:<redirect_url>'; when no row
		qualifies the reply is '-'.
		"""
		self._log.info('trying {}\n'.format(site))
		reply = '-'
		for row in self._db.check(site, ip_address):
			# rows without a redirect target can never produce a reply
			if row is None or row[0] is None:
				continue
			if row[1] is not None:
				self._log.info('trying regexp "{}" versus "{}"\n'.format(row[1], url_path))
				if not re.match(row[1], url_path):
					continue
			reply = '302:' + row[0]
			break
		self.writeline('{} {}\n'.format(id, reply))

	def check(self, line):
		"""Parse one input line and pass it to process().

		Lines that do not look like a request are logged as 'bad request' and
		written back to stdout unchanged.
		"""
		request = self._request_re.match(line)
		if request is None:
			self._log.info('bad request\n')
			self.writeline(line)
			return
		# group 2 is the protocol, which is not needed for the lookup
		request_id, _proto, site, url_path, ip_address = request.groups()[:5]
		self.process(request_id, site, ip_address, url_path, line)

	def writeline(self, string):
		"""Log string and write it to stdout, flushing immediately."""
		self._log.info('sending: ' + string)
		sys.stdout.write(string)
		sys.stdout.flush()

	def loop(self):
		"""Feed stdin to check() line by line until end of input."""
		# readline() returns '' only at end of input
		for line in iter(sys.stdin.readline, ''):
			self.check(line)
ed7808827d 2009-10-14  101: 
b93dc49210 2009-10-13  102: # threaded checking facility
class CheckerThread(Checker):
	"""Checker that answers requests from one background worker thread.

	The main thread keeps reading and parsing stdin; every parsed request is
	put on a queue. The worker thread started in the constructor takes requests
	off that queue in arrival order and answers them with Checker.process().
	"""
	__slots__ = frozenset(['_queue'])

	def __init__(self):
		# basic initialisation
		Checker.__init__(self)

		# queue.Queue does its own locking, blocks the worker while it is empty
		# and tracks unfinished requests, so no hand-made locks are needed
		import queue
		self._queue = queue.Queue()
		_thread.start_new_thread(self._start, ())

	def _start(self):
		"""Worker thread body: answer queued requests until the program exits."""
		while True:
			# blocks until the main thread queues something
			req = self._queue.get()
			try:
				Checker.process(self, req[0], req[1], req[2], req[3])
			finally:
				# mark the request as handled even when process() raised
				self._queue.task_done()

	def process(self, id, site, ip_address, url_path, line):
		"""Queue one parsed request for the worker thread instead of answering it.

		Arguments are the same as for Checker.process(); line is only logged.
		"""
		# log first, so this entry precedes anything the worker logs for the request
		self._log.info('request {} queued ({})\n'.format(id, line))
		self._queue.put((id, site, ip_address, url_path))

	def loop(self):
		"""Read stdin until end of input, then wait for the worker to finish."""
		for line in iter(sys.stdin.readline, ''):
			self.check(line)
		# returns once every queued request has been marked done by the worker
		self._queue.join()
ed7808827d 2009-10-14  152: 
ed7808827d 2009-10-14  153: # kqueue enable class for BSD's XXX broken for now
class CheckerKqueue(Checker):
	"""Checker that uses kqueue to choose between reading input and answering.

	While stdin has something to read, lines are parsed and queued; when a poll
	reports nothing to read, one queued request is answered.

	NOTE(review): sys.stdin.readline() is buffered, so lines already pulled into
	Python's buffer presumably do not trigger another kqueue event; this looks
	like the reason the class was marked broken. Confirm before relying on it.
	"""
	__slots__ = frozenset(['_kq', '_select', '_queue'])

	def __init__(self):
		# basic initialisation
		Checker.__init__(self)

		# importing select module
		import select
		self._select = select

		# creating kqueue
		self._kq = self._select.kqueue()
		assert (self._kq.fileno() != -1)

		# watching sys.stdin for data
		self._kq.control([self._select.kevent(sys.stdin, self._select.KQ_FILTER_READ, self._select.KQ_EV_ADD)], 0)

		# creating data queue; deque gives cheap removal from the front
		import collections
		self._queue = collections.deque()

	def loop(self):
		"""Alternate between reading requests and answering queued ones.

		Returns at end of input, after answering everything still queued.
		"""
		# Wait for data by default
		timeout = None
		while True:
			# checking if there is any data
			kevs = self._kq.control(None, 1, timeout)
			if kevs:
				# XXX add some code to read only known data size and check for newlines
				line = sys.stdin.readline()
				if not line:
					# end of input: stop polling, otherwise the event fires forever
					break
				# add data to the queue (bad lines are answered right away by check)
				self.check(line)
			elif self._queue:
				req = self._queue.popleft()
				Checker.process(self, req[0], req[1], req[2], req[3])
			# poll without waiting only while there is something left to process
			timeout = 0 if self._queue else None

		# answer whatever was queued before the input ended
		while self._queue:
			req = self._queue.popleft()
			Checker.process(self, req[0], req[1], req[2], req[3])

	def process(self, id, site, ip_address, url_path, line):
		"""Queue one parsed request; loop() answers it when input is idle."""
		self._queue.append((id, site, ip_address, url_path))
		self._log.info('request {} queued ({})\n'.format(id, line))
7e3418d94f 2009-10-12  199: 
fc934cead1 2009-10-13  200: # this class processes the config file and substitutes default values
class Config:
	"""Config file reader with built-in defaults.

	Usage: select a section with section(), then read options with
	config[name]. Options missing from the file fall back to _default, and to
	None when no default is known either.
	"""
	# _default is a class attribute and must not be listed here: a slot name that
	# is also a class variable makes the class definition itself fail
	__slots__ = frozenset(['_config', '_section'])
	_default = {
		'reactor': {
			'reactor': 'thread',
		},
		'log': {
			'silent': 'no',
		},
		'database': {
			'host': 'localhost',
			'database': 'squidTag',
	},}

	# function to read in config file
	def __init__(self):
		"""Parse the command line and load the config file named by -c/--config.

		Exits with status 2 when the file can't be read.
		"""
		parser = optparse.OptionParser()
		parser.add_option('-c', '--config', dest = 'config',
			help = 'config file location', metavar = 'FILE',
			default = '/usr/local/etc/squid-tagger.conf')

		(options, args) = parser.parse_args()

		if not os.access(options.config, os.R_OK):
			# stdout carries the replies to squid, so complain on stderr
			print("Can't read {}: exitting".format(options.config), file = sys.stderr)
			sys.exit(2)

		self._config = configparser.ConfigParser()
		with open(options.config) as stream:
			# read_file() replaced readfp(), which newer Pythons no longer have
			read_file = getattr(self._config, 'read_file', None) or self._config.readfp
			read_file(stream)

	# function to select config file section or create one
	def section(self, section):
		"""Make section the one that subsequent config[name] lookups read from."""
		if not self._config.has_section(section):
			self._config.add_section(section)
		self._section = section

	# function to get config parameter, if parameter doesn't exists the default
	# value or None is substituted
	def __getitem__(self, name):
		"""Return option name from the selected section, its default, or None."""
		if self._config.has_option(self._section, name):
			return self._config.get(self._section, name)
		# defaults are returned directly: the parser only accepts string values,
		# so a missing option can't be stored in it as None
		return self._default.get(self._section, {}).get(name)
d500448801 2009-10-05  249: 
fc934cead1 2009-10-13  250: # initializing and reading in config file
config = Config()

# picking the checker implementation named by the 'reactor' option
config.section('reactor')
reactor = config['reactor']
reactors = {
	'thread': CheckerThread,
	'plain': Checker,
	'kqueue': CheckerKqueue,
}
if reactor not in reactors:
	# without this an unknown value would only surface as a NameError below
	sys.exit('Unknown reactor {!r}: expected one of {}'.format(reactor, ', '.join(sorted(reactors))))
checker = reactors[reactor]()

# serving requests until end of input
checker.loop()