Squid url redirector

Annotation For squid-tagger.py
anonymous

Annotation For squid-tagger.py

Origin for each line in squid-tagger.py from check-in 88c03b5440:

d500448801 2009-10-05    1: #!/usr/bin/env python3.1
d500448801 2009-10-05    2: 
d500448801 2009-10-05    3: import configparser, optparse, os, postgresql.api, re, sys, _thread
d500448801 2009-10-05    4: 
class Logger:
	"""Optional syslog front end.

	Logging is disabled when the ``silent`` option of the ``log``
	configuration section equals ``yes``; otherwise messages are sent to
	syslog under the ``squidTag`` identity.
	"""

	__slots__ = frozenset(['_syslog'])

	def __init__(self):
		# Relies on the module-level ``config`` object being created first.
		config.section('log')
		silenced = config['silent'] == 'yes'
		if silenced:
			self._syslog = None
			return
		# Imported lazily so a silenced setup never loads the syslog module.
		import syslog
		self._syslog = syslog
		self._syslog.openlog('squidTag')

	def _emit(self, priority_name, message):
		"""Send ``message`` at the named syslog priority unless silenced."""
		backend = self._syslog
		if not backend:
			return
		backend.syslog(getattr(backend, priority_name), message)

	def info(self, message):
		"""Log ``message`` with priority ``LOG_INFO``."""
		self._emit('LOG_INFO', message)

	def notice(self, message):
		"""Log ``message`` with priority ``LOG_NOTICE``."""
		self._emit('LOG_NOTICE', message)
d500448801 2009-10-05   24: 
class tagDB:
	"""Database access for rule lookups.

	A connection is opened on first use and a single statement is prepared
	that selects ``redirect_url`` and ``regexp`` from ``site_rule`` for a
	given site and client address.
	"""

	__slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

	def __init__(self):
		self._prepared = set()
		# A false value means "not connected yet"; _curs() replaces it.
		self._db = False
		query = "select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc"
		connection = self._curs()
		self._check_stmt = connection.prepare(query)

	def _curs(self):
		"""Return the database connection, opening it on the first call.

		Connection parameters come from the ``database`` section of the
		module-level ``config`` object.
		"""
		if self._db:
			return self._db
		config.section('database')
		# NOTE(review): values are placed into the URL unescaped; confirm
		# that passwords with URL-special characters are handled as intended.
		parts = [config[key] for key in ('user', 'password', 'host', 'database')]
		self._db = postgresql.open('pq://{0}:{1}@{2}/{3}'.format(*parts))
		return self._db

	def check(self, ip_address, site):
		"""Run the prepared lookup; ``site`` is bound to $1, ``ip_address`` to $2."""
		statement = self._check_stmt
		return statement(site, ip_address)
d500448801 2009-10-05   47: 
class CheckerThread:
	"""Background worker that answers queued redirector requests.

	``check()`` parses an input line and queues it; a thread started in
	``__init__`` pops queued requests, looks them up through ``db.check()``
	and writes exactly one reply line per request via ``writeline()``.
	"""

	__slots__ = frozenset(['_db', '_lock', '_lock_queue', '_log', '_queue'])

	# Compiled once for all calls. Groups used: 1 = request id, 3 = site,
	# 4 = URL path, 5 = client IP address.
	_request_re = re.compile(r'^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')

	def __init__(self, db, log):
		"""Store collaborators, set up the locks and start the worker thread.

		db  -- object providing ``check(ip_address, site)`` returning rows
		log -- object providing ``info(message)``
		"""
		self._db = db
		self._log = log
		# Spin lock. Loop acquires it on start then releases it when holding queue
		# lock. This way the thread proceeds without stops while queue has data and
		# gets stalled when no data present. The lock is released by queue writer
		# after storing something into the queue
		self._lock = _thread.allocate_lock()
		self._lock_queue = _thread.allocate_lock()
		self._lock.acquire()
		self._queue = []
		_thread.start_new_thread(self._start, ())

	def _start(self):
		"""Worker loop: wait for data, pop one request, send one reply."""
		while True:
			# Blocks here while the queue is empty (see __init__).
			self._lock.acquire()
			with self._lock_queue:
				# If more items remain after this pop, release the spin lock so
				# the next iteration does not stall. A writer may already have
				# released it, hence the locked() test.
				if len(self._queue) > 1 and self._lock.locked():
					self._lock.release()
				req = self._queue.pop(0)
			self._log.info('trying %s\n'%req[1])
			rows = self._db.check(req[2], req[1])
			# Exactly one reply per request: previously the pass-through line
			# was also written after a redirect had already been sent.
			writeline(self._response(req, rows))

	def _response(self, req, rows):
		"""Return the reply line for ``req`` given the matching rule rows.

		req  -- tuple ``(id, site, ip_address, url_path)``
		rows -- iterable of ``(redirect_url, regexp)`` rows, tried in order

		The first row with a redirect URL whose regexp is absent, or matches
		the start of the URL path, yields ``'<id> 302:<redirect_url>'``.
		If no row qualifies the reply is ``'<id> -'``.
		"""
		for row in rows:
			if row is None or row[0] is None:
				continue
			if row[1] is not None:
				self._log.info('trying regexp "{0}" versus "{1}"\n'.format(row[1], req[3]))
				if not re.compile(row[1]).match(req[3]):
					continue
			return '%s 302:%s\n'%(req[0], row[0])
		return '%s -\n'%req[0]

	def check(self, line):
		"""Parse one input line and queue it for the worker thread.

		Lines that do not match the expected request format are logged as
		bad requests and echoed back unchanged through ``writeline()``.
		"""
		request = self._request_re.match(line)
		if request is None:
			self._log.info('bad request\n')
			writeline(line)
			return
		request_id, site, url_path, ip_address = request.group(1, 3, 4, 5)
		with self._lock_queue:
			self._queue.append((request_id, site, ip_address, url_path))
			# Wake the worker if it is stalled on the spin lock.
			if self._lock.locked():
				self._lock.release()
		self._log.info('request %s queued (%s)\n'%(request_id, line))
d500448801 2009-10-05  105: 
def writeline(string):
	"""Log ``string`` and write it to standard output, flushing at once.

	The flush makes each reply leave the process immediately instead of
	sitting in the output buffer.
	"""
	log.info('sending: %s' % string)
	out = sys.stdout
	out.write(string)
	out.flush()
d500448801 2009-10-05  110: 
class Config:
	"""Configuration file reader with a "current section" cursor.

	The file location comes from the ``-c``/``--config`` command line option
	(default ``/usr/local/etc/squid-tagger.conf``). Call ``section(name)``
	first, then read options of that section with ``config[option]``.
	"""

	__slots__ = frozenset(['_config', '_section'])

	def __init__(self):
		"""Parse the command line and load the configuration file.

		Exits the process with status 2 when the file is not readable.
		"""
		parser = optparse.OptionParser()
		parser.add_option('-c', '--config', dest = 'config',
			help = 'config file location', metavar = 'FILE',
			default = '/usr/local/etc/squid-tagger.conf')

		options = parser.parse_args()[0]

		if not os.access(options.config, os.R_OK):
			# Standard output carries the replies of this program, so the
			# diagnostic goes to standard error instead.
			print("Can't read {}: exitting".format(options.config), file = sys.stderr)
			sys.exit(2)

		self._config = configparser.ConfigParser()
		# The context manager closes the file handle once parsing is done.
		with open(options.config) as config_file:
			# read_file() superseded readfp() in Python 3.2 and readfp() was
			# removed in 3.12; prefer the new name, fall back to the old one.
			reader = getattr(self._config, 'read_file', None) or self._config.readfp
			reader(config_file)

	def section(self, section):
		"""Select the section that subsequent item lookups read from."""
		self._section = section

	def __getitem__(self, name):
		"""Return option ``name`` from the currently selected section."""
		return self._config.get(self._section, name)
d500448801 2009-10-05  134: 
# Module-level singletons. Order matters: Logger and tagDB read the global
# ``config`` in their constructors, and writeline() uses the global ``log``.
config = Config()

log = Logger()
db = tagDB()
checker = CheckerThread(db, log)

# Feed every input line to the checker; readline() returns an empty string
# only at end of input, which ends the loop.
# NOTE(review): requests still queued at that point are presumably dropped
# when the main thread exits -- confirm this is acceptable.
for line in iter(sys.stdin.readline, ''):
	checker.check(line)