d500448801 2009-10-05 1: #!/usr/bin/env python3.1
d500448801 2009-10-05 2:
d500448801 2009-10-05 3: import configparser, optparse, os, postgresql.api, re, sys, _thread
d500448801 2009-10-05 4:
# Thin front end to syslog. When the 'log' config section says
# silent = yes no syslog handle is kept and every call is a no-op.
class Logger:
    __slots__ = frozenset(['_syslog'])

    # Selects the 'log' section of the global config and, unless muted,
    # imports syslog and opens it under the 'squidTag' ident.
    def __init__(self):
        config.section('log')
        muted = config['silent'] == 'yes'
        self._syslog = None
        if not muted:
            # imported lazily so a muted logger never touches syslog
            import syslog
            self._syslog = syslog
            syslog.openlog('squidTag')

    # Sends message with LOG_INFO priority; does nothing when muted.
    def info(self, message):
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_INFO, message)

    # Sends message with LOG_NOTICE priority; does nothing when muted.
    def notice(self, message):
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_NOTICE, message)
d500448801 2009-10-05 25:
# Database wrapper: owns the PostgreSQL connection and the prepared
# statement used to look up redirect rules.
class tagDB:
    __slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

    # Connects (through _curs) and prepares the rule lookup. The statement
    # takes a site and a client address and yields (redirect_url, regexp)
    # rows ordered by array_length(site, 1), longest first.
    def __init__(self):
        self._prepared = set()
        # False marks "not connected yet"; _curs() replaces it on first use
        self._db = False
        connection = self._curs()
        self._check_stmt = connection.prepare(
            "select redirect_url, regexp from site_rule "
            "where site <@ tripdomain($1) and netmask >> $2::text::inet "
            "order by array_length(site, 1) desc"
        )

    # Returns the connection, opening it on the first call from the
    # user/password/host/database options of the 'database' config section.
    def _curs(self):
        if self._db:
            return self._db
        config.section('database')
        credentials = (
            config['user'],
            config['password'],
            config['host'],
            config['database'],
        )
        self._db = postgresql.open('pq://{}:{}@{}/{}'.format(*credentials))
        return self._db

    # Runs the prepared lookup for the given site and client address and
    # returns whatever the statement returns.
    def check(self, site, ip_address):
        rows = self._check_stmt(site, ip_address)
        return rows
b93dc49210 2009-10-13 49:
# Abstract class with the basic checking functionality: parses request
# lines, looks up matching rules and writes the reply. The base class only
# stores parsed requests in self._queue; CheckerThread adds the worker that
# drains it.
class Checker:
    __slots__ = frozenset(['_db', '_log', '_queue'])

    # Request line layout as matched here:
    #   <id> <http|ftp>://<site>/<path> <client address>/<...> <...> <method>
    # Groups 1, 3, 4 and 5 (id, site, path, client address) are used; the
    # remaining fields are matched but ignored.
    # Kept as a raw string because the pattern is full of backslashes that
    # are not valid string escapes, and compiled once rather than per line.
    _request_re = re.compile(
        r'^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$'
    )

    def __init__(self):
        self._db = tagDB()
        self._log = Logger()
        # start with an empty queue so insert() works on the base class too
        self._queue = []

    # Looks up the rules for (site, ip_address) and writes one reply line
    # '<id> <reply>': '302:<redirect_url>' for the first applicable rule,
    # '-' when none applies. A rule applies when it has a redirect URL and
    # either no regexp or a regexp matching the start of url_path.
    def process(self, id, site, ip_address, url_path):
        self._log.info('trying {}\n'.format(site))
        reply = '-'
        for row in self._db.check(site, ip_address):
            if row is None or row[0] is None:
                continue
            redirect_url, pattern = row[0], row[1]
            if pattern is not None:
                self._log.info('trying regexp "{}" versus "{}"\n'.format(pattern, url_path))
                try:
                    matched = re.compile(pattern).match(url_path)
                except re.error as error:
                    # a malformed rule must not abort the whole request:
                    # report it and carry on with the remaining rules
                    self._log.notice('bad regexp "{}": {}\n'.format(pattern, error))
                    continue
                if not matched:
                    continue
            reply = '302:' + redirect_url
            break
        self.writeline('{} {}\n'.format(id, reply))

    # Parses one input line. A well-formed request is handed to insert();
    # anything else is logged and echoed back unchanged.
    def check(self, line):
        request = self._request_re.match(line)
        if request is None:
            self._log.info('bad request\n')
            self.writeline(line)
            return
        request_id, site, url_path, ip_address = request.group(1, 3, 4, 5)
        self.insert(request_id, site, ip_address, url_path)
        self._log.info('request {} queued ({})\n'.format(request_id, line))

    # Appends the request to the queue as an (id, site, ip_address,
    # url_path) tuple.
    def insert(self, id, site, ip_address, url_path):
        self._queue.append((id, site, ip_address, url_path))

    # Logs the reply and writes it to stdout, flushing right away so the
    # reader on the other end of the pipe sees it immediately.
    def writeline(self, string):
        self._log.info('sending: ' + string)
        sys.stdout.write(string)
        sys.stdout.flush()
b93dc49210 2009-10-13 99:
# Threaded checking facility: check() queues requests on the calling
# thread, a single worker thread pops them in FIFO order and answers them
# through process().
class CheckerThread(Checker):
    __slots__ = frozenset(['_lock', '_lock_queue'])

    # Creates both locks and the queue, then starts the worker thread.
    def __init__(self):
        Checker.__init__(self)
        # _lock is the wake-up signal for the worker. It starts out held, so
        # the worker stalls on it until insert() releases it after storing a
        # request. The worker re-releases it itself while more requests are
        # waiting, so it keeps running without stops until the queue is
        # empty.
        self._lock = _thread.allocate_lock()
        # _lock_queue guards the queue and every release of _lock
        self._lock_queue = _thread.allocate_lock()
        self._lock.acquire()
        self._queue = []
        _thread.start_new_thread(self._start, ())

    # Worker loop: waits for the wake-up lock, pops the oldest request and
    # processes it. Never returns.
    def _start(self):
        while True:
            self._lock.acquire()
            # 'with' guarantees the queue lock is dropped even on an error
            with self._lock_queue:
                # More than one request waiting: free the wake-up lock so
                # the next iteration does not stall. insert() may already
                # have released it between the two acquires, hence the
                # locked() check.
                if len(self._queue) > 1 and self._lock.locked():
                    self._lock.release()
                req = self._queue.pop(0)
            try:
                self.process(*req)
            except Exception as error:
                # An error in one request must not kill the only worker,
                # otherwise every later request would sit in the queue
                # forever. NOTE(review): no reply is written for the failed
                # request.
                self._log.notice('request {} failed: {}\n'.format(req[0], error))

    # Queues the request and wakes the worker if it is stalled.
    def insert(self, id, site, ip_address, url_path):
        with self._lock_queue:
            Checker.insert(self, id, site, ip_address, url_path)
            if self._lock.locked():
                self._lock.release()
fc934cead1 2009-10-13 133:
# this class processes the config file and substitutes default values
class Config:
    # '_default' is a class variable and therefore must not be listed in
    # __slots__: Python rejects a class whose slot name clashes with a class
    # variable (ValueError at class creation).
    __slots__ = frozenset(['_config', '_section'])

    # fallback values, keyed by section and then by option name
    _default = {
        'reactor': {
            'reactor': 'thread',
        },
        'log': {
            'silent': 'no',
        },
        'database': {
            'host': 'localhost',
            'database': 'squidTag',
        },
    }

    # Reads the config file named by -c/--config (default
    # /usr/local/etc/squid-tagger.conf). Exits with status 2 when the file
    # is not readable.
    def __init__(self):
        parser = optparse.OptionParser()
        parser.add_option('-c', '--config', dest = 'config',
            help = 'config file location', metavar = 'FILE',
            default = '/usr/local/etc/squid-tagger.conf')

        (options, args) = parser.parse_args()

        if not os.access(options.config, os.R_OK):
            # stdout carries the replies of this helper, so diagnostics
            # belong on stderr
            sys.stderr.write("Can't read {}: exitting\n".format(options.config))
            sys.exit(2)

        self._config = configparser.ConfigParser()
        with open(options.config) as source:
            # readfp() was removed in Python 3.12; read_file() is missing
            # before 3.2, so use whichever this interpreter provides
            reader = getattr(self._config, 'read_file', None)
            if reader is None:
                reader = self._config.readfp
            reader(source)

    # Selects the section that later config[name] lookups read from,
    # creating it when the file does not define it.
    def section(self, section):
        if not self._config.has_section(section):
            self._config.add_section(section)
        self._section = section

    # Returns option `name` of the currently selected section. When the
    # file does not set it, the built-in default is returned, or None when
    # there is no default either. Defaults are not written back into the
    # parser because it only accepts string values.
    def __getitem__(self, name):
        if self._config.has_option(self._section, name):
            return self._config.get(self._section, name)
        return self._default.get(self._section, {}).get(name)
d500448801 2009-10-05 183:
# initializing and reading in config file
config = Config()

# pick the request handler named by the 'reactor' option
config.section('reactor')
reactor = config['reactor']
if reactor == 'thread':
    checker = CheckerThread()
else:
    # without this branch `checker` would stay undefined and the loop below
    # would die with a NameError on the first request
    sys.stderr.write("Unknown reactor '{}': exiting\n".format(reactor))
    sys.exit(2)

# hand every input line to the checker; readline() returns an empty string
# only at end of input, which ends the loop
while True:
    line = sys.stdin.readline()
    if not line:
        break
    checker.check(line)