f1bafd194a 2010-03-18 1: #!/usr/bin/env python3.1
f1bafd194a 2010-03-18 2:
f1bafd194a 2010-03-18 3: import configparser, csv, optparse, os, postgresql.api, re, sys
f1bafd194a 2010-03-18 4:
# wrapper around syslog, can be muted
class Logger:
    """Optional syslog front end.

    When the ``silent`` option of the ``log`` config section equals
    ``'yes'`` no syslog module is loaded and every logging call is a no-op.
    Otherwise messages are forwarded to syslog under the ``squidTag`` ident.
    """
    __slots__ = frozenset(['_syslog'])

    def __init__(self):
        """Select the ``log`` config section and set up the syslog backend."""
        config.section('log')
        if config['silent'] == 'yes':
            # Muted: keep a falsy placeholder so info()/notice() do nothing.
            self._syslog = None
            return
        # Imported lazily so a muted logger never touches the syslog module.
        import syslog
        self._syslog = syslog
        syslog.openlog('squidTag')

    def info(self, message):
        """Send *message* to syslog at LOG_INFO priority unless muted."""
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_INFO, message)

    def notice(self, message):
        """Send *message* to syslog at LOG_NOTICE priority unless muted."""
        backend = self._syslog
        if not backend:
            return
        backend.syslog(backend.LOG_NOTICE, message)
f1bafd194a 2010-03-18 25:
# wrapper around database
class tagDB:
    """Wrapper around the PostgreSQL connection used by the loader.

    Connection parameters (``user``, ``password``, ``host``, ``database``)
    are read from the ``database`` section of the module-level ``config``.
    """
    __slots__ = frozenset(['_prepared', '_db'])

    def __init__(self):
        """Open the database connection described by the config."""
        self._prepared = set()
        config.section('database')
        # NOTE(review): the values are interpolated into the URI verbatim;
        # presumably a password containing '@' or '/' would need escaping --
        # confirm against the driver's IRI parsing rules.
        self._db = postgresql.open(
            'pq://{}:{}@{}/{}'.format(
                config['user'],
                config['password'],
                config['host'],
                config['database'],
            ))

    def load(self, csv_data):
        """Feed CSV rows to the database ``mark()`` function.

        csv_data -- iterable of rows (sequences of strings); the first three
        fields of each row are passed to ``mark($1, array[$2], $3)``.

        All inserts run inside one transaction. When the ``drop_database``
        option of the ``loader`` config section is true, the ``urls`` and
        ``site`` tables are emptied first, inside the same transaction.
        Completely empty rows are skipped; a non-empty row with fewer than
        three fields still raises IndexError and aborts the transaction.
        """
        insert = self._db.prepare("select mark($1, array[$2], $3)")
        with self._db.xact():
            config.section('loader')
            if config['drop_database']:
                self._db.execute('delete from urls; delete from site;')
            for row in csv_data:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing
                    # newline); indexing it would abort the whole load.
                    continue
                insert(row[0], row[1], row[2])
        # VACUUM cannot be executed inside a transaction block, so the
        # statistics refresh happens only after the transaction has ended.
        self._db.execute('vacuum analyze site;')
        self._db.execute('vacuum analyze urls;')
f1bafd194a 2010-03-18 52:
# this class processes the config file and substitutes default values
class Config:
    """Command-line options plus config-file values with built-in defaults.

    Usage: call ``section(name)`` to choose the active section, then read
    options with ``config[option]``. Only options listed in ``_default`` are
    recognised; anything else yields ``None``.
    """
    # '_default' must not be listed here: it is a class attribute, and a name
    # present in __slots__ may not also be a class variable (ValueError at
    # class creation time).
    __slots__ = frozenset(['_config', '_section'])
    _default = {
        'reactor': {
            'reactor': 'thread',
        },
        'log': {
            'silent': 'no',
        },
        'database': {
            'user': 'squidTag',
            'password': 'password',
            'host': 'localhost',
            'database': 'squidTag',
        },
        'loader': {
            'drop_database': False,
        },
    }

    # function to read in config file
    def __init__(self):
        """Parse ``sys.argv`` and load the config file.

        Options: ``-c/--config FILE`` (default
        ``/usr/local/etc/squid-tagger.conf``) and ``-d/--drop-database``.
        Prints a message and exits with status 2 when the config file cannot
        be read.
        """
        parser = optparse.OptionParser()
        parser.add_option('-c', '--config', dest = 'config',
            help = 'config file location', metavar = 'FILE',
            default = '/usr/local/etc/squid-tagger.conf')
        parser.add_option('-d', '--drop-database', dest = 'drop_database',
            help = 'signals loader to drop previous database',
            action = 'store_true')

        (options, _args) = parser.parse_args()

        # No section is selected until section() is called; lookups made
        # before that return None instead of raising AttributeError.
        self._section = None
        self._config = configparser.ConfigParser()

        # read() opens and closes the file itself and returns the list of
        # files it managed to parse; an empty list means the file vanished
        # or became unreadable after the access check.
        if (not os.access(options.config, os.R_OK)
                or not self._config.read([options.config])):
            print("Can't read {}: exitting".format(options.config))
            sys.exit(2)

        if options.drop_database:
            # Record the flag in this instance's parser rather than mutating
            # the shared class-level defaults, so that it also overrides a
            # 'drop_database' value present in the config file.
            if not self._config.has_section('loader'):
                self._config.add_section('loader')
            self._config.set('loader', 'drop_database', repr(True))

    # function to select config file section or create one
    def section(self, section):
        """Make *section* the active section, creating it when absent."""
        if not self._config.has_section(section):
            self._config.add_section(section)
        self._section = section

    # function to get config parameter, if parameter doesn't exists the default
    # value or None is substituted
    def __getitem__(self, name):
        """Return option *name* from the active section.

        Returns ``None`` when the active section or the option has no entry
        in ``_default``. Otherwise a missing option is first filled in from
        ``_default``; options whose default is a bool are returned as bool,
        all others as str.
        """
        defaults = self._default.get(self._section)
        if defaults is None or name not in defaults:
            return None
        default = defaults[name]
        is_flag = isinstance(default, bool)
        if not self._config.has_option(self._section, name):
            # The parser stores strings only, so booleans go in as their repr
            # ('True'/'False'), which getboolean() understands.
            self._config.set(self._section, name,
                repr(default) if is_flag else default)
        if is_flag:
            return self._config.getboolean(self._section, name)
        return self._config.get(self._section, name)
f1bafd194a 2010-03-18 114:
# initializing and reading in config file; the classes above look this
# object up as the module-level name 'config'
config = Config()

tagdb = tagDB()

# CSV data is read from standard input; the first row must be the header.
csv_reader = csv.reader(sys.stdin)
# A default of None makes empty input fall through to the header check below
# instead of escaping as an uncaught StopIteration.
first_row = next(csv_reader, None)
if first_row != ['site', 'tags', 'regexp']:
    print('File must contain csv data with three columns: "site", "tags" and "regexp".')
    sys.exit(1)
# The reader is now positioned after the header, so only data rows are loaded.
tagdb.load(csv_reader)