Index: squid-tagger.py
==================================================================
--- squid-tagger.py
+++ squid-tagger.py
@@ -47,10 +47,26 @@
 	def dump(self):
 		if self._dump_stmt == None:
 			self._dump_stmt = self._db.prepare("select untrip(site), tag, regexp from urls natural join site natural join tag order by site, tag")
 		return(self._dump_stmt())
 
+	def load(self, csv_data):
+		with self._db.xact():
+			if config.options.flush_db:
+				self._db.execute('delete from urls;')
+			if config.options.flush_site:
+				self._db.execute('delete from site;')
+			insertreg = self._db.prepare("select set($1, $2, $3)")
+			insert = self._db.prepare("select set($1, $2)")
+			for row in csv_data:
+				if len(row[2]) > 0:
+					insertreg(row[0], row[1], row[2])
+				else:
+					insert(row[0], row[1])
+		self._db.execute('vacuum analyze site;')
+		self._db.execute('vacuum analyze urls;')
+
 # abstract class with basic checking functionality
 class Checker:
 	__slots__ = frozenset(['_db', '_log'])
 
 	def __init__(self):
@@ -59,11 +75,10 @@
 		self._log.info('started\n')
 
 	def process(self, id, site, ip_address, url_path, line = None):
 		self._log.info('trying {}\n'.format(site))
 		result = self._db.check(site, ip_address)
-		#reply = '{}://{}/{}'.format(req[4], req[1], req[3])
 		reply = '-'
 		for row in result:
 			if row != None and row[0] != None:
 				if row[1] != None:
 					self._log.info('trying regexp "{}" versus "{}"\n'.format(row[1], url_path))
@@ -260,10 +275,19 @@
 			help = 'config file location', metavar = 'FILE',
 			default = '/usr/local/etc/squid-tagger.conf')
 		parser.add_option('-d', '--dump', dest = 'dump',
 			help = 'dump database', action = 'store_true', metavar = 'bool',
 			default = False)
+		parser.add_option('-f', '--flush-database', dest = 'flush_db',
+			help = 'flush previous database on load', default = False,
+			action = 'store_true', metavar = 'bool')
+		parser.add_option('-F', '--flush-site', dest = 'flush_site',
+			help = 'when flushing previous database flush site index too',
+			action = 'store_true', default = False, metavar = 'bool')
+		parser.add_option('-l', '--load', dest = 'load',
+			help = 'load database', action = 'store_true', metavar = 'bool',
+			default = False)
 
 		(self.options, args) = parser.parse_args()
 
 		assert os.access(self.options.config, os.R_OK), "Fatal error: can't read {}".format(self.options.config)
 
@@ -300,10 +324,23 @@
 	csv_writer = csv.writer(sys.stdout)
 	csv_writer.writerow(['site', 'tags', 'regexp'])
 	for row in tagdb.dump():
 		csv_writer.writerow([row[0], '{' + ','.join(row[1]) + '}', row[2]])
 
+
+elif config.options.load:
+	# loading database
+	import csv
+
+	tagdb = tagDB()
+
+	csv_reader = csv.reader(sys.stdin)
+	first_row = next(csv_reader)
+
+	assert first_row == ['site', 'tags', 'regexp'], 'File must contain csv data with three columns: "site", "tags" and "regexp".'
+
+	tagdb.load(csv_reader)
 
 else:
 	# main loop
 	config.section('reactor')
 	if config['reactor'] == 'thread':

DELETED st-load.py

Index: st-load.py
==================================================================
--- st-load.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3.1
-
-import configparser, csv, optparse, os, postgresql.api, re, sys
-
-# wrapper around syslog, can be muted
-class Logger:
-	__slots__ = frozenset(['_syslog'])
-
-	def __init__(self):
-		config.section('log')
-		if config['silent'] == 'yes':
-			self._syslog = None
-		else:
-			import syslog
-			self._syslog = syslog
-			self._syslog.openlog('squidTag')
-
-	def info(self, message):
-		if self._syslog:
-			self._syslog.syslog(self._syslog.LOG_INFO, message)
-
-	def notice(self, message):
-		if self._syslog:
-			self._syslog.syslog(self._syslog.LOG_NOTICE, message)
-
-# wrapper around database
-class tagDB:
-	__slots__ = frozenset(['_prepared', '_db'])
-
-	def __init__(self):
-		self._prepared = set()
-		config.section('database')
-		self._db = postgresql.open(
-			'pq://{}:{}@{}/{}'.format(
-				config['user'],
-				config['password'],
-				config['host'],
-				config['database'],
-		) )
-
-	def load(self, csv_data):
-		with self._db.xact():
-			config.section('loader')
-			if config['drop_database']:
-				self._db.execute('delete from urls;')
-			if config['drop_site']:
-				self._db.execute('delete from site;');
-			insertreg = self._db.prepare("select set($1, $2, $3)")
-			insert = self._db.prepare("select set($1, $2)")
-			for row in csv_data:
-				if len(row[2]) > 0:
-					insertreg(row[0], row[1], row[2])
-				else:
-					insert(row[0], row[1])
-		self._db.execute('vacuum analyze site;')
-		self._db.execute('vacuum analyze urls;')
-
-# this classes processes config file and substitutes default values
-class Config:
-	__slots__ = frozenset(['_config', '_default', '_section'])
-	_default = {
-		'reactor': {
-			'reactor': 'thread',
-		},
-		'log': {
-			'silent': 'no',
-		},
-		'database': {
-			'user': 'squidTag',
-			'password': 'password',
-			'host': 'localhost',
-			'database': 'squidTag',
-		},
-		'loader': {
-			'drop_database': False,
-			'drop_site': False,
-	},}
-
-	# function to read in config file
-	def __init__(self):
-		parser = optparse.OptionParser()
-		parser.add_option('-c', '--config', dest = 'config',
-			help = 'config file location', metavar = 'FILE',
-			default = '/usr/local/etc/squid-tagger.conf')
-		parser.add_option('-d', '--drop-database', dest = 'drop_database',
-			help = 'signals loader to drop previous database',
-			action = 'store_true')
-		parser.add_option('-D', '--drop-site', dest = 'drop_site',
-			help = 'signals loader to drop not only url definitions but site index too',
-			action = 'store_true')
-
-		(options, args) = parser.parse_args()
-
-		if options.drop_database:
-			self._default['loader']['drop_database'] = True
-
-		if options.drop_site:
-			self._default['loader']['drop_site'] = True
-
-		if not os.access(options.config, os.R_OK):
-			print("Can't read {}: exitting".format(options.config))
-			sys.exit(2)
-
-		self._config = configparser.ConfigParser()
-		self._config.readfp(open(options.config))
-
-	# function to select config file section or create one
-	def section(self, section):
-		if not self._config.has_section(section):
-			self._config.add_section(section)
-		self._section = section
-
-	# function to get config parameter, if parameter doesn't exists the default
-	# value or None is substituted
-	def __getitem__(self, name):
-		if not self._section in self._default or not name in self._default[self._section]:
-			return None
-		if not type(self._default[self._section][name]) == type(True):
-			if not self._config.has_option(self._section, name):
-				self._config.set(self._section, name, self._default[self._section][name])
-			return(self._config.get(self._section, name))
-		else:
-			if not self._config.has_option(self._section, name):
-				self._config.set(self._section, name, repr(self._default[self._section][name]))
-			return(self._config.getboolean(self._section, name))
-
-# initializing and reading in config file
-config = Config()
-
-tagdb = tagDB()
-
-csv_reader = csv.reader(sys.stdin)
-first_row = next(csv_reader)
-if not first_row == ['site', 'tags', 'regexp']:
-	print('File must contain csv data with three columns: "site", "tags" and "regexp".')
-	sys.exit(1)
-tagdb.load(csv_reader)
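
For reference, a minimal sketch of input the new --load branch accepts, assuming the same column layout that --dump emits; the '{tag,...}' form of the tags column and the example sites are illustrative assumptions, not something this change verifies:

# Hypothetical helper, not part of the change: emits a CSV that passes the
# header assert in the --load branch ('site', 'tags', 'regexp').
import csv, sys

writer = csv.writer(sys.stdout)
writer.writerow(['site', 'tags', 'regexp'])
# empty third column -> routed to the two-argument set(site, tags) statement
writer.writerow(['example.com', '{ads}', ''])
# non-empty third column -> routed to the three-argument set(site, tags, regexp) statement
writer.writerow(['example.org', '{adult}', '^/nsfw/.*'])

Piped into the tagger, e.g. ./squid-tagger.py --load < urls.csv (the file name is illustrative; add -f or -F to flush the existing urls or site tables first), tagDB.load() dispatches each row to one of the two prepared set() statements depending on whether the regexp column is empty, then vacuum-analyzes both tables.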