Squid url redirector

Annotation For st-load.py
anonymous

Annotation For st-load.py

Origin for each line in st-load.py from check-in b16bc5d76f:

f1bafd194a 2010-03-18    1: #!/usr/bin/env python3.1
f1bafd194a 2010-03-18    2: 
f1bafd194a 2010-03-18    3: import configparser, csv, optparse, os, postgresql.api, re, sys
f1bafd194a 2010-03-18    4: 
# thin syslog front-end; the config file can mute it completely
class Logger:
	__slots__ = frozenset(['_syslog'])

	# selects the 'log' config section and, unless its 'silent' option is
	# exactly 'yes', opens syslog under the 'squidTag' ident
	def __init__(self):
		config.section('log')
		muted = config['silent'] == 'yes'
		if muted:
			# a None handle is what info()/notice() test for
			self._syslog = None
			return
		# imported here so a muted logger never loads the syslog module
		import syslog
		self._syslog = syslog
		self._syslog.openlog('squidTag')

	# sends message with LOG_INFO priority; no-op when muted
	def info(self, message):
		if not self._syslog:
			return
		self._syslog.syslog(self._syslog.LOG_INFO, message)

	# sends message with LOG_NOTICE priority; no-op when muted
	def notice(self, message):
		if not self._syslog:
			return
		self._syslog.syslog(self._syslog.LOG_NOTICE, message)
f1bafd194a 2010-03-18   25: 
# wrapper around database
class tagDB:
	__slots__ = frozenset(['_prepared', '_db'])

	# opens the connection described by the 'database' config section
	def __init__(self):
		self._prepared = set()
		config.section('database')
		# NOTE(review): the values are pasted into the URI unescaped, so a
		# password containing '@', ':' or '/' would presumably produce a
		# malformed URI - confirm how the driver expects them to be quoted
		self._db = postgresql.open(
			'pq://{}:{}@{}/{}'.format(
				config['user'],
				config['password'],
				config['host'],
				config['database'],
		) )

	# loads rows of (site, tags[, regexp]) from csv_data, an iterable of
	# lists such as csv.reader produces; everything up to the vacuum calls
	# runs inside a single transaction
	def load(self, csv_data):
		with self._db.xact():
			config.section('loader')
			if config['drop_database']:
				self._db.execute('delete from urls;')
				# the site index is only dropped together with the urls
				if config['drop_site']:
					self._db.execute('delete from site;')
			# set() is not defined in this file - presumably a function
			# installed in the database schema
			insertreg = self._db.prepare("select set($1, $2, $3)")
			insert = self._db.prepare("select set($1, $2)")
			for row in csv_data:
				# csv.reader yields an empty list for a blank line
				if not row:
					continue
				# a missing third column is treated like an empty regexp
				# instead of failing with IndexError
				if len(row) > 2 and row[2]:
					insertreg(row[0], row[1], row[2])
				else:
					insert(row[0], row[1])
		# VACUUM cannot be executed inside a transaction block, so these
		# statements stay outside of xact()
		self._db.execute('vacuum analyze site;')
		self._db.execute('vacuum analyze urls;')
f1bafd194a 2010-03-18   57: 
# this class processes the config file and substitutes default values
class Config:
	# '_default' must not be listed here: it is assigned in the class body
	# below, and a name that is both a slot and a class variable makes the
	# class statement itself fail with ValueError
	__slots__ = frozenset(['_config', '_section'])
	_default = {
		'reactor': {
			'reactor': 'thread',
		},
		'log': {
			'silent': 'no',
		},
		'database': {
			'user': 'squidTag',
			'password': 'password',
			'host': 'localhost',
			'database': 'squidTag',
		},
		'loader': {
			'drop_database': False,
			'drop_site': False,
	},}

	# function to parse the command line and read in the config file;
	# exits with status 2 when the config file is not readable
	def __init__(self):
		parser = optparse.OptionParser()
		parser.add_option('-c', '--config', dest = 'config',
			help = 'config file location', metavar = 'FILE',
			default = '/usr/local/etc/squid-tagger.conf')
		parser.add_option('-d', '--drop-database', dest = 'drop_database',
			help = 'signals loader to drop previous database',
			action = 'store_true')
		parser.add_option('-D', '--drop-site', dest = 'drop_site',
			help = 'signals loader to drop not only url definitions but site index too',
			action = 'store_true')

		(options, args) = parser.parse_args()

		# command line switches are recorded as the new defaults of the
		# 'loader' section (this updates the class-level table)
		if options.drop_database:
			self._default['loader']['drop_database'] = True

		if options.drop_site:
			self._default['loader']['drop_site'] = True

		if not os.access(options.config, os.R_OK):
			print("Can't read {}: exitting".format(options.config))
			sys.exit(2)

		self._config = configparser.ConfigParser()
		# the with block closes the file once it has been parsed
		with open(options.config) as config_file:
			# readfp() is the deprecated spelling and is missing from recent
			# interpreters; fall back to it only where read_file() is absent
			read_file = getattr(self._config, 'read_file', None)
			if read_file is None:
				read_file = self._config.readfp
			read_file(config_file)

	# function to select config file section or create one
	def section(self, section):
		if not self._config.has_section(section):
			self._config.add_section(section)
		self._section = section

	# function to get config parameter from the currently selected section;
	# returns None for names that have no entry in the defaults table,
	# otherwise the configured value or, when the file does not set it, the
	# default; options whose default is a bool are returned as bool
	def __getitem__(self, name):
		defaults = self._default.get(self._section)
		if defaults is None or name not in defaults:
			return None
		default = defaults[name]
		is_flag = isinstance(default, bool)
		if not self._config.has_option(self._section, name):
			# the parser stores strings, so a bool default goes in as its repr
			self._config.set(self._section, name, repr(default) if is_flag else default)
		if is_flag:
			return self._config.getboolean(self._section, name)
		return self._config.get(self._section, name)
f1bafd194a 2010-03-18  126: 
# initializing and reading in config file
config = Config()

tagdb = tagDB()

csv_reader = csv.reader(sys.stdin)
# the None default turns an empty stdin into a failed header check below
# instead of an uncaught StopIteration
first_row = next(csv_reader, None)
if not first_row == ['site', 'tags', 'regexp']:
	print('File must contain csv data with three columns: "site", "tags" and "regexp".')
	sys.exit(1)
# the header row is already consumed, so only data rows are loaded
tagdb.load(csv_reader)
f1bafd194a 2010-03-18  137: tagdb.load(csv_reader)