Squid url redirector

Check-in [31e69c4237]

Overview
Comment: integrated database loading
SHA3-256: 31e69c4237283900961eed16165b8cc8451dbbb6c7a2799175f732705ac14d51
User & Date: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 on 2010-08-12 21:36:52.000
Context
2010-08-13
10:05 added configuration dump/restore check-in: d301d9adc6 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
2010-08-12
21:36 integrated database loading check-in: 31e69c4237 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
21:24 forget to remove broken label from kqueue check-in: a93ad7cabc user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
Changes
		return(self._check_stmt(site, ip_address))

	def dump(self):
		if self._dump_stmt == None:
			self._dump_stmt = self._db.prepare("select untrip(site), tag, regexp from urls natural join site natural join tag order by site, tag")
		return(self._dump_stmt())

	def load(self, csv_data):
		with self._db.xact():
			if config.options.flush_db:
				self._db.execute('delete from urls;')
				if config.options.flush_site:
					self._db.execute('delete from site;');
			insertreg = self._db.prepare("select set($1, $2, $3)")
			insert = self._db.prepare("select set($1, $2)")
			for row in csv_data:
				if len(row[2]) > 0:
					insertreg(row[0], row[1], row[2])
				else:
					insert(row[0], row[1])
		self._db.execute('vacuum analyze site;')
		self._db.execute('vacuum analyze urls;')

# abstract class with basic checking functionality
class Checker:
	__slots__ = frozenset(['_db', '_log'])

	def __init__(self):
		self._db = tagDB()
		self._log = Logger()
		self._log.info('started\n')

	def process(self, id, site, ip_address, url_path, line = None):
		self._log.info('trying {}\n'.format(site))
		result = self._db.check(site, ip_address)

		reply = '-'
		for row in result:
			if row != None and row[0] != None:
				if row[1] != None:
					self._log.info('trying regexp "{}" versus "{}"\n'.format(row[1], url_path))
					try:
						if re.compile(row[1]).match(url_path):
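The hunk above integrates loading into tagDB: load() runs the whole import inside a single transaction, optionally clears the urls table first (and the site index too, when both flush options are set), calls the set() stored procedure with or without a regexp depending on whether the third CSV column is empty, and finally runs vacuum analyze on both tables. Its only removal is the commented-out reply construction that used to sit in Checker.process(). A minimal sketch of the input load() expects; the sample rows and tag names are invented, but the three-column layout and the {tag,...} form match what dump() emits below:

import csv
import io

# Hypothetical sample dump: the same columns the dump branch writes out.
sample = io.StringIO(
	'site,tags,regexp\n'
	'example.com,{adult},\n'
	'ads.example.net,"{ads,banners}",^/banners/\n'
)
reader = csv.reader(sample)
assert next(reader) == ['site', 'tags', 'regexp']
# tagdb.load(reader) would now call set(site, tags) for the first row
# (empty regexp) and set(site, tags, regexp) for the second.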
		parser = optparse.OptionParser()
		parser.add_option('-c', '--config', dest = 'config',
			help = 'config file location', metavar = 'FILE',
			default = '/usr/local/etc/squid-tagger.conf')
		parser.add_option('-d', '--dump', dest = 'dump',
			help = 'dump database', action = 'store_true', metavar = 'bool',
			default = False)
		parser.add_option('-f', '--flush-database', dest = 'flush_db',
			help = 'flush previous database on load', default = False,
			action = 'store_true', metavar = 'bool')
		parser.add_option('-F', '--flush-site', dest = 'flush_site',
			help = 'when flushing previous database flush site index too',
			action = 'store_true', default = False, metavar = 'bool')
		parser.add_option('-l', '--load', dest = 'load',
			help = 'load database', action = 'store_true', metavar = 'bool',
			default = False)

		(self.options, args) = parser.parse_args()

		assert os.access(self.options.config, os.R_OK), "Fatal error: can't read {}".format(self.options.config)

		self._config = configparser.ConfigParser()
		self._config.readfp(open(self.options.config))
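Three options are new in this hunk: -l/--load reads a CSV dump from standard input, -f/--flush-database empties the urls table before loading, and -F/--flush-site additionally empties the site index. A hedged sketch of how the flags combine; the script name is an assumption (the default config path suggests squid-tagger):

# Assumed command lines:
#   squid-tagger.py -d > tags.csv        dump the database to CSV
#   squid-tagger.py -l < tags.csv        load a CSV dump from stdin
#   squid-tagger.py -l -f < tags.csv     flush urls, then load
#   squid-tagger.py -l -f -F < tags.csv  flush the site index as well

# The same combination checked against the parser defined above:
(options, args) = parser.parse_args(['-l', '-f'])
assert options.load and options.flush_db and not options.flush_site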

	tagdb = tagDB()

	csv_writer = csv.writer(sys.stdout)
	csv_writer.writerow(['site', 'tags', 'regexp'])
	for row in tagdb.dump():
		csv_writer.writerow([row[0], '{' + ','.join(row[1]) + '}', row[2]])

elif config.options.load:
	# loading database
	import csv

	tagdb = tagDB()

	csv_reader = csv.reader(sys.stdin)
	first_row = next(csv_reader)

	assert first_row == ['site', 'tags', 'regexp'], 'File must contain csv data with three columns: "site", "tags" and "regexp".'

	tagdb.load(csv_reader)

else:
	# main loop
	config.section('reactor')
	if config['reactor'] == 'thread':
		checker = CheckerThread()
	elif config['reactor'] == 'plain':
		checker = Checker()
	elif config['reactor'] == 'kqueue':
		checker = CheckerKqueue()

	checker.loop()
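The reactor choice in the main loop comes from the INI-style config file read above with configparser. A minimal sketch of the relevant section, using plain configparser rather than the script's own config wrapper; kqueue is just one of the three accepted values:

import configparser

cfg = configparser.ConfigParser()
cfg.read_string(
	# hypothetical minimal configuration file
	'[reactor]\n'
	'reactor = kqueue\n'
)
# With this setting the dispatch above builds CheckerKqueue();
# 'thread' selects CheckerThread and 'plain' the basic Checker.
assert cfg['reactor']['reactor'] == 'kqueue'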
Deleted st-load.py version [b93af38ecd].