Squid url redirector


Differences From Artifact [c46e709b29] To Artifact [08a021606d]:


From [c46e709b29], lines 40-90:
	def check(self, site, ip_address):
		if self._check_stmt == None:
			self._check_stmt = self._db.prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")
		return(self._check_stmt(site, ip_address))

	def dump(self):
		return(self._db.prepare("select untrip(site), tag, regexp from urls natural join site natural join tag order by site, tag")())

	def load(self, csv_data):
		with self._db.xact():
			if config.options.flush_db:
				self._db.execute('delete from urls;')
				if config.options.flush_site:
					self._db.execute('delete from site;');
			insertreg = self._db.prepare("select set($1, $2, $3)")
			insert = self._db.prepare("select set($1, $2)")
			for row in csv_data:
				if len(row[2]) > 0:
					insertreg(row[0], row[1], row[2])
				else:
					insert(row[0], row[1])
		self._db.execute('vacuum analyze site;')
		self._db.execute('vacuum analyze urls;')

	def load_conf(self, csv_data):
		with self._db.xact():
			self._db.execute('delete from rules;')
			insertconf = self._db.prepare("insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, id_tag) values ($1::text::cidr, $2, $3, $4, $5::text::time, $6::text::time, get_tag($7::text::text[]))")
			for row in csv_data:
				insertconf(row[0], row[1], int(row[2]), int(row[3]), row[4], row[5], row[6])
		self._db.execute('vacuum analyze rules;')

	def dump_conf(self):
		return(self._db.prepare("select netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag from rules natural join tag")())

# abstract class with basic checking functionality
class Checker:
	__slots__ = frozenset(['_db', '_log'])

	def __init__(self):
		self._db = tagDB()
		self._log = Logger()
		self._log.info('started\n')


	def process(self, id, site, ip_address, url_path, line = None):
		self._log.info('trying {}\n'.format(site))
		result = self._db.check(site, ip_address)
		reply = '-'
		for row in result:
			if row != None and row[0] != None:







To [08a021606d], lines 40-88:
	def check(self, site, ip_address):
		if self._check_stmt == None:
			self._check_stmt = self._db.prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")
		return(self._check_stmt(site, ip_address))

	def dump(self):
		return(self._db.prepare("copy (select untrip(site) as site, tag, regexp from urls order by site, tag) to stdout csv header")())

	def load(self, data):
		with self._db.xact():
			if config.options.flush_db:
				self._db.execute('delete from urls;')



			insert = self._db.prepare("insert into urls (site, tag, regexp) values (tripdomain($1), $2::text::text[], $3)")
			for row in data:
				if len(row) == 2:
					insert(row[0], row[1], None)
				else:
					insert(row[0], row[1], row[2])
			self._db.execute("update urls set regexp = NULL where regexp = ''")
		self._db.execute('vacuum analyze urls;')

	def load_conf(self, csv_data):
		with self._db.xact():
			self._db.execute('delete from rules;')
			insertconf = self._db.prepare("insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag) values ($1::text::cidr, $2, $3, $4, $5::text::time, $6::text::time, $7::text::text[])")
			for row in csv_data:
				insertconf(row[0], row[1], int(row[2]), int(row[3]), row[4], row[5], row[6])
		self._db.execute('vacuum analyze rules;')

	def dump_conf(self):
		return(self._db.prepare("copy (select netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag from rules) to stdout csv header")())

# abstract class with basic checking functionality
class Checker:
	__slots__ = frozenset(['_db', '_log', '_request'])

	def __init__(self):
		self._db = tagDB()
		self._log = Logger()
		self._log.info('started\n')
		self._request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')

	def process(self, id, site, ip_address, url_path, line = None):
		self._log.info('trying {}\n'.format(site))
		result = self._db.check(site, ip_address)
		reply = '-'
		for row in result:
			if row != None and row[0] != None:
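For context on the hunk above: _db appears to be a py-postgresql connection, so prepare() returns a statement object that is simply called with positional parameters, and a prepared copy ... to stdout csv header statement yields the raw CSV rows as bytes, which is why the new main block further down decodes each line with .decode('utf-8'). A minimal sketch of that usage, not taken from either artifact; the connection URL and sample parameters are made up and the site_rule/urls schema referenced by the queries is assumed to exist:

	# editor's sketch, assuming a py-postgresql connection and the schema
	# referenced by the queries above; connection details are hypothetical
	import sys
	import postgresql

	db = postgresql.open('pq://squidtag@localhost/squidtag')

	# prepare() gives a callable statement; $1, $2 are bound positionally
	check = db.prepare(
		"select redirect_url, regexp from site_rule "
		"where site <@ tripdomain($1) and netmask >> $2::text::inet "
		"order by array_length(site, 1) desc")
	for row in check('www.example.com', '192.168.0.1'):
		print(row[0], row[1])

	# a prepared COPY ... TO STDOUT yields bytes rows when called
	dump = db.prepare(
		"copy (select untrip(site) as site, tag, regexp "
		"from urls order by site, tag) to stdout csv header")
	for line in dump():
		sys.stdout.write(line.decode('utf-8'))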
From [c46e709b29], lines 100-114:
						self._log.info("can't compile regexp")
				else:
					reply = row[0].format(url_path)
					break
		self.writeline('{} {}\n'.format(id, reply))

	def check(self, line):
		request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$').match(line)
		if request:
			id = request.group(1)
			#proto = request.group(2)
			site = request.group(3)
			url_path = request.group(4)
			ip_address = request.group(5)
			self.process(id, site, ip_address, url_path, line)







To [08a021606d], lines 98-112:
						self._log.info("can't compile regexp")
				else:
					reply = row[0].format(url_path)
					break
		self.writeline('{} {}\n'.format(id, reply))

	def check(self, line):
		request = self._request.match(line)
		if request:
			id = request.group(1)
			#proto = request.group(2)
			site = request.group(3)
			url_path = request.group(4)
			ip_address = request.group(5)
			self.process(id, site, ip_address, url_path, line)
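The only change in this hunk is hoisting the request-parsing regexp out of check(): the old side rebuilds the pattern for every input line, the new side compiles it once in __init__ and reuses self._request. Python's re module does cache compiled patterns internally, so the gain is mostly skipping the repeated compile call and cache lookup on a hot path. A self-contained sketch of the same trade-off (sample line and iteration count are made up, and the pattern is written without the redundant backslash escapes used in the artifacts):

	# editor's sketch: per-call compilation vs. a pattern compiled once
	import re
	import timeit

	line = '12 http://www.example.com/index.html 192.168.0.1/- - GET'
	pattern = (r'^([0-9]+) (http|ftp)://([-\w.:]+)/([^ ]*) ([0-9.]+)'
		r'/(-|[\w.]+) (-|\w+) (-|GET|HEAD|POST).*$')

	request = re.compile(pattern)   # compiled once, as the new __init__ does

	per_line = timeit.timeit(lambda: re.compile(pattern).match(line), number = 100000)
	cached = timeit.timeit(lambda: request.match(line), number = 100000)
	print('per line: {:.3f}s  precompiled: {:.3f}s'.format(per_line, cached))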
From [c46e709b29], lines 236-249:
									# don't wait for more data, start processing
									timeout = 0

				# detect end of stream and exit if possible
				if kev.flags >> 15 == 1:
					self._kq.control([self._select.kevent(sys.stdin, self._select.KQ_FILTER_READ, self._select.KQ_EV_DELETE)], 0)
					eof = True


			if len(kevs) == 0:
				if len(self._queue) > 0:
					# get one request and process it
					req = self._queue.pop(0)
					Checker.process(self, req[0], req[1], req[2], req[3])
					if len(self._queue) == 0:







To [08a021606d], lines 234-248:
									# don't wait for more data, start processing
									timeout = 0

				# detect end of stream and exit if possible
				if kev.flags >> 15 == 1:
					self._kq.control([self._select.kevent(sys.stdin, self._select.KQ_FILTER_READ, self._select.KQ_EV_DELETE)], 0)
					eof = True
					#timeout = 0

			if len(kevs) == 0:
				if len(self._queue) > 0:
					# get one request and process it
					req = self._queue.pop(0)
					Checker.process(self, req[0], req[1], req[2], req[3])
					if len(self._queue) == 0:
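On the end-of-stream test kept in both versions above: a kevent's flags field is 16 bits wide and EV_EOF is its top bit (0x8000), so kev.flags >> 15 == 1 holds exactly when the EOF flag is set. Testing the named constant says the same thing more directly; a short sketch of the equivalent check (kqueue, and with it select.KQ_EV_EOF, exists only on BSD-derived systems such as FreeBSD and macOS):

	# editor's sketch: name the EOF bit instead of shifting for it
	import select   # kqueue/kevent are BSD/macOS only

	def stream_closed(kev):
		# equivalent to the artifact's kev.flags >> 15 == 1
		return bool(kev.flags & select.KQ_EV_EOF)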
From [c46e709b29], lines 284-300:
			default = '/usr/local/etc/squid-tagger.conf')
		parser.add_option('-d', '--dump', dest = 'dump',
			help = 'dump database', action = 'store_true', metavar = 'bool',
			default = False)
		parser.add_option('-f', '--flush-database', dest = 'flush_db',
			help = 'flush previous database on load', default = False,
			action = 'store_true', metavar = 'bool')
		parser.add_option('-F', '--flush-site', dest = 'flush_site',
			help = 'when flushing previous database flush site index too',
			action = 'store_true', default = False, metavar = 'bool')
		parser.add_option('-l', '--load', dest = 'load',
			help = 'load database', action = 'store_true', metavar = 'bool',
			default = False)
		parser.add_option('-D', '--dump-conf', dest = 'dump_conf',
			help = 'dump filtering rules', default = False, metavar = 'bool',
			action = 'store_true')
		parser.add_option('-L', '--load-conf', dest = 'load_conf',







To [08a021606d], lines 283-296:
			default = '/usr/local/etc/squid-tagger.conf')
		parser.add_option('-d', '--dump', dest = 'dump',
			help = 'dump database', action = 'store_true', metavar = 'bool',
			default = False)
		parser.add_option('-f', '--flush-database', dest = 'flush_db',
			help = 'flush previous database on load', default = False,
			action = 'store_true', metavar = 'bool')



		parser.add_option('-l', '--load', dest = 'load',
			help = 'load database', action = 'store_true', metavar = 'bool',
			default = False)
		parser.add_option('-D', '--dump-conf', dest = 'dump_conf',
			help = 'dump filtering rules', default = False, metavar = 'bool',
			action = 'store_true')
		parser.add_option('-L', '--load-conf', dest = 'load_conf',
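The hunk above only removes the -F/--flush-site option, consistent with the new load(), which no longer maintains a separate site index to flush. For reference, a minimal optparse sketch of how the surviving boolean flags parse (option list trimmed to the load path; optparse is standard library, though deprecated in favour of argparse since Python 3.2):

	# editor's sketch: the load-related flags reduced to a runnable example
	import optparse

	parser = optparse.OptionParser()
	parser.add_option('-f', '--flush-database', dest = 'flush_db',
		action = 'store_true', default = False)
	parser.add_option('-l', '--load', dest = 'load',
		action = 'store_true', default = False)

	options, args = parser.parse_args(['-f', '-l'])
	print(options.flush_db, options.load)   # True True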
From [c46e709b29], lines 330-380:
# initializing and reading in config file
config = Config()

if config.options.dump or config.options.load or config.options.dump_conf or config.options.load_conf:
	import csv

	tagdb = tagDB()
	data_fields = ['site', 'tags', 'regexp']
	conf_fields = ['netmask', 'redirect_url', 'from_weekday', 'to_weekday', 'from_time', 'to_time', 'tag']

	if config.options.dump or config.options.dump_conf:
		csv_writer = csv.writer(sys.stdout)
		if config.options.dump:
			# dumping database
			csv_writer.writerow(data_fields)
			for row in tagdb.dump():
				csv_writer.writerow([row[0], '{' + ','.join(row[1]) + '}', row[2]])

		elif config.options.dump_conf:
			# dumping rules
			csv_writer.writerow(conf_fields)
			for row in tagdb.dump_conf():
				csv_writer.writerow([row[0], row[1], row[2], row[3], row[4], row[5], '{' + ','.join(row[6]) + '}'])



	elif config.options.load or config.options.load_conf:
		csv_reader = csv.reader(sys.stdin)
		first_row = next(csv_reader)

		if config.options.load:
			# loading database
			assert first_row == data_fields, 'File must contain csv data with these columns: ' + repr(data_fields)

			tagdb.load(csv_reader)

		elif config.options.load_conf:
			# loading filtering rules
			assert first_row == conf_fields, 'File must contain csv data with these columns: ' + repr(conf_fields)



			tagdb.load_conf(csv_reader)

else:
	# main loop
	config.section('reactor')
	if config['reactor'] == 'thread':
		checker = CheckerThread()
	elif config['reactor'] == 'plain':
		checker = Checker()
	elif config['reactor'] == 'kqueue':
		checker = CheckerKqueue()

	checker.loop()







To [08a021606d], lines 326-369:
# initializing and reading in config file
config = Config()

if config.options.dump or config.options.load or config.options.dump_conf or config.options.load_conf:
	import csv

	tagdb = tagDB()
	data_fields = ['site', 'tag', 'regexp']
	conf_fields = ['netmask', 'redirect_url', 'from_weekday', 'to_weekday', 'from_time', 'to_time', 'tag']

	if config.options.dump or config.options.dump_conf:

		if config.options.dump:


			dump = tagdb.dump()


		elif config.options.dump_conf:


			dump = tagdb.dump_conf()

		for line in dump:
			sys.stdout.write(line.decode('utf-8'))

	elif config.options.load or config.options.load_conf:
		csv_reader = csv.reader(sys.stdin)
		first_row = next(csv_reader)

		if config.options.load:


			fields = data_fields
			load = tagdb.load

		elif config.options.load_conf:

			fields = conf_fields
			load = tagdb.load_conf

		assert first_row == fields, 'File must contain csv data with these columns: ' + repr(fields)
		load(csv_reader)

else:
	# main loop
	config.section('reactor')
	if config['reactor'] == 'thread':
		checker = CheckerThread()
	elif config['reactor'] == 'plain':
		checker = Checker()
	elif config['reactor'] == 'kqueue':
		checker = CheckerKqueue()

	checker.loop()
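A closing note on the reworked dump/load round trip: dump() and dump_conf() now return server-generated CSV (copy ... to stdout csv header) as bytes, which the main block simply decodes and writes out, while load() still consumes rows from csv.reader, accepting either two columns (site, tag) or three (site, tag, regexp) and rewriting an empty regexp to NULL afterwards. A stand-alone sketch of just that row shaping, with a hypothetical insert() standing in for the prepared insert statement and made-up sample data:

	# editor's sketch of the row handling in the new load(), detached from
	# the database; insert() is a stand-in for the prepared statement
	import csv
	import io

	sample = io.StringIO(
		'site,tag,regexp\n'
		'www.example.com,{banners}\n'
		'ads.example.com,{banners},^/banner/\n')

	def insert(site, tag, regexp):
		print(site, tag, regexp)

	reader = csv.reader(sample)
	assert next(reader) == ['site', 'tag', 'regexp']
	for row in reader:
		if len(row) == 2:
			# no regexp column at all
			insert(row[0], row[1], None)
		else:
			# may still be ''; load() later turns '' into NULL in one UPDATE
			insert(row[0], row[1], row[2])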