Squid url redirector

Check-in [88c03b5440]
anonymous

Check-in [88c03b5440]

Overview
Comment:Main loop reworked. With regexp support there could be possibilities of passing x level domain on regexp fail and not looking into x-1 level domain. Now all matches are selected from database ordered by domain level and checked until match is found.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256: 88c03b544029127c9c6b8bdbe75450e0bfd5f999493dfb79616a0d1656880aea
User & Date: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 on 2009-10-09 06:40:07.000
Other Links: branch diff | manifest | tags
Context
2009-10-10
10:26
function to return site id check-in: e74427953f user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
2009-10-09
06:40
Main loop reworked. With regexp support there could be possibilities of passing x level domain on regexp fail and not looking into x-1 level domain. Now all matches are selected from database ordered by domain level and checked until match is found. check-in: 88c03b5440 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
06:20
minor naming unification check-in: 33e72616c9 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
Changes
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

47
48
49
50
51
52
53
54
55
56
57




58
59
60
61
62
63
64
65
66
67

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84














85
86
87
88
89
90
91
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

46




47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73












74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94







-
+














-
+
-
-
-
-







+
+
+
+










+





-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+








class tagDB:
	"""Database access for redirect-rule lookups.

	The connection is opened lazily by _curs() the first time it is needed
	and then reused. One statement is prepared up front and executed by
	check() for every lookup.
	"""
	__slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

	def __init__(self):
		"""Open the database connection and prepare the rule lookup statement."""
		self._prepared = set()
		self._db = False
		# Deliberately no "limit 1": every rule matching the site/netmask is
		# returned, longest site array first, so the caller can fall through
		# to the next rule when a rule's regexp does not match the URL.
		self._check_stmt = self._curs().prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")

	def _curs(self):
		"""Return the database connection, opening it on first use.

		Connection parameters (user, password, host, database) are read from
		the 'database' section of the module-level config object.
		"""
		if not self._db:
			config.section('database')
			self._db = postgresql.open(
				'pq://{0}:{1}@{2}/{3}'.format(
					config['user'],
					config['password'],
					config['host'],
					config['database'],
			) )
		return(self._db)

	def check(self, ip_address, site):
		"""Return all rules matching the given site and client address.

		Executes the prepared statement exactly once with (site, ip_address)
		and returns its result unchanged: a sequence of
		(redirect_url, regexp) rows in the order given by the query, possibly
		empty.
		"""
		return self._check_stmt(site, ip_address)

class CheckerThread:
	__slots__ = frozenset(['_db', '_lock', '_lock_queue', '_log', '_queue'])

	def __init__(self, db, log):
		self._db = db
		self._log = log
		# Spin lock. Loop acquires it on start then releases it when holding queue
		# lock. This way the thread proceeds without stops while queue has data and
		# gets stalled when no data present. The lock is released by queue writer
		# after storing something into the queue
		self._lock = _thread.allocate_lock()
		self._lock_queue = _thread.allocate_lock()
		self._lock.acquire()
		self._queue = []
		_thread.start_new_thread(self._start, ())

	def _start(self):
		"""Worker loop: take requests off the queue and answer each one once.

		For every queued request the matching rules are fetched with a single
		database lookup and tried in the order returned. The first rule that
		has a redirect URL and either no regexp or a regexp matching req[3]
		produces a '302:<url>' reply. If no rule qualifies, a single '-' reply
		is written instead. Never returns.

		Request fields as used here: req[0] is echoed at the start of the
		reply, req[1] and req[2] are passed to the database check as site and
		client address, req[3] is the string the rule regexp is matched against.
		"""
		while True:
			# Blocks here until a writer (or the previous iteration) has
			# released the signal lock.
			self._lock.acquire()
			self._lock_queue.acquire()
			# yes this should be written this way, and yes, this is why I hate threading
			# More than one item queued: release the signal lock now so the
			# next iteration does not stall after this item is popped.
			if len(self._queue) > 1 and self._lock.locked():
				self._lock.release()
			req = self._queue.pop(0)
			self._lock_queue.release()
			self._log.info('trying %s\n'%req[1])
			for row in self._db.check(req[2], req[1]):
				# Rules without a redirect URL can never produce a redirect.
				if row is None or row[0] is None:
					continue
				if row[1] is not None:
					self._log.info('trying regexp "{0}" versus "{1}"\n'.format(row[1], req[3]))
					# Regexp did not match: fall through to the next rule.
					if not re.compile(row[1]).match(req[3]):
						continue
				writeline('%s 302:%s\n'%(req[0], row[0]))
				break
			else:
				# Reached only when no rule produced a redirect, so every
				# request gets exactly one reply line.
				writeline('%s -\n'%req[0])

	def check(self, line):
		request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$').match(line)
		if request:
			id = request.group(1)
			site = request.group(3)
			url_path = request.group(4)