Squid url redirector

Check-in [88c03b5440]

Overview
Comment: Main loop reworked. With regexp support it was possible for an x-level domain match to be passed when its regexp failed, without the (x-1)-level domain ever being checked. Now all matches are selected from the database ordered by domain level and checked until a match is found.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256: 88c03b544029127c9c6b8bdbe75450e0bfd5f999493dfb79616a0d1656880aea
User & Date: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 on 2009-10-09 06:40:07.000
Other Links: branch diff | manifest | tags
Context
2009-10-10
10:26
function to return site id check-in: e74427953f user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
2009-10-09
06:40
Main loop reworked. With regexp support it was possible for an x-level domain match to be passed when its regexp failed, without the (x-1)-level domain ever being checked. Now all matches are selected from the database ordered by domain level and checked until a match is found. check-in: 88c03b5440 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
06:20
minor naming unification check-in: 33e72616c9 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk
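
Before the listings, a rough sketch of the behaviour change described in the check-in comment. This is not the project's code; the rule layout and the names (Rule, match_rules) are invented here to show how walking all matches ordered by domain level, and falling through when a regexp fails, differs from stopping at the single most specific match.

import re
from collections import namedtuple

# Hypothetical stand-in for site_rule rows: redirect target, optional
# path regexp, and how many labels the rule's domain has.
Rule = namedtuple('Rule', 'redirect_url regexp level')

def match_rules(rules, url_path):
	# Most specific domain first -- the Python equivalent of
	# "order by array_length(site, 1) desc" in the reworked query.
	for rule in sorted(rules, key=lambda r: r.level, reverse=True):
		if rule.regexp is not None and not re.match(rule.regexp, url_path):
			# The old loop effectively stopped here; the reworked one
			# falls through to the next, less specific rule.
			continue
		return rule.redirect_url
	return None  # no rule applied: the request is passed unchanged

rules = [
	Rule('http://block.example/ads', 'ads/', 3),  # e.g. www.example.com
	Rule('http://block.example/all', None, 2),    # e.g. example.com
]
print(match_rules(rules, 'index.html'))   # level-3 regexp fails, level-2 rule still applies
print(match_rules(rules, 'ads/banner'))   # level-3 rule matches directly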
Changes
Old version (lines 24-91):

class tagDB:
	__slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

	def __init__(self):
		self._prepared = set()
		self._db = False
		self._check_stmt = self._curs().prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc limit 1")

	def _curs(self):
		if not self._db:
			config.section('database')
			self._db = postgresql.open(
				'pq://{0}:{1}@{2}/{3}'.format(
					config['user'],
					config['password'],
					config['host'],
					config['database'],
			) )
		return(self._db)

	def check(self, ip_address, site):
		result = self._check_stmt(site, ip_address)
		if len(result) > 0:
			return result[0]
		else:
			return None

class CheckerThread:
	__slots__ = frozenset(['_db', '_lock', '_lock_queue', '_log', '_queue'])

	def __init__(self, db, log):
		self._db = db
		self._log = log

		self._lock = _thread.allocate_lock()
		self._lock_queue = _thread.allocate_lock()
		self._lock.acquire()
		self._queue = []
		_thread.start_new_thread(self._start, ())

	def _start(self):
		while True:
			self._lock.acquire()
			self._lock_queue.acquire()
			if len(self._queue) > 1 and self._lock.locked():
				self._lock.release()
			req = self._queue.pop(0)
			self._lock_queue.release()
			self._log.info('trying %s\n'%req[1])
			row = self._db.check(req[2], req[1])
			if row != None and row[0] != None:
				if row[1] != None:
					self._log.info('trying regexp "{0}" versus "{1}"\n'.format(row[1], req[3]))
					if re.compile(row[1]).match(req[3]):
						writeline('%s 302:%s\n'%(req[0], row[0]))
					else:
						writeline('%s -\n'%req[0])
				else:
					writeline('%s 302:%s\n'%(req[0], row[0]))
			else:
				writeline('%s -\n'%req[0])

	def check(self, line):
		request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$').match(line)
		if request:
			id = request.group(1)
			site = request.group(3)
			url_path = request.group(4)

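The version above asks the database for a single row ("limit 1"), so when that one rule carries a regexp that does not match, nothing else is ever consulted. The reworked query below drops the limit and the caller iterates over every row. One part of the filter that stays the same is netmask >> $2::text::inet, which keeps only rules whose network contains the client address; a standalone illustration of that check using the standard library follows (the function name is made up, and the project performs this check in SQL, not Python):

import ipaddress

def rule_covers_client(rule_netmask, client_ip):
	# Roughly what the "netmask >> client_ip" clause evaluates on the
	# SQL side: true when the client address lies inside the rule's network.
	return ipaddress.ip_address(client_ip) in ipaddress.ip_network(rule_netmask)

print(rule_covers_client('192.168.1.0/24', '192.168.1.20'))  # True
print(rule_covers_client('192.168.1.0/24', '10.0.0.5'))      # False
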
New version (lines 24-94):

class tagDB:
	__slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

	def __init__(self):
		self._prepared = set()
		self._db = False
		self._check_stmt = self._curs().prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")

	def _curs(self):
		if not self._db:
			config.section('database')
			self._db = postgresql.open(
				'pq://{0}:{1}@{2}/{3}'.format(
					config['user'],
					config['password'],
					config['host'],
					config['database'],
			) )
		return(self._db)

	def check(self, ip_address, site):
		return self._check_stmt(site, ip_address)

class CheckerThread:
	__slots__ = frozenset(['_db', '_lock', '_lock_queue', '_log', '_queue'])

	def __init__(self, db, log):
		self._db = db
		self._log = log
		# Spin lock. Loop acquires it on start then releases it when holding queue
		# lock. This way the thread proceeds without stops while queue has data and
		# gets stalled when no data present. The lock is released by queue writer
		# after storing something into the queue
		self._lock = _thread.allocate_lock()
		self._lock_queue = _thread.allocate_lock()
		self._lock.acquire()
		self._queue = []
		_thread.start_new_thread(self._start, ())

	def _start(self):
		while True:
			self._lock.acquire()
			self._lock_queue.acquire()
			# yes this should be written this way, and yes, this is why I hate threading
			if len(self._queue) > 1 and self._lock.locked():
				self._lock.release()
			req = self._queue.pop(0)
			self._lock_queue.release()
			self._log.info('trying %s\n'%req[1])
			result = self._db.check(req[2], req[1])
			for row in result:
				if row != None and row[0] != None:
					if row[1] != None:
						self._log.info('trying regexp "{0}" versus "{1}"\n'.format(row[1], req[3]))
						if re.compile(row[1]).match(req[3]):
							writeline('%s 302:%s\n'%(req[0], row[0]))
							break
						else:
							continue
					else:
						writeline('%s 302:%s\n'%(req[0], row[0]))
						break
			writeline('%s -\n'%req[0])

	def check(self, line):
		request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$').match(line)
		if request:
			id = request.group(1)
			site = request.group(3)
			url_path = request.group(4)
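
For orientation, the regular expression in check() appears to parse the line format Squid hands to a redirector helper when request IDs are enabled. The sample line below is fabricated for illustration (the pattern is written as a raw string here); the groups shown are the ones the code above pulls out, plus the client address presumably passed on for the netmask check.

import re

line_re = re.compile(r'^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')

# Fabricated example of one input line.
sample = '0 http://www.example.com/ads/banner.gif 192.0.2.15/- - GET -'
m = line_re.match(sample)
print(m.group(1))  # '0'               -> request id
print(m.group(3))  # 'www.example.com' -> site, looked up via tagDB
print(m.group(4))  # 'ads/banner.gif'  -> url_path, tested against a rule's regexp
print(m.group(5))  # '192.0.2.15'      -> presumably the client address for the netmask check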