Overview
| Comment: | Main loop reworked. With regexp support there could be possibilities of passing x level domain on regexp fail and not looking into x-1 level domain. Now all matches are selected from database ordered by domain level and checked until match is found. |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | master | trunk |
| Files: | files | file ages | folders |
| SHA3-256: | 88c03b544029127c9c6b8bdbe75450e0 |
| User & Date: | c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 on 2009-10-09 06:40:07.000 |
| Other Links: | branch diff | manifest | tags |
Context
|
2009-10-10
| ||
| 10:26 | function to return site id check-in: e74427953f user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk | |
|
2009-10-09
| ||
| 06:40 | Main loop reworked. With regexp support there could be possibilities of passing x level domain on regexp fail and not looking into x-1 level domain. Now all matches are selected from database ordered by domain level and checked until match is found. check-in: 88c03b5440 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk | |
| 06:20 | minor naming unification check-in: 33e72616c9 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk | |
Changes
Modified squid-tagger.py
from [859fd273be]
to [0cfe66321a].
| ︙ | ︙ | |||
24 25 26 27 28 29 30 | class tagDB: __slots__ = frozenset(['_prepared', '_check_stmt', '_db']) def __init__(self): self._prepared = set() self._db = False | | | < < < < > > > > > | > | | | | | > | | | | | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
class tagDB:
	'''Thin wrapper around the PostgreSQL rules database.

	Opens the connection lazily on first use and keeps one prepared
	statement that returns every rule matching a site, ordered with the
	most specific domain level first.
	'''
	__slots__ = frozenset(['_prepared', '_check_stmt', '_db'])

	def __init__(self):
		self._prepared = set()
		self._db = False
		# Prepare the lookup once; _curs() opens the connection as a side effect.
		self._check_stmt = self._curs().prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")

	def _curs(self):
		# Lazily open and cache the database connection.
		if not self._db:
			config.section('database')
			dsn = 'pq://{0}:{1}@{2}/{3}'.format(config['user'], config['password'], config['host'], config['database'])
			self._db = postgresql.open(dsn)
		return self._db

	def check(self, ip_address, site):
		'''Run the prepared rule lookup for a site / client address pair.

		Returns rows of (redirect_url, regexp), most specific domain first.
		'''
		return self._check_stmt(site, ip_address)
class CheckerThread:
	# Background worker: answers redirector requests pulled from an
	# in-memory queue; the loop itself lives in _start().
	__slots__ = frozenset(['_db', '_lock', '_lock_queue', '_log', '_queue'])

	def __init__(self, db, log):
		# db: tagDB-like object providing check(ip, site)
		# log: logger-like object with an info() method
		self._db = db
		self._log = log
		# Spin lock. Loop acquires it on start then releases it when holding queue
		# lock. This way the thread proceeds without stops while queue has data and
		# gets stalled when no data present. The lock is released by queue writer
		# after storing something into the queue
		self._lock = _thread.allocate_lock()
		self._lock_queue = _thread.allocate_lock()
		# Held from the start so the worker blocks until the first request
		# arrives; the order here matters — _lock must be held and _queue
		# must exist before the worker thread is launched.
		self._lock.acquire()
		self._queue = []
		_thread.start_new_thread(self._start, ())
def _start(self):
while True:
self._lock.acquire()
self._lock_queue.acquire()
# yes this should be written this way, and yes, this is why I hate threading
if len(self._queue) > 1 and self._lock.locked():
self._lock.release()
req = self._queue.pop(0)
self._lock_queue.release()
self._log.info('trying %s\n'%req[1])
result = self._db.check(req[2], req[1])
for row in result:
if row != None and row[0] != None:
if row[1] != None:
self._log.info('trying regexp "{0}" versus "{1}"\n'.format(row[1], req[3]))
if re.compile(row[1]).match(req[3]):
writeline('%s 302:%s\n'%(req[0], row[0]))
break
else:
continue
else:
writeline('%s 302:%s\n'%(req[0], row[0]))
break
writeline('%s -\n'%req[0])
def check(self, line):
request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$').match(line)
if request:
id = request.group(1)
site = request.group(3)
url_path = request.group(4)
|
| ︙ | ︙ |