Index: database.sql
==================================================================
--- database.sql
+++ database.sql
@@ -1,85 +1,5 @@
--- general database structure
--- table to hold all rules
-CREATE TABLE rules (
-    netmask cidr NOT NULL,
-    redirect_url text DEFAULT 'about::blank'::text NOT NULL,
-    from_weekday smallint DEFAULT 0 NOT NULL,
-    to_weekday smallint DEFAULT 6 NOT NULL,
-    from_time time without time zone DEFAULT '00:00:00'::time without time zone NOT NULL,
-    to_time time without time zone DEFAULT '23:59:59'::time without time zone NOT NULL,
-    id_tag smallint NOT NULL
-);
-
-ALTER TABLE ONLY rules
-    ADD CONSTRAINT rules_tag_f FOREIGN KEY (id_tag) REFERENCES tag(id_tag) MATCH FULL
-    ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED;
-
--- table to hold site arrays
-CREATE TABLE site (
-    id_site serial,
-    site text[] NOT NULL
-);
-
-ALTER TABLE ONLY site
-    ADD CONSTRAINT site_pkey PRIMARY KEY (id_site);
-
-CREATE UNIQUE INDEX site_u ON site (usort(site));
-
-CREATE INDEX site_g ON site USING gin (site);
-
--- table to hold tag combinations
-CREATE TABLE tag (
-    id_tag serial,
-    tag text[] NOT NULL
-);
-
-ALTER TABLE ONLY tag
-    ADD CONSTRAINT tag_pkey PRIMARY KEY (id_tag);
-
-CREATE UNIQUE INDEX tag_u ON tag (usort(tag));
-
-CREATE INDEX tag_g ON tag USING gin (tag);
-
--- table to hold tag - site links
-CREATE TABLE urls (
-    date_added timestamp without time zone DEFAULT ('now'::text)::timestamp(0) without time zone NOT NULL,
-    id_site integer NOT NULL,
-    id_tag smallint NOT NULL,
-    regexp text
-);
-
-CREATE UNIQUE INDEX urls_pkey ON urls USING btree (id_site, regexp);
-
-CREATE INDEX urls_id_tag ON urls USING btree (id_tag);
-
-ALTER TABLE ONLY urls
-    ADD CONSTRAINT urls_site_f FOREIGN KEY (id_site) REFERENCES site(id_site) MATCH FULL
-    ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED;
-
-ALTER TABLE ONLY urls
-    ADD CONSTRAINT urls_tag_f FOREIGN KEY (id_tag) REFERENCES tag(id_tag) MATCH FULL
-    ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED;
-
--- rule to join all tables into one to simplify access
--- automaticall uses current day and time data
-CREATE VIEW site_rule AS
-SELECT a.redirect_url, a.netmask, b.site, b.regexp
-FROM ((
-    SELECT rules.redirect_url, tag.tag AS rule_tag, rules.netmask
-    FROM rules NATURAL JOIN tag
-    WHERE ('now'::text)::time without time zone >= rules.from_time
-    AND ('now'::text)::time without time zone <= rules.to_time
-    AND date_part('dow'::text, now()) >= (rules.from_weekday)::double precision
-    AND date_part('dow'::text, now()) <= (rules.to_weekday)::double precision
-) a JOIN (
-    SELECT site.site, tag.tag AS url_tag, regexp
-    FROM urls NATURAL JOIN tag NATURAL JOIN site
-) b ON (b.url_tag && a.rule_tag));
-
-CREATE PROCEDURAL LANGUAGE plpgsql;
-
 -- general array sorting and domain processing functions
 -- sorts array
 CREATE or replace FUNCTION sort(original anyarray) RETURNS anyarray
     LANGUAGE sql IMMUTABLE STRICT
     AS $_$
@@ -91,10 +11,45 @@
     LANGUAGE sql IMMUTABLE STRICT
     AS $_$
 select array_agg(item) as result from (select distinct unnest($1) as item order by item) a;
 $_$;
+-- general database structure
+-- table to hold all rules
+CREATE TABLE rules (
+    netmask cidr NOT NULL,
+    redirect_url text DEFAULT 'about:blank'::text NOT NULL,
+    from_weekday smallint DEFAULT 0 NOT NULL,
+    to_weekday smallint DEFAULT 6 NOT NULL,
+    from_time time without time zone DEFAULT '00:00:00'::time without time zone NOT NULL,
+    to_time time without time zone DEFAULT '23:59:59'::time without time zone NOT NULL,
+    tag text[] NOT NULL
+);
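+
+-- an illustrative rule row (values are invented; the checker substitutes
+-- the request path into the {} placeholder of redirect_url):
+--   insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag)
+--   values ('192.168.0.0/16', 'http://127.0.0.1/blocked?path={}', 1, 5, '09:00', '18:00', '{work}');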
+
+-- table to hold tag - site links
+CREATE TABLE urls (
+    date_added timestamp without time zone DEFAULT ('now'::text)::timestamp(0) without time zone NOT NULL,
+    site text[] NOT NULL,
+    tag text[] NOT NULL,
+    regexp text
+);
+
+create unique index urls_rst on urls (regexp, usort(site), usort(tag));
+
+-- view to join both tables into one to simplify access
+-- automatically uses current day and time data
+create view site_rule as
+select redirect_url, netmask, site, regexp
+from rules join urls
+on (urls.tag && rules.tag)
+where ('now'::text)::time without time zone >= from_time
+    and ('now'::text)::time without time zone <= to_time
+    and date_part('dow'::text, now()) >= (from_weekday)::double precision
+    and date_part('dow'::text, now()) <= (to_weekday)::double precision;
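+
+-- the daemon's lookup runs against this view; its prepared statement (see
+-- squid-tagger.py below) has this shape, shown with illustrative arguments:
+--   select redirect_url, regexp from site_rule
+--   where site <@ tripdomain('www.example.com')
+--     and netmask >> '192.168.0.15'::inet
+--   order by array_length(site, 1) desc;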
+
+CREATE PROCEDURAL LANGUAGE plpgsql;
+
 -- transforms domain into ordered array for indexing
 CREATE or replace FUNCTION tripdomain(url text) RETURNS text[]
     LANGUAGE plpgsql IMMUTABLE STRICT
     AS $_$
 declare
@@ -131,116 +86,5 @@
     x := x - 1;
 end loop;
 return array_to_string(splitted, '.');
 end;
 $_$;
-
--- functions that works with tables
--- this functions returns id of site
-create or replace function get_site(my_site text[]) returns integer
-    language plpgsql strict
-    as $$
-declare
-    site_id integer;
-begin
-    select id_site from site where my_site = site into site_id;
-    if not found then
-        insert into site (site) values (my_site);
-        select id_site from site where my_site = site into site_id;
-    end if;
-    return site_id;
-end;
-$$;
-
-create or replace function get_site(domain text) returns integer
-    language sql immutable strict
-    as $$
-select get_site(tripdomain($1)) as result;
-$$;
-
--- this function inserts or updates record with tags to site by site id with regexp
-CREATE or replace FUNCTION mark(my_id_site integer, my_id_tag integer, my_regexp text) RETURNS integer
-    LANGUAGE plpgsql STRICT
-    AS $$
-declare
-    -- maybe check should be added to make sure supplied site id really exists
-    my_tag text[];
-begin
-    -- selecting tags site already have and adding new tag to them
-    -- note that tags should be sorted to eliminate permutations
-    select coalesce(tag, '{}'::text[]) from urls natural left join tag
-        where id_site = my_id_site and regexp = my_regexp into my_tag;
-    if not found then
-        -- no records found - creating new tag
-        insert into urls (id_site, id_tag, regexp) values (my_id_site, my_id_tag, my_regexp);
-    else
-        -- joining tags
-        select usort(my_tag || tag) from tag where id_tag = my_id_tag into my_tag;
-        -- updating existing record
-        update urls set id_tag = get_tag(my_tag)
-            where id_site = my_id_site and regexp = my_regexp;
-    end if;
-    return my_id_site;
-end;
-$$;
-
--- this function adds tag to site by site id
-CREATE or replace FUNCTION mark(my_id_site integer, new_tag text) RETURNS integer
-    LANGUAGE sql immutable STRICT
-    AS $$
-select mark($1, get_tag(array[$2]), NULL) as result;
-$$;
-
--- this function adds tag to domain
-CREATE or replace FUNCTION mark(domain text, new_tag text) RETURNS integer
-    LANGUAGE sql immutable STRICT
-    AS $$
-select mark(get_site($1), get_tag(array[$2]), NULL) as result;
-$$;
-
--- this function sets tags for site without regexp
-CREATE or replace FUNCTION set(my_id_site integer, my_id_tag integer) RETURNS integer
-    LANGUAGE sql STRICT
-    AS $$
-delete from urls where $1 = id_site and regexp is NULL;
-insert into urls (id_site, id_tag) values ($1, $2);
-select $1;
-$$;
-
--- this function sets tags for site/regexp pair
-CREATE or replace FUNCTION set(my_id_site integer, my_id_tag integer, my_regexp text) RETURNS integer
-    LANGUAGE sql STRICT
-    AS $$
-delete from urls where $1 = id_site and $3 = regexp;
-insert into urls (id_site, id_tag, regexp) values ($1, $2, $3);
-select $1;
-$$;
-
--- this function stores new data for site/regexp pair
-create or replace function set(domain text, tags text, regexp text) returns integer
-    language sql immutable strict
-    as $$
-select set(get_site($1), get_tag($2::text[]), $3);
-$$;
-
--- this function stores new data for site/regexp pair
-create or replace function set(domain text, tags text) returns integer
-    language sql immutable strict
-    as $$
-select set(get_site($1), get_tag($2::text[]));
-$$;
-
--- this function returns id of tag array
-create or replace function get_tag(my_tag text[]) returns integer
-    language plpgsql strict
-    as $$
-declare
-    tag_id integer;
-begin
-    select id_tag from tag where usort(my_tag) = tag into tag_id;
-    if not found then
-        insert into tag (tag) values (usort(my_tag));
-        select id_tag from tag where usort(my_tag) = tag into tag_id;
-    end if;
-    return tag_id;
-end;
-$$;
Index: squid-tagger.py
==================================================================
--- squid-tagger.py
+++ squid-tagger.py
@@ -42,47 +42,45 @@
         if self._check_stmt == None:
             self._check_stmt = self._db.prepare("select redirect_url, regexp from site_rule where site <@ tripdomain($1) and netmask >> $2::text::inet order by array_length(site, 1) desc")
         return(self._check_stmt(site, ip_address))
 
     def dump(self):
-        return(self._db.prepare("select untrip(site), tag, regexp from urls natural join site natural join tag order by site, tag")())
+        return(self._db.prepare("copy (select untrip(site) as site, tag, regexp from urls order by site, tag) to stdout csv header")())
 
-    def load(self, csv_data):
+    def load(self, data):
         with self._db.xact():
             if config.options.flush_db:
                 self._db.execute('delete from urls;')
-            if config.options.flush_site:
-                self._db.execute('delete from site;');
-            insertreg = self._db.prepare("select set($1, $2, $3)")
-            insert = self._db.prepare("select set($1, $2)")
-            for row in csv_data:
-                if len(row[2]) > 0:
-                    insertreg(row[0], row[1], row[2])
+            insert = self._db.prepare("insert into urls (site, tag, regexp) values (tripdomain($1), $2::text::text[], $3)")
+            for row in data:
+                if len(row) == 2:
+                    insert(row[0], row[1], None)
                 else:
-                    insert(row[0], row[1])
-            self._db.execute('vacuum analyze site;')
+                    insert(row[0], row[1], row[2])
+            self._db.execute("update urls set regexp = NULL where regexp = ''")
         self._db.execute('vacuum analyze urls;')
 
     def load_conf(self, csv_data):
         with self._db.xact():
             self._db.execute('delete from rules;')
-            insertconf = self._db.prepare("insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, id_tag) values ($1::text::cidr, $2, $3, $4, $5::text::time, $6::text::time, get_tag($7::text::text[]))")
+            insertconf = self._db.prepare("insert into rules (netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag) values ($1::text::cidr, $2, $3, $4, $5::text::time, $6::text::time, $7::text::text[])")
             for row in csv_data:
                 insertconf(row[0], row[1], int(row[2]), int(row[3]), row[4], row[5], row[6])
         self._db.execute('vacuum analyze rules;')
 
     def dump_conf(self):
-        return(self._db.prepare("select netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag from rules natural join tag")())
+        return(self._db.prepare("copy (select netmask, redirect_url, from_weekday, to_weekday, from_time, to_time, tag from rules) to stdout csv header")())
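+
+    # both copy queries above emit csv with a header row; a sketch of the
+    # expected shapes (data rows below are invented examples, not real dumps):
+    #   site,tag,regexp
+    #   www.example.com,"{ads,banners}",
+    #
+    #   netmask,redirect_url,from_weekday,to_weekday,from_time,to_time,tag
+    #   192.168.0.0/16,http://127.0.0.1/blocked?path={},1,5,09:00:00,18:00:00,"{work}"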
 
 # abstract class with basic checking functionality
 class Checker:
-    __slots__ = frozenset(['_db', '_log'])
+    __slots__ = frozenset(['_db', '_log', '_request'])
 
     def __init__(self):
         self._db = tagDB()
         self._log = Logger()
         self._log.info('started\n')
+        self._request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$')
 
     def process(self, id, site, ip_address, url_path, line = None):
         self._log.info('trying {}\n'.format(site))
         result = self._db.check(site, ip_address)
         reply = '-'
@@ -102,11 +100,11 @@
                 reply = row[0].format(url_path)
                 break
         self.writeline('{} {}\n'.format(id, reply))
 
     def check(self, line):
-        request = re.compile('^([0-9]+)\ (http|ftp):\/\/([-\w.:]+)\/([^ ]*)\ ([0-9.]+)\/(-|[\w\.]+)\ (-|\w+)\ (-|GET|HEAD|POST).*$').match(line)
+        request = self._request.match(line)
         if request:
             id = request.group(1)
             #proto = request.group(2)
             site = request.group(3)
             url_path = request.group(4)
@@ -238,10 +236,11 @@
             # detect end of stream and exit if possible
             if kev.flags >> 15 == 1:
                 self._kq.control([self._select.kevent(sys.stdin, self._select.KQ_FILTER_READ, self._select.KQ_EV_DELETE)], 0)
                 eof = True
+
+            #timeout = 0
 
         if len(kevs) == 0:
             if len(self._queue) > 0:
                 # get one request and process it
                 req = self._queue.pop(0)
@@ -286,13 +285,10 @@
         help = 'dump database', action = 'store_true', metavar = 'bool',
         default = False)
     parser.add_option('-f', '--flush-database', dest = 'flush_db',
         help = 'flush previous database on load', default = False,
         action = 'store_true', metavar = 'bool')
-    parser.add_option('-F', '--flush-site', dest = 'flush_site',
-        help = 'when flushing previous dtabase flush site index too',
-        action = 'store_true', default = False, metavar = 'bool')
     parser.add_option('-l', '--load', dest = 'load',
         help = 'load database', action = 'store_true', metavar = 'bool',
         default = False)
     parser.add_option('-D', '--dump-conf', dest = 'dump_conf',
         help = 'dump filtering rules', default = False, metavar = 'bool',
@@ -332,42 +328,35 @@
 
     if config.options.dump or config.options.load or config.options.dump_conf or config.options.load_conf:
         import csv
 
         tagdb = tagDB()
-        data_fields = ['site', 'tags', 'regexp']
+        data_fields = ['site', 'tag', 'regexp']
         conf_fields = ['netmask', 'redirect_url', 'from_weekday', 'to_weekday', 'from_time', 'to_time', 'tag']
 
         if config.options.dump or config.options.dump_conf:
-            csv_writer = csv.writer(sys.stdout)
-            if config.options.dump:
-                # dumping database
-                csv_writer.writerow(data_fields)
-                for row in tagdb.dump():
-                    csv_writer.writerow([row[0], '{' + ','.join(row[1]) + '}', row[2]])
-
-            elif config.options.dump_conf:
-                # dumping rules
-                csv_writer.writerow(conf_fields)
-                for row in tagdb.dump_conf():
-                    csv_writer.writerow([row[0], row[1], row[2], row[3], row[4], row[5], '{' + ','.join(row[6]) + '}'])
+            if config.options.dump:
+                dump = tagdb.dump()
+            elif config.options.dump_conf:
+                dump = tagdb.dump_conf()
+
+            for line in dump:
+                sys.stdout.write(line.decode('utf-8'))
 
         elif config.options.load or config.options.load_conf:
            csv_reader = csv.reader(sys.stdin)
            first_row = next(csv_reader)
 
            if config.options.load:
-                # loading database
-                assert first_row == data_fields, 'File must contain csv data with theese columns: ' + repr(data_fields)
-
-                tagdb.load(csv_reader)
-
-            elif config.options.load_conf:
-                # loading database
-                assert first_row == conf_fields, 'File must contain csv data with theese columns: ' + repr(conf_fields)
-
-                tagdb.load_conf(csv_reader)
+                fields = data_fields
+                load = tagdb.load
+            elif config.options.load_conf:
+                fields = conf_fields
+                load = tagdb.load_conf
+
+            assert first_row == fields, 'File must contain csv data with these columns: ' + repr(fields)
+            load(csv_reader)
 
     else:
         # main loop
         config.section('reactor')
         if config['reactor'] == 'thread':
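
Example session (all values illustrative). A request line in the format
matched by Checker._request, followed by the reply the daemon writes back
when no rule matches:

    0 http://www.example.com/some/path 192.168.0.15/- - GET
    0 -

A data CSV accepted by --load after this change; the header row must match
data_fields exactly, and an empty regexp column is normalised to NULL on
load:

    site,tag,regexp
    www.example.com,"{ads}",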