Overview
Comment: | * instead of fuzzy logic for merging many tags, split some functionality out of mark(), which now works only with a single tag; for multiple tags there is a set() function; * added an option to drop the site table. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | master | trunk |
Files: | files | file ages | folders |
SHA3-256: |
7224844efa8f0750b5354bc106903fa9 |
User & Date: | c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 on 2010-03-18 15:55:11.000 |
Other Links: | branch diff | manifest | tags |
Context
2010-03-22
| ||
15:32 | added sorting by site check-in: c27c7eb208 user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk | |
2010-03-18
| ||
15:55 | * instead of fuzzy logic for merging many tags, split some functionality out of mark(), which now works only with a single tag; for multiple tags there is a set() function; * added an option to drop the site table. check-in: 7224844efa user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk | |
15:26 | * reworked indexes * added scripts to dump/load db check-in: f1bafd194a user: c.kworr@d4daf22a-8aaf-11de-a64d-234b64dd91b4 tags: master, trunk | |
Changes
Modified database.sql
from [83b3f67d5d]
to [bb384fd21a].
︙ | ︙ | |||
132 133 134 135 136 137 138 | -- this function adds tag to domain CREATE or replace FUNCTION mark(domain text, new_tag text) RETURNS integer LANGUAGE sql immutable STRICT AS $$ select mark(get_site($1), $2) as result; $$; | | | | > > | | | | > > > > > > > > > > > > > > > > | | 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | -- this function adds tag to domain CREATE or replace FUNCTION mark(domain text, new_tag text) RETURNS integer LANGUAGE sql immutable STRICT AS $$ select mark(get_site($1), $2) as result; $$; -- this function sets tags for site/regexp pair CREATE or replace FUNCTION set(my_id_site integer, my_id_tag integer) RETURNS integer LANGUAGE sql STRICT AS $$ delete from urls where $1 = id_site and regexp is null; insert into urls (id_site, id_tag) values ($1, $2); select $1; $$; -- this function sets tags for site/regexp pair CREATE or replace FUNCTION set(my_id_site integer, my_id_tag integer, my_regexp text) RETURNS integer LANGUAGE sql STRICT AS $$ delete from urls where $1 = id_site and $3 = regexp; insert into urls (id_site, id_tag, regexp) values ($1, $2, $3); select $1; $$; -- this function stores new data for site/regexp pair create or replace function set(domain text, tags text, regexp text) returns integer language sql immutable strict as $$ select set(get_site($1), get_tag($2::text[]), $3); $$; -- this function stores new data for site/regexp pair create or replace function set(domain text, tags text) returns integer language sql immutable strict as $$ select set(get_site($1), get_tag($2::text[])); $$; -- this function returns id of tag array create or replace function get_tag(my_tag text[]) returns integer language plpgsql strict as $$ declare |
︙ | ︙ |
Modified st-load.py
from [3246bbaf0b]
to [02168859d1].
︙ | ︙ | |||
35 36 37 38 39 40 41 | config['user'], config['password'], config['host'], config['database'], ) ) def load(self, csv_data): | | | | > | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | config['user'], config['password'], config['host'], config['database'], ) ) def load(self, csv_data): insert = self._db.prepare("select set($1, $2, $3)") with self._db.xact(): config.section('loader') if config['drop_database']: self._db.execute('delete from urls;') if config['drop_site']: self._db.execute('delete from site;'); for row in csv_data: insert(row[0], row[1], row[2]) self._db.execute('vacuum analyze site;') self._db.execute('vacuum analyze urls;') # this classes processes config file and substitutes default values class Config: |
︙ | ︙ | |||
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | 'user': 'squidTag', 'password': 'password', 'host': 'localhost', 'database': 'squidTag', }, 'loader': { 'drop_database': False, },} # function to read in config file def __init__(self): parser = optparse.OptionParser() parser.add_option('-c', '--config', dest = 'config', help = 'config file location', metavar = 'FILE', default = '/usr/local/etc/squid-tagger.conf') parser.add_option('-d', '--drop-database', dest = 'drop_database', help = 'signals loader to drop previous database', action = 'store_true') (options, args) = parser.parse_args() if options.drop_database: self._default['loader']['drop_database'] = True if not os.access(options.config, os.R_OK): print("Can't read {}: exitting".format(options.config)) sys.exit(2) self._config = configparser.ConfigParser() self._config.readfp(open(options.config)) | > > > > > > > | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | 'user': 'squidTag', 'password': 'password', 'host': 'localhost', 'database': 'squidTag', }, 'loader': { 'drop_database': False, 'drop_site': False, },} # function to read in config file def __init__(self): parser = optparse.OptionParser() parser.add_option('-c', '--config', dest = 'config', help = 'config file location', metavar = 'FILE', default = '/usr/local/etc/squid-tagger.conf') parser.add_option('-d', '--drop-database', dest = 'drop_database', help = 'signals loader to drop previous database', action = 'store_true') parser.add_option('-D', '--drop-site', dest = 'drop_site', help = 'signals loader to drop not only url definitions but site index too', action = 'store_true') (options, args) = parser.parse_args() if options.drop_database: self._default['loader']['drop_database'] = True if options.drop_site: self._default['loader']['drop_site'] = True if not os.access(options.config, os.R_OK): print("Can't read {}: 
exitting".format(options.config)) sys.exit(2) self._config = configparser.ConfigParser() self._config.readfp(open(options.config)) |
︙ | ︙ |