1
2
3
4
5
6
7
8
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
CREATE PROCEDURAL LANGUAGE plpgsql;
-- general array sorting functions
-- sorts array
CREATE FUNCTION sort(original anyarray) RETURNS anyarray
LANGUAGE sql IMMUTABLE STRICT
AS $_$
select array_agg(item) as result from (select unnest($1) as item order by item) a;
$_$;
-- sorts array and removes duplicates
CREATE FUNCTION usort(original anyarray) RETURNS anyarray
LANGUAGE sql IMMUTABLE STRICT
AS $_$
select array_agg(item) as result from (select distinct unnest($1) as item order by item) a;
$_$;
-- this function adds tag to domain
CREATE FUNCTION mark(domain text, new_tag text) RETURNS void
LANGUAGE plpgsql STRICT
AS $$
declare
my_site text[];
|
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
-
-
+
-
-
+
+
-
-
+
-
-
+
+
|
-- note that tags should be sorted to eliminate permutations
select tag from urls natural join tag where id_site = my_site_id into my_tag;
if not found then
-- no records found - creating new tag
my_tag := array[new_tag];
else
-- joining tags
select array_agg(tag)
from (select distinct unnest(my_tag || array[new_tag]) as tag order by tag asc) a
select usort(my_tag || array[new_tag]) into my_tag;
into my_tag;
-- deleting old site specification
delete from urls where id_site = my_site_id;
end if;
-- selecting new tag id or adding tag to the table
select id_tag from tag where my_tag = tag into my_tag_id;
if not found then
insert into tag (tag) values(my_tag);
select id_tag from tag where my_tag = tag into my_tag_id;
end if;
-- adding new site specification
insert into urls (id_site, id_tag) values (my_site_id, my_tag_id);
end;$$;
end;
$$;
-- this function adds tag to site by site id
CREATE FUNCTION mark(my_site_id smallint, new_tag text) RETURNS void
LANGUAGE plpgsql STRICT
AS $$
declare
-- maybe check should be added to make sure supplied site id really exists
my_tag text[];
my_tag_id smallint;
begin
-- selecting tags site already have and adding new tag to them
-- note that tags should be sorted to eliminate permutations
select tag from urls natural join tag where id_site = my_site_id into my_tag;
if not found then
-- no records found - creating new tag
my_tag := array[new_tag];
else
-- joining tags
select array_agg(tag)
from (select distinct unnest(my_tag || array[new_tag]) as tag order by tag asc) a
select usort(my_tag || array[new_tag]) into my_tag;
into my_tag;
-- deleting old site specification
delete from urls where id_site = my_site_id;
end if;
-- selecting new tag id or adding tag to the table
select id_tag from tag where my_tag = tag into my_tag_id;
if not found then
insert into tag (tag) values(my_tag);
select id_tag from tag where my_tag = tag into my_tag_id;
end if;
-- adding new site specification
insert into urls (id_site, id_tag) values (my_site_id, my_tag_id);
end;$$;
end;
$$;
-- transforms domain into ordered array for indexing
CREATE FUNCTION tripdomain(url text) RETURNS text[]
LANGUAGE plpgsql IMMUTABLE STRICT
AS $_$
declare
result text[];
|
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
-
+
-
+
-
+
-
+
|
id_site serial,
site text[] NOT NULL
);
ALTER TABLE ONLY site
ADD CONSTRAINT site_id PRIMARY KEY (id_site);
CREATE UNIQUE INDEX site_s ON site USING btree (site);
CREATE UNIQUE INDEX site_s ON site (usort(site));
CREATE INDEX site_sg ON site USING gin (site);
CREATE INDEX site_g ON site USING gin (site);
-- table to hold tag combinations
CREATE TABLE tag (
id_tag serial,
tag text[] NOT NULL
);
ALTER TABLE ONLY tag
ADD CONSTRAINT tag_id PRIMARY KEY (id_tag);
CREATE INDEX tag_g ON tag USING gin (tag);
CREATE UNIQUE INDEX tag_s ON tag (usort(tag));
CREATE UNIQUE INDEX tag_s ON tag USING btree (tag);
CREATE INDEX tag_g ON tag USING gin (tag);
-- table to hold tag - site links
CREATE TABLE urls (
date_added timestamp without time zone DEFAULT ('now'::text)::timestamp(0) without time zone NOT NULL,
id_site smallint NOT NULL,
id_tag smallint NOT NULL,
regex text
|