Check-in [dc2089ff6a]
Logged in as anonymous
Overview
Comment:number of small tweaks, use url crate to parse links, simplify and comment
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: dc2089ff6a195fe3534b5c3783644dbcc3c463e25414cbd7bc83036701a6e789
User & Date: arcade on 2026-01-07 07:30:15.804
Other Links: manifest | tags
Context
2026-01-07
07:40
release 0.5.3 Leaf check-in: 7393d62235 user: arcade tags: release, v0.5.3
07:30
number of small tweaks, use url crate to parse links, simplify and comment Closed-Leaf check-in: dc2089ff6a user: arcade tags: trunk
2026-01-06
15:25
limit some code to debug builds, add proper error handling when parsing RSS check-in: 5d11e7d390 user: arcade tags: trunk
Changes
2010
2011
2012
2013
2014
2015
2016
2017

2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033

2034
2035
2036
2037
2038
2039
2040
2010
2011
2012
2013
2014
2015
2016

2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041







-
+
















+







 "derive_builder",
 "never",
 "quick-xml",
]

[[package]]
name = "rsstg"
version = "0.5.2"
version = "0.5.3"
dependencies = [
 "async-compat",
 "atom_syndication",
 "chrono",
 "config",
 "futures",
 "futures-util",
 "lazy_static",
 "regex",
 "reqwest",
 "rss",
 "sedregex",
 "smol",
 "sqlx",
 "stacked_errors",
 "tgbot",
 "url",
]

[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
2239
2240
2241
2242
2243
2244
2245
2246

2247
2248

2249
2250
2251
2252
2253
2254
2255
2240
2241
2242
2243
2244
2245
2246

2247
2248

2249
2250
2251
2252
2253
2254
2255
2256







-
+

-
+







 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "serde_json"
version = "1.0.148"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
 "itoa",
 "memchr",
 "serde",
 "serde_core",
 "zmij",
]
2661
2662
2663
2664
2665
2666
2667
2668

2669
2670

2671
2672
2673
2674
2675
2676
2677
2662
2663
2664
2665
2666
2667
2668

2669
2670

2671
2672
2673
2674
2675
2676
2677
2678







-
+

-
+







name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"

[[package]]
name = "syn"
version = "2.0.113"
version = "2.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4"
checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
2979
2980
2981
2982
2983
2984
2985
2986

2987
2988

2989
2990
2991
2992
2993
2994
2995
2980
2981
2982
2983
2984
2985
2986

2987
2988

2989
2990
2991
2992
2993
2994
2995
2996







-
+

-
+







name = "typenum"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"

[[package]]
name = "unicase"
version = "2.8.1"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"

[[package]]
name = "unicode-bidi"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"

3627
3628
3629
3630
3631
3632
3633
3634

3635
3636

3637
3638
3639
3640
3641
3642
3643

3644
3645

3646
3647
3648
3649
3650
3651
3652
3628
3629
3630
3631
3632
3633
3634

3635
3636

3637
3638
3639
3640
3641
3642
3643

3644
3645

3646
3647
3648
3649
3650
3651
3652
3653







-
+

-
+






-
+

-
+







 "quote",
 "syn",
 "synstructure",
]

[[package]]
name = "zerocopy"
version = "0.8.31"
version = "0.8.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
checksum = "1fabae64378cb18147bb18bca364e63bdbe72a0ffe4adf0addfec8aa166b2c56"
dependencies = [
 "zerocopy-derive",
]

[[package]]
name = "zerocopy-derive"
version = "0.8.31"
version = "0.8.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
checksum = "c9c2d862265a8bb4471d87e033e730f536e2a285cc7cb05dbce09a2a97075f90"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
1
2
3

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

23
24
25
26
1
2

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27


-
+



















+




[package]
name = "rsstg"
version = "0.5.2"
version = "0.5.3"
authors = ["arcade"]
edition = "2021"

[dependencies]
async-compat = "0.2.5"
atom_syndication = { version = "0.12.4", features = [ "with-serde" ] }
chrono = "0.4.38"
config = { version = "0.15", default-features = false, features = [ "toml" ] }
tgbot = "0.41"
futures = "0.3.30"
futures-util = "0.3.30"
lazy_static = "1.5.0"
regex = "1.10.6"
reqwest = { version = "0.13.1", features = [ "brotli", "socks", "deflate" ]}
rss = "2.0.9"
sedregex = "0.2.5"
smol = "2.0.2"
stacked_errors = "0.7.1"
sqlx = { version = "0.8", features = [ "postgres", "runtime-tokio-rustls", "chrono", "macros" ], default-features = false }
url = "2.5.8"

[profile.release]
lto = true
codegen-units = 1
Modified rsstg.sql from [6bfa596be6] to [5ade9090d5].
18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
18
19
20
21
22
23
24

25
26
27
28
29
30
31
32







-
+








create table rsstg_post (
	source_id integer not null,
	posted timestamptz not null,
	url text not null,
	hour smallint not null generated always as (extract('hour' from posted at time zone 'utc')) stored,
	hxm smallint not null generated always as (hxm(posted)) stored,
	FOREIGN KEY (source_id) REFERENCES rsstg_source(source_id) on delete cascade,
	FOREIGN KEY (source_id) REFERENCES rsstg_source(source_id) on delete cascade
);
create unique index rsstg_post__url on rsstg_post(url);
create index rsstg_post__hour on rsstg_post(hour);
create index rsstg_post__posted_hour on rsstg_post(posted,hour);
create index rsstg_post__hxm on rsstg_post(hxm);
create index rsstg_post__posted_hxm on rsstg_post(posted,hxm);

12
13
14
15
16
17
18

19
20
21
22
23
24
25
26
27
28
29
12
13
14
15
16
17
18
19
20
21
22

23
24
25
26
27
28
29







+



-







	ChatMember,
	ChatUsername,
	GetChat,
	GetChatAdministrators,
	Message,
	ParseMode::MarkdownV2,
};
use url::Url;

lazy_static! {
	// "@" followed by one ASCII letter and then one or more ASCII letters,
	// digits or underscores; capture group 1 is the name without the "@".
	static ref RE_USERNAME: Regex = Regex::new(r"^@([a-zA-Z][a-zA-Z0-9_]+)$").unwrap();
	// http:// or https:// link whose host and path are limited to a small
	// whitelist of characters; at least one character is required after the
	// first "/" that follows the host.
	static ref RE_LINK: Regex = Regex::new(r"^https?://[a-zA-Z.0-9-]+/[-_a-zA-Z.:;0-9/?=]+$").unwrap();
	// Exactly 14 lowercase hexadecimal characters.
	static ref RE_IV_HASH: Regex = Regex::new(r"^[a-f0-9]{14}$").unwrap();
}

pub async fn start (core: &Core, msg: &Message) -> Result<()> {
	core.send("We are open\\. Probably\\. Visit [channel](https://t.me/rsstg_bot_help/3) for details\\.",
		Some(msg.chat.get_id()), Some(MarkdownV2)).await.stack()?;
	Ok(())
91
92
93
94
95
96
97



98
99






100
101
102
103
104
105
106
91
92
93
94
95
96
97
98
99
100


101
102
103
104
105
106
107
108
109
110
111
112
113







+
+
+
-
-
+
+
+
+
+
+







			bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}");
		},
	};
	*/
	if ! RE_USERNAME.is_match(channel) {
		bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}");
	};
	{
		let parsed_url = Url::parse(url)
			.stack_err("Expecting a valid link to ATOM/RSS feed.")?;
	if ! RE_LINK.is_match(url) {
		bail!("Link should be a link to atom/rss feed, something like \"https://domain/path\".\nNot {url:?}");
		match parsed_url.scheme() {
			"http" | "https" => {},
			scheme => {
				bail!("Unsupported URL scheme: {scheme}");
			},
		};
	}
	let iv_hash = match iv_hash {
		Some(hash) => {
			match hash.as_ref() {
				"-" => None,
				thing => {
					if ! RE_IV_HASH.is_match(thing) {
51
52
53
54
55
56
57
58

59
60
61


62
63
64
65
66
67
68
51
52
53
54
55
56
57

58
59


60
61
62
63
64
65
66
67
68







-
+

-
-
+
+







	bail,
};

lazy_static!{
	// Characters that Telegram's MarkdownV2 reserves in ordinary text, captured
	// in group 1 so a replacement can prefix each one with a backslash.
	// `=` is part of Telegram's reserved list. A literal `\` is matched as well:
	// left alone, it would pair up with the backslash inserted in front of the
	// next special character and leave that character unescaped.
	pub static ref RE_SPECIAL: Regex = Regex::new(r"([\\\-_*\[\]()~`>#+=|{}\.!])").unwrap();
}

/// Escape characters that are special in Telegram HTML by prefixing them with a backslash.
/// Escape characters that are special in Telegram MarkdownV2 by prefixing them with a backslash.
///
/// This ensures the returned string can be used as HTML-formatted Telegram message content
/// without special characters being interpreted as HTML markup.
/// This ensures the returned string can be used as MarkdownV2-formatted Telegram message content
/// without special characters being interpreted as MarkdownV2 markup.
pub fn encode (text: &str) -> Cow<'_, str> {
	// `$1` expands to the character captured by RE_SPECIAL, so every match is
	// rewritten as a backslash followed by that same character.
	let replacement = "\\$1";
	RE_SPECIAL.replace_all(text, replacement)
}

// This one does nothing except making sure only one token exists for each id
pub struct Token {
	running: Arc<Mutex<HashSet<i32>>>,
310
311
312
313
314
315
316
317

318
319
320
321
322
323
324
325
326








327
328
329


330
331
332
333
334
335
336
310
311
312
313
314
315
316

317









318
319
320
321
322
323
324
325



326
327
328
329
330
331
332
333
334







-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
+
+







			}
		};
		for (date, post) in posts.iter() {
			let post_url: Cow<str> = match source.url_re {
				Some(ref x) => sedregex::ReplaceCommand::new(x).stack()?.execute(&post.uri),
				None => post.uri.clone().into(),
			};
			if let Some(exists) = conn.exists(&post_url, id).await.stack()? {
			if ! conn.exists(&post_url, id).await.stack()? {
				if ! exists {
					if this_fetch.is_none() || *date > this_fetch.unwrap() {
						this_fetch = Some(*date);
					};
					self.send( match &source.iv_hash {
						Some(hash) => format!("<a href=\"https://t.me/iv?url={post_url}&rhash={hash}\"> </a>{post_url}"),
						None => format!("{post_url}"),
					}, Some(destination), Some(ParseMode::Html)).await.stack()?;
					conn.add_post(id, date, &post_url).await.stack()?;
				if this_fetch.is_none() || *date > this_fetch.unwrap() {
					this_fetch = Some(*date);
				};
				self.send( match &source.iv_hash {
					Some(hash) => format!("<a href=\"https://t.me/iv?url={post_url}&rhash={hash}\"> </a>{post_url}"),
					None => format!("{post_url}"),
				}, Some(destination), Some(ParseMode::Html)).await.stack()?;
				conn.add_post(id, date, &post_url).await.stack()?;
				};
			};
			posted += 1;
				posted += 1;
			};
		};
		posts.clear();
		Ok(format!("Posted: {posted}"))
	}

	async fn autofetch(&self) -> Result<std::time::Duration> {
		let mut delay = chrono::Duration::minutes(1);
146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
161
162








163
164
165
166
167
168
169
146
147
148
149
150
151
152

153
154
155
156
157
158




159
160
161
162
163
164
165
166
167
168
169
170
171
172
173







-
+





-
-
-
-
+
+
+
+
+
+
+
+







			.execute(&mut *self.0).await.stack()?.rows_affected() {
			1 => { Ok("Source enabled.") },
			0 => { Ok("Source not found.") },
			_ => { bail!("Database error.") },
		}
	}

	pub async fn exists <I> (&mut self, post_url: &str, id: I) -> Result<Option<bool>>
	pub async fn exists <I> (&mut self, post_url: &str, id: I) -> Result<bool>
	where I: Into<i64> {
		let row = sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;")
			.bind(post_url)
			.bind(id.into())
			.fetch_one(&mut *self.0).await.stack()?;
		let exists: Option<bool> = row.try_get("exists").stack()?;
		Ok(exists)
	}

		if let Some(exists) = row.try_get("exists").stack()? {
			Ok(exists)
		} else {
			bail!("Database error: can't check whether source exists.");
		}
	}

	/// Fetch every queued entry whose `next_fetch` is earlier than one minute from now.
	///
	/// Rows come from `rsstg_order` left-joined with `rsstg_source` and carry
	/// `source_id`, `next_fetch`, `owner` and `last_scrape`.
	pub async fn get_queue (&mut self) -> Result<Vec<Queue>> {
		let pending = sqlx::query_as::<_, Queue>("select source_id, next_fetch, owner, last_scrape from rsstg_order natural left join rsstg_source where next_fetch < now() + interval '1 minute';")
			.fetch_all(&mut *self.0)
			.await
			.stack()?;
		Ok(pending)
	}

	pub async fn get_list <I> (&mut self, owner: I) -> Result<Vec<List>>