Index: Cargo.lock ================================================================== --- Cargo.lock +++ Cargo.lock @@ -2012,11 +2012,11 @@ "quick-xml", ] [[package]] name = "rsstg" -version = "0.5.2" +version = "0.5.3" dependencies = [ "async-compat", "atom_syndication", "chrono", "config", @@ -2029,10 +2029,11 @@ "sedregex", "smol", "sqlx", "stacked_errors", "tgbot", + "url", ] [[package]] name = "rustc-hash" version = "2.1.1" @@ -2241,13 +2242,13 @@ "syn", ] [[package]] name = "serde_json" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", "serde", "serde_core", @@ -2663,13 +2664,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.113" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] @@ -2981,13 +2982,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-bidi" version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -3629,22 +3630,22 @@ "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "1fabae64378cb18147bb18bca364e63bdbe72a0ffe4adf0addfec8aa166b2c56" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "c9c2d862265a8bb4471d87e033e730f536e2a285cc7cb05dbce09a2a97075f90" dependencies = [ "proc-macro2", "quote", "syn", ] Index: Cargo.toml ================================================================== --- Cargo.toml +++ Cargo.toml @@ -1,8 +1,8 @@ [package] name = "rsstg" -version = "0.5.2" +version = "0.5.3" authors = ["arcade"] edition = "2021" [dependencies] async-compat = "0.2.5" @@ -18,9 +18,10 @@ rss = "2.0.9" sedregex = "0.2.5" smol = "2.0.2" stacked_errors = "0.7.1" sqlx = { version = "0.8", features = [ "postgres", "runtime-tokio-rustls", "chrono", "macros" ], default-features = false } +url = "2.5.8" [profile.release] lto = true codegen-units = 1 Index: rsstg.sql ================================================================== --- rsstg.sql +++ rsstg.sql @@ -20,11 +20,11 @@ source_id integer not null, posted timestamptz not null, url text not null, hour smallint not null generated always as (extract('hour' from posted at time zone 'utc')) stored, hxm smallint not null generated always as (hxm(posted)) stored, - FOREIGN KEY (source_id) REFERENCES rsstg_source(source_id) on delete cascade, + FOREIGN KEY (source_id) REFERENCES rsstg_source(source_id) on delete cascade ); create unique index rsstg_post__url on rsstg_post(url); create index rsstg_post__hour on rsstg_post(hour); create index rsstg_post__posted_hour on rsstg_post(posted,hour); create index rsstg_post__hxm on rsstg_post(hxm); Index: src/command.rs ================================================================== --- src/command.rs +++ src/command.rs @@ -14,14 +14,14 @@ GetChat, GetChatAdministrators, Message, ParseMode::MarkdownV2, }; +use url::Url; lazy_static! { static ref RE_USERNAME: Regex = Regex::new(r"^@([a-zA-Z][a-zA-Z0-9_]+)$").unwrap(); - static ref RE_LINK: Regex = Regex::new(r"^https?://[a-zA-Z.0-9-]+/[-_a-zA-Z.:;0-9/?=]+$").unwrap(); static ref RE_IV_HASH: Regex = Regex::new(r"^[a-f0-9]{14}$").unwrap(); } pub async fn start (core: &Core, msg: &Message) -> Result<()> { core.send("We are open\\. Probably\\. Visit [channel](https://t.me/rsstg_bot_help/3) for details\\.", @@ -93,12 +93,19 @@ }; */ if ! RE_USERNAME.is_match(channel) { bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}"); }; - if ! RE_LINK.is_match(url) { - bail!("Link should be a link to atom/rss feed, something like \"https://domain/path\".\nNot {url:?}"); + { + let parsed_url = Url::parse(url) + .stack_err("Expecting a valid link to ATOM/RSS feed.")?; + match parsed_url.scheme() { + "http" | "https" => {}, + scheme => { + bail!("Unsupported URL scheme: {scheme}"); + }, + }; } let iv_hash = match iv_hash { Some(hash) => { match hash.as_ref() { "-" => None, Index: src/core.rs ================================================================== --- src/core.rs +++ src/core.rs @@ -53,14 +53,14 @@ lazy_static!{ pub static ref RE_SPECIAL: Regex = Regex::new(r"([\-_*\[\]()~`>#+|{}\.!])").unwrap(); } -/// Escape characters that are special in Telegram HTML by prefixing them with a backslash. +/// Escape characters that are special in Telegram MarkdownV2 by prefixing them with a backslash. /// -/// This ensures the returned string can be used as HTML-formatted Telegram message content -/// without special characters being interpreted as HTML markup. +/// This ensures the returned string can be used as MarkdownV2-formatted Telegram message content +/// without special characters being interpreted as MarkdownV2 markup. pub fn encode (text: &str) -> Cow<'_, str> { RE_SPECIAL.replace_all(text, "\\$1") } // This one does nothing except making sure only one token exists for each id @@ -312,23 +312,21 @@ for (date, post) in posts.iter() { let post_url: Cow = match source.url_re { Some(ref x) => sedregex::ReplaceCommand::new(x).stack()?.execute(&post.uri), None => post.uri.clone().into(), }; - if let Some(exists) = conn.exists(&post_url, id).await.stack()? { - if ! exists { - if this_fetch.is_none() || *date > this_fetch.unwrap() { - this_fetch = Some(*date); - }; - self.send( match &source.iv_hash { - Some(hash) => format!(" {post_url}"), - None => format!("{post_url}"), - }, Some(destination), Some(ParseMode::Html)).await.stack()?; - conn.add_post(id, date, &post_url).await.stack()?; - }; - }; - posted += 1; + if ! conn.exists(&post_url, id).await.stack()? { + if this_fetch.is_none() || *date > this_fetch.unwrap() { + this_fetch = Some(*date); + }; + self.send( match &source.iv_hash { + Some(hash) => format!(" {post_url}"), + None => format!("{post_url}"), + }, Some(destination), Some(ParseMode::Html)).await.stack()?; + conn.add_post(id, date, &post_url).await.stack()?; + posted += 1; + }; }; posts.clear(); Ok(format!("Posted: {posted}")) } Index: src/sql.rs ================================================================== --- src/sql.rs +++ src/sql.rs @@ -148,20 +148,24 @@ 0 => { Ok("Source not found.") }, _ => { bail!("Database error.") }, } } - pub async fn exists (&mut self, post_url: &str, id: I) -> Result> + pub async fn exists (&mut self, post_url: &str, id: I) -> Result where I: Into { let row = sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;") .bind(post_url) .bind(id.into()) .fetch_one(&mut *self.0).await.stack()?; - let exists: Option = row.try_get("exists").stack()?; - Ok(exists) + if let Some(exists) = row.try_get("exists").stack()? { + Ok(exists) + } else { + bail!("Database error: can't check whether source exists."); + } } + /// Get all pending events for (now + 1 minute) pub async fn get_queue (&mut self) -> Result> { let block: Vec = sqlx::query_as("select source_id, next_fetch, owner, last_scrape from rsstg_order natural left join rsstg_source where next_fetch < now() + interval '1 minute';") .fetch_all(&mut *self.0).await.stack()?; Ok(block) }