Overview
| Comment: | number of small tweaks, use url crate to parse links, simplify and comment |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA3-256: |
dc2089ff6a195fe3534b5c3783644dbc |
| User & Date: | arcade on 2026-01-07 07:30:15.804 |
| Other Links: | manifest | tags |
Context
|
2026-01-07
| ||
| 07:40 | release 0.5.3 Leaf check-in: 7393d62235 user: arcade tags: release, v0.5.3 | |
| 07:30 | number of small tweaks, use url crate to parse links, simplify and comment Closed-Leaf check-in: dc2089ff6a user: arcade tags: trunk | |
|
2026-01-06
| ||
| 15:25 | limit some code to debug builds, add proper error handling when parsing RSS check-in: 5d11e7d390 user: arcade tags: trunk | |
Changes
Modified Cargo.lock
from [bcfaa49062]
to [beed9b6e19].
| ︙ | ︙ | |||
2010 2011 2012 2013 2014 2015 2016 | "derive_builder", "never", "quick-xml", ] [[package]] name = "rsstg" | | > | 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 | "derive_builder", "never", "quick-xml", ] [[package]] name = "rsstg" version = "0.5.3" dependencies = [ "async-compat", "atom_syndication", "chrono", "config", "futures", "futures-util", "lazy_static", "regex", "reqwest", "rss", "sedregex", "smol", "sqlx", "stacked_errors", "tgbot", "url", ] [[package]] name = "rustc-hash" version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" |
| ︙ | ︙ | |||
2239 2240 2241 2242 2243 2244 2245 | "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" | | | | 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 | "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", "serde", "serde_core", "zmij", ] |
| ︙ | ︙ | |||
2661 2662 2663 2664 2665 2666 2667 | name = "subtle" version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" | | | | 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 | name = "subtle" version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] |
| ︙ | ︙ | |||
2979 2980 2981 2982 2983 2984 2985 | name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicase" | | | | 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 | name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicase" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-bidi" version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" |
| ︙ | ︙ | |||
3627 3628 3629 3630 3631 3632 3633 | "quote", "syn", "synstructure", ] [[package]] name = "zerocopy" | | | | | | 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 | "quote", "syn", "synstructure", ] [[package]] name = "zerocopy" version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fabae64378cb18147bb18bca364e63bdbe72a0ffe4adf0addfec8aa166b2c56" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9c2d862265a8bb4471d87e033e730f536e2a285cc7cb05dbce09a2a97075f90" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] |
| ︙ | ︙ |
Modified Cargo.toml
from [57e33f525d]
to [df6a97a5f1].
1 2 | [package] name = "rsstg" | | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
[package]
name = "rsstg"
version = "0.5.3"
authors = ["arcade"]
edition = "2021"
[dependencies]
async-compat = "0.2.5"
atom_syndication = { version = "0.12.4", features = [ "with-serde" ] }
chrono = "0.4.38"
config = { version = "0.15", default-features = false, features = [ "toml" ] }
tgbot = "0.41"
futures = "0.3.30"
futures-util = "0.3.30"
lazy_static = "1.5.0"
regex = "1.10.6"
reqwest = { version = "0.13.1", features = [ "brotli", "socks", "deflate" ]}
rss = "2.0.9"
sedregex = "0.2.5"
smol = "2.0.2"
stacked_errors = "0.7.1"
sqlx = { version = "0.8", features = [ "postgres", "runtime-tokio-rustls", "chrono", "macros" ], default-features = false }
url = "2.5.8"
[profile.release]
lto = true
codegen-units = 1
|
Modified rsstg.sql
from [6bfa596be6]
to [5ade9090d5].
| ︙ | ︙ | |||
18 19 20 21 22 23 24 |
create table rsstg_post (
source_id integer not null,
posted timestamptz not null,
url text not null,
hour smallint not null generated always as (extract('hour' from posted at time zone 'utc')) stored,
hxm smallint not null generated always as (hxm(posted)) stored,
| | | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
-- Posts recorded per source; src/sql.rs looks rows up here by url and source_id.
create table rsstg_post (
source_id integer not null,
posted timestamptz not null,
url text not null,
-- Hour of `posted` taken in UTC; computed and stored by the database.
hour smallint not null generated always as (extract('hour' from posted at time zone 'utc')) stored,
-- Derived from `posted` by hxm(), which is defined outside this excerpt.
hxm smallint not null generated always as (hxm(posted)) stored,
-- Deleting a source also deletes every post row that references it.
FOREIGN KEY (source_id) REFERENCES rsstg_source(source_id) on delete cascade
);
-- A given URL can be stored only once in the whole table, regardless of source.
create unique index rsstg_post__url on rsstg_post(url);
-- Plain and composite indexes over the generated `hour` / `hxm` columns and `posted`.
create index rsstg_post__hour on rsstg_post(hour);
create index rsstg_post__posted_hour on rsstg_post(posted,hour);
create index rsstg_post__hxm on rsstg_post(hxm);
create index rsstg_post__posted_hxm on rsstg_post(posted,hxm);
|
| ︙ | ︙ |
Modified src/command.rs
from [80a8df9d63]
to [f2ddddf16c].
| ︙ | ︙ | |||
12 13 14 15 16 17 18 19 20 21 |
ChatMember,
ChatUsername,
GetChat,
GetChatAdministrators,
Message,
ParseMode::MarkdownV2,
};
lazy_static! {
static ref RE_USERNAME: Regex = Regex::new(r"^@([a-zA-Z][a-zA-Z0-9_]+)$").unwrap();
| > < | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
ChatMember,
ChatUsername,
GetChat,
GetChatAdministrators,
Message,
ParseMode::MarkdownV2,
};
use url::Url;
// Patterns compiled once on first use; commands match user-supplied arguments against them.
lazy_static! {
// `@`, then one ASCII letter, then one or more ASCII letters, digits or underscores.
// The whole string must match; the capture group holds the name without the `@`.
static ref RE_USERNAME: Regex = Regex::new(r"^@([a-zA-Z][a-zA-Z0-9_]+)$").unwrap();
// Exactly 14 characters, each a lowercase hexadecimal digit; checked against the `iv_hash` argument.
static ref RE_IV_HASH: Regex = Regex::new(r"^[a-f0-9]{14}$").unwrap();
}
pub async fn start (core: &Core, msg: &Message) -> Result<()> {
core.send("We are open\\. Probably\\. Visit [channel](https://t.me/rsstg_bot_help/3) for details\\.",
Some(msg.chat.get_id()), Some(MarkdownV2)).await.stack()?;
Ok(())
|
| ︙ | ︙ | |||
91 92 93 94 95 96 97 |
bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}");
},
};
*/
if ! RE_USERNAME.is_match(channel) {
bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}");
};
| > > > | > > | > > | 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}");
},
};
*/
if ! RE_USERNAME.is_match(channel) {
bail!("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?\nNot {channel:?}");
};
{
let parsed_url = Url::parse(url)
.stack_err("Expecting a valid link to ATOM/RSS feed.")?;
match parsed_url.scheme() {
"http" | "https" => {},
scheme => {
bail!("Unsupported URL scheme: {scheme}");
},
};
}
let iv_hash = match iv_hash {
Some(hash) => {
match hash.as_ref() {
"-" => None,
thing => {
if ! RE_IV_HASH.is_match(thing) {
|
| ︙ | ︙ |
Modified src/core.rs
from [1349238f3f]
to [170e5288e9].
| ︙ | ︙ | |||
51 52 53 54 55 56 57 |
bail,
};
lazy_static!{
pub static ref RE_SPECIAL: Regex = Regex::new(r"([\-_*\[\]()~`>#+|{}\.!])").unwrap();
}
| | | | | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
bail,
};
lazy_static!{
    /// Matches a single character that Telegram MarkdownV2 treats as reserved.
    ///
    /// The character is captured as group 1 so a replacement can refer to it as `$1`.
    /// The set follows the Bot API list of characters that must be escaped:
    /// `_ * [ ] ( ) ~ ` > # + - = | { } . !` — `=` was previously missing, so text
    /// containing it was left unescaped.
    pub static ref RE_SPECIAL: Regex = Regex::new(r"([\-_*\[\]()~`>#+=|{}\.!])").unwrap();
}
/// Prefix every character matched by `RE_SPECIAL` with a backslash.
///
/// The result is meant to be embedded into a Telegram MarkdownV2 message so that the
/// matched characters are shown literally rather than interpreted as markup. Input that
/// contains no matching characters is returned borrowed, without allocating.
pub fn encode (text: &str) -> Cow<'_, str> {
    // `$1` expands to the character captured by the pattern's only group.
    let escaped_form = "\\$1";
    RE_SPECIAL.replace_all(text, escaped_form)
}
// This one does nothing except making sure only one token exists for each id
pub struct Token {
running: Arc<Mutex<HashSet<i32>>>,
|
| ︙ | ︙ | |||
310 311 312 313 314 315 316 |
}
};
for (date, post) in posts.iter() {
let post_url: Cow<str> = match source.url_re {
Some(ref x) => sedregex::ReplaceCommand::new(x).stack()?.execute(&post.uri),
None => post.uri.clone().into(),
};
| | < | | | | | | | | < < | > | 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
}
};
for (date, post) in posts.iter() {
let post_url: Cow<str> = match source.url_re {
Some(ref x) => sedregex::ReplaceCommand::new(x).stack()?.execute(&post.uri),
None => post.uri.clone().into(),
};
if ! conn.exists(&post_url, id).await.stack()? {
if this_fetch.is_none() || *date > this_fetch.unwrap() {
this_fetch = Some(*date);
};
self.send( match &source.iv_hash {
Some(hash) => format!("<a href=\"https://t.me/iv?url={post_url}&rhash={hash}\"> </a>{post_url}"),
None => format!("{post_url}"),
}, Some(destination), Some(ParseMode::Html)).await.stack()?;
conn.add_post(id, date, &post_url).await.stack()?;
posted += 1;
};
};
posts.clear();
Ok(format!("Posted: {posted}"))
}
async fn autofetch(&self) -> Result<std::time::Duration> {
let mut delay = chrono::Duration::minutes(1);
|
| ︙ | ︙ |
Modified src/sql.rs
from [6bfecdc0d2]
to [ccfc867118].
| ︙ | ︙ | |||
146 147 148 149 150 151 152 |
.execute(&mut *self.0).await.stack()?.rows_affected() {
1 => { Ok("Source enabled.") },
0 => { Ok("Source not found.") },
_ => { bail!("Database error.") },
}
}
| | | | > > | | > > | 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
.execute(&mut *self.0).await.stack()?.rows_affected() {
1 => { Ok("Source enabled.") },
0 => { Ok("Source not found.") },
_ => { bail!("Database error.") },
}
}
pub async fn exists <I> (&mut self, post_url: &str, id: I) -> Result<bool>
where I: Into<i64> {
let row = sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;")
.bind(post_url)
.bind(id.into())
.fetch_one(&mut *self.0).await.stack()?;
if let Some(exists) = row.try_get("exists").stack()? {
Ok(exists)
} else {
bail!("Database error: can't check whether source exists.");
}
}
/// Fetch every queue entry whose `next_fetch` is earlier than one minute from now.
///
/// Rows come from `rsstg_order` natural-left-joined with `rsstg_source` and are
/// decoded into `Queue` values.
///
/// # Errors
///
/// Fails if the query cannot be executed or a row cannot be decoded.
pub async fn get_queue (&mut self) -> Result<Vec<Queue>> {
    let query = sqlx::query_as("select source_id, next_fetch, owner, last_scrape from rsstg_order natural left join rsstg_source where next_fetch < now() + interval '1 minute';");
    let pending: Vec<Queue> = query.fetch_all(&mut *self.0).await.stack()?;
    Ok(pending)
}
pub async fn get_list <I> (&mut self, owner: I) -> Result<Vec<List>>
|
| ︙ | ︙ |