Index: src/core.rs ================================================================== --- src/core.rs +++ src/core.rs @@ -53,11 +53,14 @@ lazy_static!{ pub static ref RE_SPECIAL: Regex = Regex::new(r"([\-_*\[\]()~`>#+|{}\.!])").unwrap(); } -/// Encodes special HTML entities to prevent them interfering with Telegram HTML +/// Escape every character matched by `RE_SPECIAL` by prefixing it with a backslash. +/// +/// NOTE(review): this is backslash escaping of Markdown-style markup characters, not HTML +/// escaping: `<` and `&` are left untouched. Confirm the parse mode used by callers. pub fn encode (text: &str) -> Cow<'_, str> { RE_SPECIAL.replace_all(text, "\\$1") } // This one does nothing except making sure only one token exists for each id @@ -126,10 +129,22 @@ authors: String, summary: String, } impl Core { + /// Create a Core instance from configuration and start its background autofetch loop. + /// + /// The provided `settings` must include: + /// - `owner` (integer): chat id to use as the default destination, + /// - `api_key` (string): Telegram bot API key, + /// - `api_gateway` (string): Telegram API gateway host, + /// - `pg` (string): PostgreSQL connection string, + /// - optional `proxy` (string): proxy URL for the HTTP client. + /// + /// On success returns an initialized `Core` with Telegram and HTTP clients, database connection, + /// an empty running set for per-id tokens, and a spawned background task that periodically runs + /// `autofetch`. If any required setting is missing or initialization fails, an error is returned. pub async fn new(settings: config::Config) -> Result { let owner_chat = ChatPeerId::from(settings.get_int("owner").stack()?); let api_key = settings.get_string("api_key").stack()?; let tg = Client::new(&api_key).stack()? 
.with_host(settings.get_string("api_gateway").stack()?); @@ -178,10 +193,26 @@ SendMessage::new(target, msg) .with_parse_mode(mode) ).await.stack() } + /// Fetches the feed for a source, sends any newly discovered posts to the appropriate chat, and records them in the database. + /// + /// This acquires a per-source guard to prevent concurrent checks for the same `id`. If a check is already running for + /// the given `id`, the function returns an error. If `last_scrape` is provided, it is sent as the `If-Modified-Since` + /// header to the feed request. The function parses RSS or Atom feeds, sends unseen post URLs to either the source's + /// channel (when `real` is true) or the source owner (when `real` is false), and persists posted entries so they are + /// not reposted later. + /// + /// Parameters: + /// - `id`: Identifier of the source to check. + /// - `real`: When `true`, send posts to the source's channel; when `false`, send to the source owner. + /// - `last_scrape`: Optional timestamp used to set the `If-Modified-Since` header for the HTTP request. + /// + /// # Returns + /// + /// `Posted: N` where `N` is the number of posts processed and sent. pub async fn check (&self, id: i32, real: bool, last_scrape: Option>) -> Result { let mut posted: i32 = 0; let mut conn = self.db.begin().await.stack()?; let _token = Token::new(&self.running, id).await.stack()?; @@ -214,11 +245,18 @@ for item in feed.items() { if let Some(link) = item.link() { let date = match item.pub_date() { Some(feed_date) => DateTime::parse_from_rfc2822(feed_date), None => DateTime::parse_from_rfc3339(match item.dublin_core_ext() { - Some(dates) => &dates.dates()[0], + Some(ext) => { + let dates = ext.dates(); + if dates.is_empty() { + bail!("Feed item has Dublin Core extension but no dates.") + } else { + &dates[0] + } + }, None => bail!("Feed item misses posting date."), }), }.stack()?; let uri = link.to_string(); let title = item.title().unwrap_or("").to_string();