Overview
Comment: | 0.1.4: fetch less data, sort posts by post date before posting |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 6ac5737a72eecbce1b491784c7992ca2 |
User & Date: | arcade on 2020-11-18 19:46:12.545 |
Other Links: | manifest | tags |
Context
2020-11-19
| ||
17:43 | 0.1.5: logging, mostly check-in: ec616a2a43 user: arcade tags: trunk | |
2020-11-18
| ||
19:46 | 0.1.4: fetch less data, sort posts by post date before posting check-in: 6ac5737a72 user: arcade tags: trunk | |
18:30 | optimize prelease check-in: 6f950e082f user: arcade tags: trunk | |
Changes
Modified Cargo.toml
from [667f319344]
to [a133f234ff].
1 2 | [package] name = "rsstg" | | | 1 2 3 4 5 6 7 8 9 10 | [package] name = "rsstg" version = "0.1.4" authors = ["arcade"] edition = "2018" [dependencies] config = "*" futures = "*" |
︙ | ︙ |
Modified src/main.rs
from [979a29cb75]
to [363e6b408a].
1 2 3 4 5 6 7 | use config; use tokio; use rss; use chrono::DateTime; use regex::Regex; | > > | 1 2 3 4 5 6 7 8 9 | use std::collections::BTreeMap; use config; use tokio; use rss; use chrono::DateTime; use regex::Regex; |
︙ | ︙ | |||
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | let destination = match real { Some(true) => UserId::new(channel_id), Some(false) | None => UserId::new(row.try_get("owner")?), }; let url: &str = row.try_get("url")?; let mut this_fetch: Option<DateTime<chrono::FixedOffset>> = None; let iv_hash: Option<&str> = row.try_get("iv_hash")?; match rss::Channel::from_url(url) { Ok(feed) => { self.debug(&format!("# title:{:?} ttl:{:?} hours:{:?} days:{:?}", feed.title(), feed.ttl(), feed.skip_hours(), feed.skip_days()))?; for item in feed.items() { let date = match item.pub_date() { Some(feed_date) => DateTime::parse_from_rfc2822(feed_date), None => DateTime::parse_from_rfc3339(&item.dublin_core_ext().unwrap().dates()[0]), }?; let url = item.link().unwrap().to_string(); match sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;") .bind(&url) .bind(id) .fetch_one(&self.pool).await { Ok(row) => { let exists: bool = row.try_get("exists")?; if ! 
exists { | > > > > | | | 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | let destination = match real { Some(true) => UserId::new(channel_id), Some(false) | None => UserId::new(row.try_get("owner")?), }; let url: &str = row.try_get("url")?; let mut this_fetch: Option<DateTime<chrono::FixedOffset>> = None; let iv_hash: Option<&str> = row.try_get("iv_hash")?; let mut posts: BTreeMap<DateTime<chrono::FixedOffset>, String> = BTreeMap::new(); match rss::Channel::from_url(url) { Ok(feed) => { self.debug(&format!("# title:{:?} ttl:{:?} hours:{:?} days:{:?}", feed.title(), feed.ttl(), feed.skip_hours(), feed.skip_days()))?; for item in feed.items() { let date = match item.pub_date() { Some(feed_date) => DateTime::parse_from_rfc2822(feed_date), None => DateTime::parse_from_rfc3339(&item.dublin_core_ext().unwrap().dates()[0]), }?; let url = item.link().unwrap().to_string(); posts.insert(date.clone(), url.clone()); }; for (date, url) in posts.iter() { match sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;") .bind(&url) .bind(id) .fetch_one(&self.pool).await { Ok(row) => { let exists: bool = row.try_get("exists")?; if ! exists { if this_fetch == None || *date > this_fetch.unwrap() { this_fetch = Some(*date); } match self.tg.send( match iv_hash { Some(x) => SendMessage::new(destination, format!("<a href=\"https://t.me/iv?url={}&rhash={}\"> </a>{0}", url, x)), None => SendMessage::new(destination, format!("{}", url)), }.parse_mode(types::ParseMode::Html)).await { Ok(_) => { match sqlx::query("insert into rsstg_post (source_id, posted, url) values ($1, $2, $3);") |
︙ | ︙ | |||
111 112 113 114 115 116 117 118 119 120 121 122 123 124 | } }, Err(err) => { self.debug(&err.to_string())?; }, }; }; }, Err(err) => { self.debug(&err.to_string())?; }, }; match sqlx::query("update rsstg_source set last_scrape = now() where source_id = $1;") .bind(id) | > | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | } }, Err(err) => { self.debug(&err.to_string())?; }, }; }; posts.clear(); }, Err(err) => { self.debug(&err.to_string())?; }, }; match sqlx::query("update rsstg_source set last_scrape = now() where source_id = $1;") .bind(id) |
︙ | ︙ | |||
174 175 176 177 178 179 180 | async fn autofetch(&self) -> Result<()> { let mut delay = chrono::Duration::minutes(5); let mut next_fetch: DateTime<chrono::Local>; let mut now; loop { self.debug("cycle")?; | > | < | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | async fn autofetch(&self) -> Result<()> { let mut delay = chrono::Duration::minutes(5); let mut next_fetch: DateTime<chrono::Local>; let mut now; loop { self.debug("cycle")?; now = chrono::Local::now(); let mut rows = sqlx::query("select source_id, next_fetch from rsstg_order natural left join rsstg_source natural left join rsstg_channel where next_fetch < now();") .fetch(&self.pool); while let Some(row) = rows.try_next().await.unwrap() { let source_id: i32 = row.try_get("source_id")?; next_fetch = row.try_get("next_fetch")?; if next_fetch < now { match sqlx::query("update rsstg_source set last_scrape = now() + interval '1 hour' where source_id = $1;") .bind(source_id) .execute(&self.pool).await { Ok(_) => {}, |
︙ | ︙ |