Lines of
src/main.rs
from check-in b2297258c7
that are changed by the sequence of edits moving toward
check-in cd827b4c75:
1: use std::collections::{BTreeMap, HashSet};
2: use std::sync::{Arc, Mutex};
3:
4: use chrono::DateTime;
5: use config;
6: use futures::StreamExt;
7: use regex::Regex;
8: use reqwest;
9: use sqlx::postgres::PgPoolOptions;
10: use sqlx::Row;
11: use tokio;
12:
13: use rss;
14: use atom_syndication;
15:
16: use telegram_bot::*;
17: //use tokio::stream::StreamExt;
18:
19: #[macro_use]
20: extern crate lazy_static;
21:
22: use anyhow::{anyhow, bail, Context, Result};
23:
24: #[derive(Clone)]
25: struct Core {
26: owner: i64,
27: api_key: String,
28: owner_chat: UserId,
29: tg: telegram_bot::Api,
30: my: User,
31: pool: sqlx::Pool<sqlx::Postgres>,
32: sources: Arc<Mutex<HashSet<Arc<i32>>>>,
33: }
34:
35: impl Core {
36: async fn new(settings: config::Config) -> Result<Core> {
37: let owner = settings.get_int("owner")?;
38: let api_key = settings.get_str("api_key")?;
39: let tg = Api::new(&api_key);
40: let core = Core {
41: owner: owner,
42: api_key: api_key.clone(),
43: my: tg.send(telegram_bot::GetMe).await?,
44: tg: tg,
45: owner_chat: UserId::new(owner),
46: pool: PgPoolOptions::new()
47: .max_connections(5)
48: .connect_timeout(std::time::Duration::new(300, 0))
49: .idle_timeout(std::time::Duration::new(60, 0))
50: .connect_lazy(&settings.get_str("pg")?)?,
51: sources: Arc::new(Mutex::new(HashSet::new())),
52: };
53: let clone = core.clone();
54: tokio::spawn(async move {
55: if let Err(err) = &clone.autofetch().await {
56: if let Err(err) = clone.debug(&format!("š {:?}", err), None) {
57: eprintln!("Autofetch error: {}", err);
58: };
59: }
60: });
61: Ok(core)
62: }
63:
64: fn stream(&self) -> telegram_bot::UpdatesStream {
65: self.tg.stream()
66: }
67:
68: fn debug(&self, msg: &str, target: Option<UserId>) -> Result<()> {
69: self.tg.spawn(SendMessage::new(match target {
70: Some(user) => user,
71: None => self.owner_chat,
72: }, msg));
73: Ok(())
74: }
75:
76: async fn check<S>(&self, id: i32, owner: S, real: bool) -> Result<()>
77: where S: Into<i64> {
78: let owner: i64 = owner.into();
79: let id = {
80: let mut set = self.sources.lock().unwrap();
81: match set.get(&id) {
82: Some(id) => id.clone(),
83: None => {
84: let id = Arc::new(id);
85: set.insert(id.clone());
86: id.clone()
87: },
88: }
89: };
90: let count = Arc::strong_count(&id);
91: if count == 2 {
92: let mut conn = self.pool.acquire().await
93: .with_context(|| format!("Query queue fetch conn:\n{:?}", &self.pool))?;
94: let row = sqlx::query("select source_id, channel_id, url, iv_hash, owner from rsstg_source where source_id = $1 and owner = $2")
95: .bind(*id)
96: .bind(owner)
97: .fetch_one(&mut conn).await
98: .with_context(|| format!("Query source:\n{:?}", &self.pool))?;
99: drop(conn);
100: let channel_id: i64 = row.try_get("channel_id")?;
101: let destination = match real {
102: true => UserId::new(channel_id),
103: false => UserId::new(row.try_get("owner")?),
104: };
105: let url: &str = row.try_get("url")?;
106: let mut this_fetch: Option<DateTime<chrono::FixedOffset>> = None;
107: let iv_hash: Option<&str> = row.try_get("iv_hash")?;
108: let mut posts: BTreeMap<DateTime<chrono::FixedOffset>, String> = BTreeMap::new();
109: let content = reqwest::get(url).await?.bytes().await?;
110: //let mut content_ = surf::get(url).await.map_err(|err| anyhow!(err))?;
111: //eprintln!("Data: {:#?}", &content_);
112: //let content = content_.body_bytes().await.map_err(|err| anyhow!(err))?;
113: /*
114: let feed = rss::Channel::read_from(&content[..])
115: .with_context(|| format!("Problem opening feed url:\n{}", &url))?;
116: for item in feed.items() {
117: let date = match item.pub_date() {
118: Some(feed_date) => DateTime::parse_from_rfc2822(feed_date),
119: None => DateTime::parse_from_rfc3339(&item.dublin_core_ext().unwrap().dates()[0]),
120: }?;
121: let url = item.link().unwrap().to_string();
122: posts.insert(date.clone(), url.clone());
123: };
124: */
125: match rss::Channel::read_from(&content[..]) {
126: Ok(feed) => {
127: for item in feed.items() {
b2297258c7 2021-11-06 128: let date = match item.pub_date() {
b2297258c7 2021-11-06 129: Some(feed_date) => DateTime::parse_from_rfc2822(feed_date),
b2297258c7 2021-11-06 130: None => DateTime::parse_from_rfc3339(&item.dublin_core_ext().unwrap().dates()[0]),
b2297258c7 2021-11-06 131: }?;
b2297258c7 2021-11-06 132: let url = item.link().unwrap().to_string();
b2297258c7 2021-11-06 133: posts.insert(date.clone(), url.clone());
134: };
135: },
136: Err(err) => match err {
137: rss::Error::InvalidStartTag => {
138: let feed = atom_syndication::Feed::read_from(&content[..])
139: .with_context(|| format!("Problem opening feed url:\n{}", &url))?;
140: for item in feed.entries() {
141: let date = item.published().unwrap();
142: let url = item.links()[0].href();
143: posts.insert(date.clone(), url.to_string());
144: };
145: },
146: rss::Error::Eof => (),
147: _ => bail!("Unsupported or mangled content:\n{:#?}\n", err)
148: }
149: };
150: for (date, url) in posts.iter() {
151: let mut conn = self.pool.acquire().await
152: .with_context(|| format!("Check post fetch conn:\n{:?}", &self.pool))?;
153: let row = sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;")
154: .bind(&url)
155: .bind(*id)
156: .fetch_one(&mut conn).await
157: .with_context(|| format!("Check post:\n{:?}", &conn))?;
158: let exists: bool = row.try_get("exists")?;
159: if ! exists {
160: if this_fetch == None || *date > this_fetch.unwrap() {
161: this_fetch = Some(*date);
162: };
163: self.tg.send( match iv_hash {
164: Some(x) => SendMessage::new(destination, format!("<a href=\"https://t.me/iv?url={}&rhash={}\"> </a>{0}", url, x)),
165: None => SendMessage::new(destination, format!("{}", url)),
166: }.parse_mode(types::ParseMode::Html)).await
167: .context("Can't post message:")?;
168: sqlx::query("insert into rsstg_post (source_id, posted, url) values ($1, $2, $3);")
169: .bind(*id)
170: .bind(date)
171: .bind(url)
172: .execute(&mut conn).await
173: .with_context(|| format!("Record post:\n{:?}", &conn))?;
174: drop(conn);
175: tokio::time::sleep(std::time::Duration::new(4, 0)).await;
176: };
177: };
178: posts.clear();
179: };
180: let mut conn = self.pool.acquire().await
181: .with_context(|| format!("Update scrape fetch conn:\n{:?}", &self.pool))?;
182: sqlx::query("update rsstg_source set last_scrape = now() where source_id = $1;")
183: .bind(*id)
184: .execute(&mut conn).await
185: .with_context(|| format!("Update scrape:\n{:?}", &conn))?;
186: Ok(())
187: }
188:
189: async fn delete<S>(&self, source_id: &i32, owner: S) -> Result<String>
190: where S: Into<i64> {
191: let owner: i64 = owner.into();
192: let mut conn = self.pool.acquire().await
193: .with_context(|| format!("Delete fetch conn:\n{:?}", &self.pool))?;
194: match sqlx::query("delete from rsstg_source where source_id = $1 and owner = $2;")
195: .bind(source_id)
196: .bind(owner)
197: .execute(&mut conn).await
198: .with_context(|| format!("Delete source rule:\n{:?}", &self.pool))?
199: .rows_affected() {
200: 0 => { Ok("No data found found\\.".to_string()) },
201: x => { Ok(format!("{} sources removed\\.", x)) },
202: }
203: }
204:
205: async fn clean<S>(&self, source_id: &i32, owner: S) -> Result<String>
206: where S: Into<i64> {
207: let owner: i64 = owner.into();
208: let mut conn = self.pool.acquire().await
209: .with_context(|| format!("Clean fetch conn:\n{:?}", &self.pool))?;
210: match sqlx::query("delete from rsstg_post p using rsstg_source s where p.source_id = $1 and owner = $2 and p.source_id = s.source_id;")
211: .bind(source_id)
212: .bind(owner)
213: .execute(&mut conn).await
214: .with_context(|| format!("Clean seen posts:\n{:?}", &self.pool))?
215: .rows_affected() {
216: 0 => { Ok("No data found found\\.".to_string()) },
217: x => { Ok(format!("{} posts purged\\.", x)) },
218: }
219: }
220:
221: async fn enable<S>(&self, source_id: &i32, owner: S) -> Result<&str>
222: where S: Into<i64> {
223: let owner: i64 = owner.into();
224: let mut conn = self.pool.acquire().await
225: .with_context(|| format!("Enable fetch conn:\n{:?}", &self.pool))?;
226: match sqlx::query("update rsstg_source set enabled = true where source_id = $1 and owner = $2")
227: .bind(source_id)
228: .bind(owner)
229: .execute(&mut conn).await
230: .with_context(|| format!("Enable source:\n{:?}", &self.pool))?
231: .rows_affected() {
232: 1 => { Ok("Source enabled\\.") },
233: 0 => { Ok("Source not found\\.") },
234: _ => { Err(anyhow!("Database error.")) },
235: }
236: }
237:
238: async fn disable<S>(&self, source_id: &i32, owner: S) -> Result<&str>
239: where S: Into<i64> {
240: let owner: i64 = owner.into();
241: let mut conn = self.pool.acquire().await
242: .with_context(|| format!("Disable fetch conn:\n{:?}", &self.pool))?;
243: match sqlx::query("update rsstg_source set enabled = false where source_id = $1 and owner = $2")
244: .bind(source_id)
245: .bind(owner)
246: .execute(&mut conn).await
247: .with_context(|| format!("Disable source:\n{:?}", &self.pool))?
248: .rows_affected() {
249: 1 => { Ok("Source disabled\\.") },
250: 0 => { Ok("Source not found\\.") },
251: _ => { Err(anyhow!("Database error.")) },
252: }
253: }
254:
255: async fn update<S>(&self, update: Option<i32>, channel: &str, channel_id: i64, url: &str, iv_hash: Option<&str>, owner: S) -> Result<String>
256: where S: Into<i64> {
257: let owner: i64 = owner.into();
258: let mut conn = self.pool.acquire().await
259: .with_context(|| format!("Update fetch conn:\n{:?}", &self.pool))?;
260:
261: match match update {
262: Some(id) => {
263: sqlx::query("update rsstg_source set channel_id = $2, url = $3, iv_hash = $4, owner = $5, channel = $6 where source_id = $1").bind(id)
264: },
265: None => {
266: sqlx::query("insert into rsstg_source (channel_id, url, iv_hash, owner, channel) values ($1, $2, $3, $4, $5)")
267: },
268: }
269: .bind(channel_id)
270: .bind(url)
271: .bind(iv_hash)
272: .bind(owner)
273: .bind(channel)
274: .execute(&mut conn).await {
275: Ok(_) => return Ok(String::from(match update {
276: Some(_) => "Channel updated\\.",
277: None => "Channel added\\.",
278: })),
279: Err(sqlx::Error::Database(err)) => {
280: match err.downcast::<sqlx::postgres::PgDatabaseError>().routine() {
281: Some("_bt_check_unique", ) => {
282: return Ok("Duplicate key\\.".to_string())
283: },
284: Some(_) => {
285: return Ok("Database error\\.".to_string())
286: },
287: None => {
288: return Ok("No database error extracted\\.".to_string())
289: },
290: };
291: },
292: Err(err) => {
293: bail!("Sorry, unknown error:\n{:#?}\n", err);
294: },
295: };
296: }
297:
298: async fn autofetch(&self) -> Result<()> {
299: let mut delay = chrono::Duration::minutes(1);
300: let mut now;
301: loop {
302: let mut conn = self.pool.acquire().await
303: .with_context(|| format!("Autofetch fetch conn:\n{:?}", &self.pool))?;
304: now = chrono::Local::now();
305: let mut queue = sqlx::query("select source_id, next_fetch, owner from rsstg_order natural left join rsstg_source where next_fetch < now() + interval '1 minute';")
306: .fetch_all(&mut conn).await?;
307: for row in queue.iter() {
308: let source_id: i32 = row.try_get("source_id")?;
309: let owner: i64 = row.try_get("owner")?;
310: let next_fetch: DateTime<chrono::Local> = row.try_get("next_fetch")?;
311: if next_fetch < now {
312: //let clone = self.clone();
313: //clone.owner_chat(UserId::new(owner));
314: let clone = Core {
315: owner_chat: UserId::new(owner),
316: ..self.clone()
317: };
318: tokio::spawn(async move {
319: if let Err(err) = clone.check(source_id, owner, true).await {
320: if let Err(err) = clone.debug(&format!("š {:?}", err), None) {
321: eprintln!("Check error: {}", err);
322: };
323: };
324: });
325: } else {
326: if next_fetch - now < delay {
327: delay = next_fetch - now;
328: }
329: }
330: };
331: queue.clear();
332: tokio::time::sleep(delay.to_std()?).await;
333: delay = chrono::Duration::minutes(1);
334: }
335: }
336:
337: async fn list<S>(&self, owner: S) -> Result<Vec<String>>
338: where S: Into<i64> {
339: let owner = owner.into();
340: let mut reply = vec![];
341: let mut conn = self.pool.acquire().await
342: .with_context(|| format!("List fetch conn:\n{:?}", &self.pool))?;
343: reply.push("Channels:".to_string());
344: let rows = sqlx::query("select source_id, channel, enabled, url, iv_hash from rsstg_source where owner = $1 order by source_id")
345: .bind(owner)
346: .fetch_all(&mut conn).await?;
347: for row in rows.iter() {
348: let source_id: i32 = row.try_get("source_id")?;
349: let username: &str = row.try_get("channel")?;
350: let enabled: bool = row.try_get("enabled")?;
351: let url: &str = row.try_get("url")?;
352: let iv_hash: Option<&str> = row.try_get("iv_hash")?;
353: reply.push(format!("\n\\#ļøā£ {} \\*ļøā£ `{}` {}\nš `{}`", source_id, username,
354: match enabled {
355: true => "š enabled",
356: false => "ā disabled",
357: }, url));
358: if let Some(hash) = iv_hash {
359: reply.push(format!("IV `{}`", hash));
360: }
361: };
362: Ok(reply)
363: }
364: }
365:
366: #[tokio::main]
367: async fn main() -> Result<()> {
368: let mut settings = config::Config::default();
369: settings.merge(config::File::with_name("rsstg"))?;
370:
371: let core = Core::new(settings).await?;
372:
373: let mut stream = core.stream();
374: stream.allowed_updates(&[AllowedUpdate::Message]);
375: let mut reply_to: Option<UserId>;
376:
377: loop {
378: reply_to = None;
379: match stream.next().await {
380: Some(update) => {
381: if let Err(err) = handle(update?, &core, &mut reply_to).await {
382: core.debug(&format!("š {:?}", err), reply_to)?;
383: };
384: },
385: None => {
386: core.debug(&format!("š None error."), None)?;
387: }
388: };
389: }
390:
391: //Ok(())
392: }
393:
394: async fn handle(update: telegram_bot::Update, core: &Core, mut _reply_to: &Option<UserId>) -> Result<()> {
395: lazy_static! {
396: static ref RE_USERNAME: Regex = Regex::new(r"^@[a-zA-Z][a-zA-Z0-9_]+$").unwrap();
397: static ref RE_LINK: Regex = Regex::new(r"^https?://[a-zA-Z.0-9-]+/[-_a-zA-Z.0-9/?=]+$").unwrap();
398: static ref RE_IV_HASH: Regex = Regex::new(r"^[a-f0-9]{14}$").unwrap();
399: }
400:
401: match update.kind {
402: UpdateKind::Message(message) => {
403: let mut reply: Vec<String> = vec![];
404: match message.kind {
405: MessageKind::Text { ref data, .. } => {
406: let mut words = data.split_whitespace();
407: let cmd = words.next().unwrap();
408: match cmd {
409:
410: // start
411:
412: "/start" => {
413: reply.push("We are open\\. Probably\\. Visit [channel](https://t.me/rsstg_bot_help/3) for details\\.".to_string());
414: },
415:
416: // list
417:
418: "/list" => {
419: reply.append(&mut core.list(message.from.id).await?);
420: },
421:
422: // add
423:
424: "/add" | "/update" => {
425: _reply_to = &Some(message.from.id);
426: let mut source_id: Option<i32> = None;
427: let at_least = "Requires at least 3 parameters.";
428: if cmd == "/update" {
429: let first_word = words.next()
430: .context(at_least)?;
431: source_id = Some(first_word.parse::<i32>()
432: .with_context(|| format!("I need a number, but got {}.", first_word))?);
433: }
434: let (channel, url, iv_hash) = (
435: words.next().context(at_least)?,
436: words.next().context(at_least)?,
437: words.next());
438: if ! RE_USERNAME.is_match(&channel) {
439: reply.push("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?".to_string());
440: bail!("Wrong username {:?}.", &channel);
441: }
442: if ! RE_LINK.is_match(&url) {
443: reply.push("Link should be link to atom/rss feed, something like \"https://domain/path\"\\.".to_string());
444: bail!("Url: {:?}", &url);
445: }
446: if let Some(hash) = iv_hash {
447: if ! RE_IV_HASH.is_match(&hash) {
448: reply.push("IV hash should be 14 hex digits.".to_string());
449: bail!("IV: {:?}", &iv_hash);
450: };
451: };
452: let channel_id = i64::from(core.tg.send(telegram_bot::GetChat::new(telegram_bot::types::ChatRef::ChannelUsername(channel.to_string()))).await?.id());
453: let chan_adm = core.tg.send(telegram_bot::GetChatAdministrators::new(telegram_bot::types::ChatRef::ChannelUsername(channel.to_string()))).await
454: .context("Sorry, I have no access to that chat\\.")?;
455: let (mut me, mut user) = (false, false);
456: for admin in chan_adm {
457: if admin.user.id == core.my.id {
458: me = true;
459: };
460: if admin.user.id == message.from.id {
461: user = true;
462: };
463: };
464: if ! me { bail!("I need to be admin on that channel\\."); };
465: if ! user { bail!("You should be admin on that channel\\."); };
466: reply.push(core.update(source_id, channel, channel_id, url, iv_hash, message.from.id).await?);
467: },
468:
469: // check
470:
471: "/check" => {
472: match &words.next().unwrap().parse::<i32>() {
473: Err(err) => {
474: reply.push(format!("I need a number\\.\n{}", &err));
475: },
476: Ok(number) => {
477: core.check(*number, message.from.id, false).await
478: .context("Channel check failed.")?;
479: },
480: };
481: },
482:
483: // clean
484:
485: "/clean" => {
486: match &words.next().unwrap().parse::<i32>() {
487: Err(err) => {
488: reply.push(format!("I need a number\\.\n{}", &err));
489: },
490: Ok(number) => {
491: let result = core.clean(&number, message.from.id).await?;
492: reply.push(result.to_string());
493: },
494: };
495: },
496:
497: // enable
498:
499: "/enable" => {
500: match &words.next().unwrap().parse::<i32>() {
501: Err(err) => {
502: reply.push(format!("I need a number\\.\n{}", &err));
503: },
504: Ok(number) => {
505: let result = core.enable(&number, message.from.id).await?;
506: reply.push(result.to_string());
507: },
508: };
509: },
510:
511: // delete
512:
513: "/delete" => {
514: match &words.next().unwrap().parse::<i32>() {
515: Err(err) => {
516: reply.push(format!("I need a number\\.\n{}", &err));
517: },
518: Ok(number) => {
519: let result = core.delete(&number, message.from.id).await?;
520: reply.push(result.to_string());
521: },
522: };
523: },
524:
525: // disable
526:
527: "/disable" => {
528: match &words.next().unwrap().parse::<i32>() {
529: Err(err) => {
530: reply.push(format!("I need a number\\.\n{}", &err));
531: },
532: Ok(number) => {
533: let result = core.disable(&number, message.from.id).await?;
534: reply.push(result.to_string());
535: },
536: };
537: },
538:
539: _ => {
540: },
541: };
542: },
543: _ => {
544: },
545: };
546:
547: if reply.len() > 0 {
548: if let Err(err) = core.tg.send(message.text_reply(reply.join("\n")).parse_mode(types::ParseMode::MarkdownV2)).await {
549: dbg!(reply.join("\n"));
550: println!("{}", err);
551: };
552: };
553: },
554: _ => {},
555: };
556:
557: Ok(())
558: }