Lines of
src/main.rs
from check-in d52a6ff5c8
that are changed by the sequence of edits moving toward
check-in 635e1b2b6d:
1: use std::collections::{BTreeMap, HashSet};
2: use std::sync::{Arc, Mutex};
3:
4: use config;
5:
6: use tokio;
7: use reqwest;
8:
9: use rss;
10: use atom_syndication;
11:
12: use chrono::DateTime;
13:
14: use regex::Regex;
15:
16: use telegram_bot::*;
17: use tokio::stream::StreamExt;
18:
19: use sqlx::postgres::PgPoolOptions;
20: use sqlx::Row;
21:
22: #[macro_use]
23: extern crate lazy_static;
24:
25: use anyhow::{anyhow, bail, Context, Result};
26:
27: #[derive(Clone)]
28: struct Core {
29: owner: i64,
30: api_key: String,
31: owner_chat: UserId,
32: tg: telegram_bot::Api,
33: my: User,
34: pool: sqlx::Pool<sqlx::Postgres>,
35: sources: Arc<Mutex<HashSet<Arc<i32>>>>,
36: }
37:
38: impl Core {
39: async fn new(settings: config::Config) -> Result<Core> {
40: let owner = settings.get_int("owner")?;
41: let api_key = settings.get_str("api_key")?;
42: let tg = Api::new(&api_key);
43: let core = Core {
44: owner: owner,
45: api_key: api_key.clone(),
46: my: tg.send(telegram_bot::GetMe).await?,
47: tg: tg,
48: owner_chat: UserId::new(owner),
49: pool: PgPoolOptions::new()
50: .max_connections(5)
51: .connect_timeout(std::time::Duration::new(300, 0))
52: .idle_timeout(std::time::Duration::new(60, 0))
53: .connect_lazy(&settings.get_str("pg")?)?,
54: sources: Arc::new(Mutex::new(HashSet::new())),
55: };
56: let clone = core.clone();
57: tokio::spawn(async move {
58: if let Err(err) = &clone.autofetch().await {
59: if let Err(err) = clone.debug(&format!("š {:?}", err), None) {
60: eprintln!("Autofetch error: {}", err);
61: };
62: }
63: });
64: Ok(core)
65: }
66:
67: fn stream(&self) -> telegram_bot::UpdatesStream {
68: self.tg.stream()
69: }
70:
71: fn debug(&self, msg: &str, target: Option<UserId>) -> Result<()> {
72: self.tg.spawn(SendMessage::new(match target {
73: Some(user) => user,
74: None => self.owner_chat,
75: }, msg));
76: Ok(())
77: }
78:
79: async fn check<S>(&self, id: i32, owner: S, real: bool) -> Result<()>
80: where S: Into<i64> {
81: let owner: i64 = owner.into();
82: let id = {
83: let mut set = self.sources.lock().unwrap();
84: match set.get(&id) {
85: Some(id) => id.clone(),
86: None => {
87: let id = Arc::new(id);
88: set.insert(id.clone());
89: id.clone()
90: },
91: }
92: };
93: let count = Arc::strong_count(&id);
94: if count == 2 {
95: let mut conn = self.pool.acquire().await
96: .with_context(|| format!("Query queue fetch conn:\n{:?}", &self.pool))?;
97: let row = sqlx::query("select source_id, channel_id, url, iv_hash, owner from rsstg_source where source_id = $1 and owner = $2")
98: .bind(*id)
99: .bind(owner)
100: .fetch_one(&mut conn).await
101: .with_context(|| format!("Query source:\n{:?}", &self.pool))?;
102: drop(conn);
103: let channel_id: i64 = row.try_get("channel_id")?;
104: let destination = match real {
105: true => UserId::new(channel_id),
106: false => UserId::new(row.try_get("owner")?),
107: };
108: let url: &str = row.try_get("url")?;
109: let mut this_fetch: Option<DateTime<chrono::FixedOffset>> = None;
110: let iv_hash: Option<&str> = row.try_get("iv_hash")?;
111: let mut posts: BTreeMap<DateTime<chrono::FixedOffset>, String> = BTreeMap::new();
112: let content = reqwest::get(url).await?.bytes().await?;
113: //let mut content_ = surf::get(url).await.map_err(|err| anyhow!(err))?;
114: //eprintln!("Data: {:#?}", &content_);
115: //let content = content_.body_bytes().await.map_err(|err| anyhow!(err))?;
116: /*
117: let feed = rss::Channel::read_from(&content[..])
118: .with_context(|| format!("Problem opening feed url:\n{}", &url))?;
119: for item in feed.items() {
120: let date = match item.pub_date() {
121: Some(feed_date) => DateTime::parse_from_rfc2822(feed_date),
122: None => DateTime::parse_from_rfc3339(&item.dublin_core_ext().unwrap().dates()[0]),
123: }?;
124: let url = item.link().unwrap().to_string();
125: posts.insert(date.clone(), url.clone());
126: };
127: */
128: match rss::Channel::read_from(&content[..]) {
129: Ok(feed) => {
130: for item in feed.items() {
131: let date = match item.pub_date() {
132: Some(feed_date) => DateTime::parse_from_rfc2822(feed_date),
133: None => DateTime::parse_from_rfc3339(&item.dublin_core_ext().unwrap().dates()[0]),
134: }?;
135: let url = item.link().unwrap().to_string();
136: posts.insert(date.clone(), url.clone());
137: };
138: },
139: Err(err) => match err {
140: rss::Error::InvalidStartTag => {
141: let feed = atom_syndication::Feed::read_from(&content[..])
142: .with_context(|| format!("Problem opening feed url:\n{}", &url))?;
143: for item in feed.entries() {
144: let date = item.published().unwrap();
145: let url = item.links()[0].href();
146: posts.insert(date.clone(), url.to_string());
147: };
148: },
149: rss::Error::Eof => (),
150: _ => bail!("Unsupported or mangled content:\n{:#?}\n", err)
151: }
152: };
153: for (date, url) in posts.iter() {
154: let mut conn = self.pool.acquire().await
155: .with_context(|| format!("Check post fetch conn:\n{:?}", &self.pool))?;
156: let row = sqlx::query("select exists(select true from rsstg_post where url = $1 and source_id = $2) as exists;")
157: .bind(&url)
158: .bind(*id)
159: .fetch_one(&mut conn).await
160: .with_context(|| format!("Check post:\n{:?}", &conn))?;
161: let exists: bool = row.try_get("exists")?;
162: if ! exists {
163: if this_fetch == None || *date > this_fetch.unwrap() {
164: this_fetch = Some(*date);
165: };
166: self.tg.send( match iv_hash {
167: Some(x) => SendMessage::new(destination, format!("<a href=\"https://t.me/iv?url={}&rhash={}\"> </a>{0}", url, x)),
168: None => SendMessage::new(destination, format!("{}", url)),
169: }.parse_mode(types::ParseMode::Html)).await
170: .context("Can't post message:")?;
171: sqlx::query("insert into rsstg_post (source_id, posted, url) values ($1, $2, $3);")
172: .bind(*id)
173: .bind(date)
174: .bind(url)
175: .execute(&mut conn).await
176: .with_context(|| format!("Record post:\n{:?}", &conn))?;
177: drop(conn);
178: tokio::time::delay_for(std::time::Duration::new(4, 0)).await;
179: };
180: };
181: posts.clear();
182: };
183: let mut conn = self.pool.acquire().await
184: .with_context(|| format!("Update scrape fetch conn:\n{:?}", &self.pool))?;
185: sqlx::query("update rsstg_source set last_scrape = now() where source_id = $1;")
186: .bind(*id)
187: .execute(&mut conn).await
188: .with_context(|| format!("Update scrape:\n{:?}", &conn))?;
189: Ok(())
190: }
191:
192: async fn delete<S>(&self, source_id: &i32, owner: S) -> Result<String>
193: where S: Into<i64> {
194: let owner: i64 = owner.into();
195: let mut conn = self.pool.acquire().await
196: .with_context(|| format!("Delete fetch conn:\n{:?}", &self.pool))?;
197: match sqlx::query("delete from rsstg_source where source_id = $1 and owner = $2;")
198: .bind(source_id)
199: .bind(owner)
200: .execute(&mut conn).await
201: .with_context(|| format!("Delete source rule:\n{:?}", &self.pool))?
202: .rows_affected() {
203: 0 => { Ok("No data found found\\.".to_string()) },
204: x => { Ok(format!("{} sources removed\\.", x)) },
205: }
206: }
207:
208: async fn clean<S>(&self, source_id: &i32, owner: S) -> Result<String>
209: where S: Into<i64> {
210: let owner: i64 = owner.into();
211: let mut conn = self.pool.acquire().await
212: .with_context(|| format!("Clean fetch conn:\n{:?}", &self.pool))?;
213: match sqlx::query("delete from rsstg_post p using rsstg_source s where p.source_id = $1 and owner = $2 and p.source_id = s.source_id;")
214: .bind(source_id)
215: .bind(owner)
216: .execute(&mut conn).await
217: .with_context(|| format!("Clean seen posts:\n{:?}", &self.pool))?
218: .rows_affected() {
219: 0 => { Ok("No data found found\\.".to_string()) },
220: x => { Ok(format!("{} posts purged\\.", x)) },
221: }
222: }
223:
224: async fn enable<S>(&self, source_id: &i32, owner: S) -> Result<&str>
225: where S: Into<i64> {
226: let owner: i64 = owner.into();
227: let mut conn = self.pool.acquire().await
228: .with_context(|| format!("Enable fetch conn:\n{:?}", &self.pool))?;
229: match sqlx::query("update rsstg_source set enabled = true where source_id = $1 and owner = $2")
230: .bind(source_id)
231: .bind(owner)
232: .execute(&mut conn).await
233: .with_context(|| format!("Enable source:\n{:?}", &self.pool))?
234: .rows_affected() {
235: 1 => { Ok("Source enabled\\.") },
236: 0 => { Ok("Source not found\\.") },
237: _ => { Err(anyhow!("Database error.")) },
238: }
239: }
240:
241: async fn disable<S>(&self, source_id: &i32, owner: S) -> Result<&str>
242: where S: Into<i64> {
243: let owner: i64 = owner.into();
244: let mut conn = self.pool.acquire().await
245: .with_context(|| format!("Disable fetch conn:\n{:?}", &self.pool))?;
246: match sqlx::query("update rsstg_source set enabled = false where source_id = $1 and owner = $2")
247: .bind(source_id)
248: .bind(owner)
249: .execute(&mut conn).await
250: .with_context(|| format!("Disable source:\n{:?}", &self.pool))?
251: .rows_affected() {
252: 1 => { Ok("Source disabled\\.") },
253: 0 => { Ok("Source not found\\.") },
254: _ => { Err(anyhow!("Database error.")) },
255: }
256: }
257:
258: async fn update<S>(&self, update: Option<i32>, channel: &str, channel_id: i64, url: &str, iv_hash: Option<&str>, owner: S) -> Result<String>
259: where S: Into<i64> {
260: let owner: i64 = owner.into();
261: let mut conn = self.pool.acquire().await
262: .with_context(|| format!("Update fetch conn:\n{:?}", &self.pool))?;
263:
264: match match update {
265: Some(id) => {
266: sqlx::query("update rsstg_source set channel_id = $2, url = $3, iv_hash = $4, owner = $5, channel = $6 where source_id = $1").bind(id)
267: },
268: None => {
269: sqlx::query("insert into rsstg_source (channel_id, url, iv_hash, owner, channel) values ($1, $2, $3, $4, $5)")
270: },
271: }
272: .bind(channel_id)
273: .bind(url)
274: .bind(iv_hash)
275: .bind(owner)
276: .bind(channel)
277: .execute(&mut conn).await {
d52a6ff5c8 2021-09-30 278: Ok(_) => return Ok(String::from("Channel added\\.")),
279: Err(sqlx::Error::Database(err)) => {
280: match err.downcast::<sqlx::postgres::PgDatabaseError>().routine() {
281: Some("_bt_check_unique", ) => {
282: return Ok("Duplicate key\\.".to_string())
283: },
284: Some(_) => {
285: return Ok("Database error\\.".to_string())
286: },
287: None => {
288: return Ok("No database error extracted\\.".to_string())
289: },
290: };
291: },
292: Err(err) => {
293: bail!("Sorry, unknown error:\n{:#?}\n", err);
294: },
295: };
296: }
297:
298: async fn autofetch(&self) -> Result<()> {
299: let mut delay = chrono::Duration::minutes(1);
300: let mut now;
301: loop {
302: let mut conn = self.pool.acquire().await
303: .with_context(|| format!("Autofetch fetch conn:\n{:?}", &self.pool))?;
304: now = chrono::Local::now();
305: let mut queue = sqlx::query("select source_id, next_fetch, owner from rsstg_order natural left join rsstg_source where next_fetch < now() + interval '1 minute';")
306: .fetch_all(&mut conn).await?;
307: for row in queue.iter() {
308: let source_id: i32 = row.try_get("source_id")?;
309: let owner: i64 = row.try_get("owner")?;
310: let next_fetch: DateTime<chrono::Local> = row.try_get("next_fetch")?;
311: if next_fetch < now {
312: //let clone = self.clone();
313: //clone.owner_chat(UserId::new(owner));
314: let clone = Core {
315: owner_chat: UserId::new(owner),
316: ..self.clone()
317: };
318: tokio::spawn(async move {
319: if let Err(err) = clone.check(source_id, owner, true).await {
320: if let Err(err) = clone.debug(&format!("š {:?}", err), None) {
321: eprintln!("Check error: {}", err);
322: };
323: };
324: });
325: } else {
326: if next_fetch - now < delay {
327: delay = next_fetch - now;
328: }
329: }
330: };
331: queue.clear();
332: tokio::time::delay_for(delay.to_std()?).await;
333: delay = chrono::Duration::minutes(1);
334: }
335: }
336:
337: async fn list<S>(&self, owner: S) -> Result<Vec<String>>
338: where S: Into<i64> {
339: let owner = owner.into();
340: let mut reply = vec![];
341: let mut conn = self.pool.acquire().await
342: .with_context(|| format!("List fetch conn:\n{:?}", &self.pool))?;
343: reply.push("Channels:".to_string());
344: let rows = sqlx::query("select source_id, channel, enabled, url, iv_hash from rsstg_source where owner = $1 order by source_id")
345: .bind(owner)
346: .fetch_all(&mut conn).await?;
347: for row in rows.iter() {
348: let source_id: i32 = row.try_get("source_id")?;
349: let username: &str = row.try_get("channel")?;
350: let enabled: bool = row.try_get("enabled")?;
351: let url: &str = row.try_get("url")?;
352: let iv_hash: Option<&str> = row.try_get("iv_hash")?;
353: reply.push(format!("\n\\#ļøā£ {} \\*ļøā£ `{}` {}\nš `{}`", source_id, username,
354: match enabled {
355: true => "š enabled",
356: false => "ā disabled",
357: }, url));
358: if let Some(hash) = iv_hash {
359: reply.push(format!("IV `{}`", hash));
360: }
361: };
362: Ok(reply)
363: }
364: }
365:
366: #[tokio::main]
367: async fn main() -> Result<()> {
368: let mut settings = config::Config::default();
369: settings.merge(config::File::with_name("rsstg"))?;
370:
371: let core = Core::new(settings).await?;
372:
373: let mut stream = core.stream();
374: stream.allowed_updates(&[AllowedUpdate::Message]);
375: let mut reply_to: Option<UserId>;
376:
377: loop {
378: reply_to = None;
379: match stream.next().await {
380: Some(update) => {
381: if let Err(err) = handle(update?, &core, &mut reply_to).await {
382: core.debug(&format!("š {:?}", err), reply_to)?;
383: };
384: },
385: None => {
386: core.debug(&format!("š None error."), None)?;
387: }
388: };
389: }
390:
391: //Ok(())
392: }
393:
394: async fn handle(update: telegram_bot::Update, core: &Core, mut _reply_to: &Option<UserId>) -> Result<()> {
395: lazy_static! {
396: static ref RE_USERNAME: Regex = Regex::new(r"^@[a-zA-Z][a-zA-Z0-9_]+$").unwrap();
397: static ref RE_LINK: Regex = Regex::new(r"^https?://[a-zA-Z.0-9-]+/[-_a-zA-Z.0-9/?=]+$").unwrap();
398: static ref RE_IV_HASH: Regex = Regex::new(r"^[a-f0-9]{14}$").unwrap();
399: }
400:
401: match update.kind {
402: UpdateKind::Message(message) => {
403: let mut reply: Vec<String> = vec![];
404: match message.kind {
405: MessageKind::Text { ref data, .. } => {
406: let mut words = data.split_whitespace();
407: let cmd = words.next().unwrap();
408: match cmd {
409:
410: // start
411:
412: "/start" => {
413: reply.push("We are open\\. Probably\\. Visit [channel](https://t.me/rsstg_bot_help/3) for details\\.".to_string());
414: },
415:
416: // list
417:
418: "/list" => {
419: reply.append(&mut core.list(message.from.id).await?);
420: },
421:
422: // add
423:
424: "/add" | "/update" => {
425: _reply_to = &Some(message.from.id);
426: let mut source_id: Option<i32> = None;
427: let at_least = "Requires at least 3 parameters.";
428: if cmd == "/update" {
429: let first_word = words.next()
430: .context(at_least)?;
431: source_id = Some(first_word.parse::<i32>()
432: .with_context(|| format!("I need a number, but got {}.", first_word))?);
433: }
434: let (channel, url, iv_hash) = (
435: words.next().context(at_least)?,
436: words.next().context(at_least)?,
437: words.next());
438: if ! RE_USERNAME.is_match(&channel) {
439: reply.push("Usernames should be something like \"@\\[a\\-zA\\-Z]\\[a\\-zA\\-Z0\\-9\\_]+\", aren't they?".to_string());
440: bail!("Wrong username {:?}.", &channel);
441: }
442: if ! RE_LINK.is_match(&url) {
443: reply.push("Link should be link to atom/rss feed, something like \"https://domain/path\"\\.".to_string());
444: bail!("Url: {:?}", &url);
445: }
446: if let Some(hash) = iv_hash {
447: if ! RE_IV_HASH.is_match(&hash) {
448: reply.push("IV hash should be 14 hex digits.".to_string());
449: bail!("IV: {:?}", &iv_hash);
450: };
451: };
452: let channel_id = i64::from(core.tg.send(telegram_bot::GetChat::new(telegram_bot::types::ChatRef::ChannelUsername(channel.to_string()))).await?.id());
453: let chan_adm = core.tg.send(telegram_bot::GetChatAdministrators::new(telegram_bot::types::ChatRef::ChannelUsername(channel.to_string()))).await
454: .context("Sorry, I have no access to that chat\\.")?;
455: let (mut me, mut user) = (false, false);
456: for admin in chan_adm {
457: if admin.user.id == core.my.id {
458: me = true;
459: };
460: if admin.user.id == message.from.id {
461: user = true;
462: };
463: };
464: if ! me { bail!("I need to be admin on that channel\\."); };
465: if ! user { bail!("You should be admin on that channel\\."); };
466: reply.push(core.update(source_id, channel, channel_id, url, iv_hash, message.from.id).await?);
467: },
468:
469: // check
470:
471: "/check" => {
472: match &words.next().unwrap().parse::<i32>() {
473: Err(err) => {
474: reply.push(format!("I need a number\\.\n{}", &err));
475: },
476: Ok(number) => {
477: core.check(*number, message.from.id, false).await
478: .context("Channel check failed.")?;
479: },
480: };
481: },
482:
483: // clean
484:
485: "/clean" => {
486: match &words.next().unwrap().parse::<i32>() {
487: Err(err) => {
488: reply.push(format!("I need a number\\.\n{}", &err));
489: },
490: Ok(number) => {
491: let result = core.clean(&number, message.from.id).await?;
492: reply.push(result.to_string());
493: },
494: };
495: },
496:
497: // enable
498:
499: "/enable" => {
500: match &words.next().unwrap().parse::<i32>() {
501: Err(err) => {
502: reply.push(format!("I need a number\\.\n{}", &err));
503: },
504: Ok(number) => {
505: let result = core.enable(&number, message.from.id).await?;
506: reply.push(result.to_string());
507: },
508: };
509: },
510:
511: // delete
512:
513: "/delete" => {
514: match &words.next().unwrap().parse::<i32>() {
515: Err(err) => {
516: reply.push(format!("I need a number\\.\n{}", &err));
517: },
518: Ok(number) => {
519: let result = core.delete(&number, message.from.id).await?;
520: reply.push(result.to_string());
521: },
522: };
523: },
524:
525: // disable
526:
527: "/disable" => {
528: match &words.next().unwrap().parse::<i32>() {
529: Err(err) => {
530: reply.push(format!("I need a number\\.\n{}", &err));
531: },
532: Ok(number) => {
533: let result = core.disable(&number, message.from.id).await?;
534: reply.push(result.to_string());
535: },
536: };
537: },
538:
539: _ => {
540: },
541: };
542: },
543: _ => {
544: },
545: };
546:
547: if reply.len() > 0 {
548: if let Err(err) = core.tg.send(message.text_reply(reply.join("\n")).parse_mode(types::ParseMode::MarkdownV2)).await {
549: dbg!(reply.join("\n"));
550: println!("{}", err);
551: };
552: };
553: },
554: _ => {},
555: };
556:
557: Ok(())
558: }