Check-in [704bf85f8c]
Logged in as anonymous
Overview
Comment:add more docstrings (by CodeRabbit), properly process dublin extension dates
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 704bf85f8c1716c7b60a91ab13751a2ae855db4f26b132bfc7108dabdb55d2ce
User & Date: arcade on 2026-01-06 13:07:13.891
Other Links: manifest | tags
Context
2026-01-06
15:24
bump check-in: 1497d5f02f user: arcade tags: trunk
13:07
add more docstrings (by CodeRabbit), properly process dublin extension dates check-in: 704bf85f8c user: arcade tags: trunk
13:06
change default rust toolchain action check-in: e9d3c3d224 user: arcade tags: trunk
Changes
51
52
53
54
55
56
57
58



59
60
61
62
63
64
65
	bail,
};

lazy_static!{
	/// Matches a single character from the set ``- _ * [ ] ( ) ~ ` > # + | { } . !``
	/// and captures it as group 1 (used by `encode` to re-insert the character after a backslash).
	///
	/// The pattern is a fixed literal, so the `unwrap` can only panic if the literal itself is invalid.
	// NOTE(review): this set resembles Telegram MarkdownV2's reserved characters but omits `=` — confirm that is intentional.
	pub static ref RE_SPECIAL: Regex = Regex::new(r"([\-_*\[\]()~`>#+|{}\.!])").unwrap();
}

/// Escapes special characters in `text` by prefixing each of them with a backslash.
///
/// Every match of `RE_SPECIAL` is replaced with `\` followed by the matched character.
/// The result is whatever `Regex::replace_all` returns, so the input is borrowed rather than
/// copied when nothing matched.
// NOTE(review): backslash-escaping is Markdown-style, not HTML entity encoding — presumably meant
// for Telegram's MarkdownV2 parse mode; confirm against callers.
pub fn encode (text: &str) -> Cow<'_, str> {
	RE_SPECIAL.replace_all(text, "\\$1")
}

// This one does nothing except make sure only one token exists for each id
pub struct Token {
	running: Arc<Mutex<HashSet<i32>>>,







|
>
>
>







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
	bail,
};

lazy_static!{
	/// Matches a single character from the set ``- _ * [ ] ( ) ~ ` > # + | { } . !``
	/// and captures it as group 1 (used by `encode` to re-insert the character after a backslash).
	///
	/// The pattern is a fixed literal, so the `unwrap` can only panic if the literal itself is invalid.
	// NOTE(review): this set resembles Telegram MarkdownV2's reserved characters but omits `=` — confirm that is intentional.
	pub static ref RE_SPECIAL: Regex = Regex::new(r"([\-_*\[\]()~`>#+|{}\.!])").unwrap();
}

/// Escapes special characters in `text` by prefixing each of them with a backslash.
///
/// Every match of `RE_SPECIAL` is replaced with `\` followed by the matched character.
/// The result is whatever `Regex::replace_all` returns, so the input is borrowed rather than
/// copied when nothing matched.
// NOTE(review): backslash-escaping is Markdown-style, not HTML entity encoding — presumably meant
// for Telegram's MarkdownV2 parse mode; confirm against callers.
pub fn encode (text: &str) -> Cow<'_, str> {
	RE_SPECIAL.replace_all(text, "\\$1")
}

// This one does nothing except make sure only one token exists for each id
pub struct Token {
	running: Arc<Mutex<HashSet<i32>>>,
124
125
126
127
128
129
130












131
132
133
134
135
136
137
	uri: String,
	title: String,
	authors: String,
	summary: String,
}

impl Core {












	pub async fn new(settings: config::Config) -> Result<Core> {
		let owner_chat = ChatPeerId::from(settings.get_int("owner").stack()?);
		let api_key = settings.get_string("api_key").stack()?;
		let tg = Client::new(&api_key).stack()?
			.with_host(settings.get_string("api_gateway").stack()?);

		let mut client = reqwest::Client::builder();







>
>
>
>
>
>
>
>
>
>
>
>







127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
	uri: String,
	title: String,
	authors: String,
	summary: String,
}

impl Core {
	/// Create a Core instance from configuration and start its background autofetch loop.
	///
	/// The provided `settings` must include:
	/// - `owner` (integer): chat id to use as the default destination,
	/// - `api_key` (string): Telegram bot API key,
	/// - `api_gateway` (string): Telegram API gateway host,
	/// - `pg` (string): PostgreSQL connection string,
	/// - optional `proxy` (string): proxy URL for the HTTP client.
	///
	/// On success returns an initialized `Core` with Telegram and HTTP clients, database connection,
	/// an empty running set for per-id tokens, and a spawned background task that periodically runs
	/// `autofetch`. If any required setting is missing or initialization fails, an error is returned.
	pub async fn new(settings: config::Config) -> Result<Core> {
		let owner_chat = ChatPeerId::from(settings.get_int("owner").stack()?);
		let api_key = settings.get_string("api_key").stack()?;
		let tg = Client::new(&api_key).stack()?
			.with_host(settings.get_string("api_gateway").stack()?);

		let mut client = reqwest::Client::builder();
176
177
178
179
180
181
182
















183
184
185
186
187
188
189
		let target = target.unwrap_or(self.owner_chat);
		self.tg.execute(
			SendMessage::new(target, msg)
				.with_parse_mode(mode)
		).await.stack()
	}

















	pub async fn check (&self, id: i32, real: bool, last_scrape: Option<DateTime<Local>>) -> Result<String> {
		let mut posted: i32 = 0;
		let mut conn = self.db.begin().await.stack()?;

		let _token = Token::new(&self.running, id).await.stack()?;
		let source = conn.get_source(id, self.owner_chat).await.stack()?;
		conn.set_scrape(id).await.stack()?;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
		let target = target.unwrap_or(self.owner_chat);
		self.tg.execute(
			SendMessage::new(target, msg)
				.with_parse_mode(mode)
		).await.stack()
	}

	/// Fetches the feed for a source, sends any newly discovered posts to the appropriate chat, and records them in the database.
	///
	/// This acquires a per-source guard to prevent concurrent checks for the same `id`. If a check is already running for
	/// the given `id`, the function returns an error. If `last_scrape` is provided, it is sent as the `If-Modified-Since`
	/// header to the feed request. The function parses RSS or Atom feeds, sends unseen post URLs to either the source's
	/// channel (when `real` is true) or the source owner (when `real` is false), and persists posted entries so they are
	/// not reposted later.
	///
	/// Parameters:
	/// - `id`: Identifier of the source to check.
	/// - `real`: When `true`, send posts to the source's channel; when `false`, send to the source owner.
	/// - `last_scrape`: Optional timestamp used to set the `If-Modified-Since` header for the HTTP request.
	///
	/// # Returns
	///
	/// `Posted: N` where `N` is the number of posts processed and sent.
	pub async fn check (&self, id: i32, real: bool, last_scrape: Option<DateTime<Local>>) -> Result<String> {
		let mut posted: i32 = 0;
		let mut conn = self.db.begin().await.stack()?;

		let _token = Token::new(&self.running, id).await.stack()?;
		let source = conn.get_source(id, self.owner_chat).await.stack()?;
		conn.set_scrape(id).await.stack()?;
212
213
214
215
216
217
218

219






220
221
222
223
224
225
226
		match rss::Channel::read_from(&content[..]) {
			Ok(feed) => {
				for item in feed.items() {
					if let Some(link) = item.link() {
						let date = match item.pub_date() {
							Some(feed_date) => DateTime::parse_from_rfc2822(feed_date),
							None => DateTime::parse_from_rfc3339(match item.dublin_core_ext() {

								Some(dates) => &dates.dates()[0],






								None => bail!("Feed item misses posting date."),
							}),
						}.stack()?;
						let uri = link.to_string();
						let title = item.title().unwrap_or("").to_string();
						let authors = item.author().unwrap_or("").to_string();
						let summary = item.content().unwrap_or("").to_string();







>
|
>
>
>
>
>
>







243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
		match rss::Channel::read_from(&content[..]) {
			Ok(feed) => {
				for item in feed.items() {
					if let Some(link) = item.link() {
						let date = match item.pub_date() {
							Some(feed_date) => DateTime::parse_from_rfc2822(feed_date),
							None => DateTime::parse_from_rfc3339(match item.dublin_core_ext() {
								Some(ext) => {
									let dates = ext.dates();
									if dates.is_empty() {
										bail!("Feed item has Dublin Core extension but no dates.")
									} else {
										&dates[0]
									}
								},
								None => bail!("Feed item misses posting date."),
							}),
						}.stack()?;
						let uri = link.to_string();
						let title = item.title().unwrap_or("").to_string();
						let authors = item.author().unwrap_or("").to_string();
						let summary = item.content().unwrap_or("").to_string();