Diff
Logged in as anonymous

Differences From Artifact [a705590999]:

To Artifact [85ea533bba]:


1
2



3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
22
23



24
25
26
27
28




29
30
31
32
33
1
2
3
4
5
6
7

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


25
26
27





28
29
30
31


32

33


+
+
+


-







+









-
-
+
+
+
-
-
-
-
-
+
+
+
+
-
-

-

use crate::Cursor;

use std::borrow::Cow;

use html_escape::encode_text;
use lazy_static::lazy_static;
use regex::Regex;
use scraper::Html;
use stacked_errors::{
	bail,
	Result,
};

lazy_static! {
	/// Matches a whole string made of one or more dot-separated labels, where each label
	/// consists of lowercase ASCII letters, digits and hyphens and both starts and ends
	/// with a letter or digit. The pattern is anchored at both ends and is case-sensitive.
	pub static ref RE_DOMAIN: Regex = Regex::new(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$").unwrap();
	/// Matches a lowercase closing `pre` or `code` tag anywhere in the text, allowing
	/// spaces or tabs between `</` and the tag name and between the tag name and `>`.
	pub static ref RE_CLOSING: Regex = Regex::new(r"</[ \t]*(pre|code)[ \t]*>").unwrap();
}

/// An attachment: its raw data paired with the corresponding file name.
#[derive(Debug)]
pub struct Attachment {
	/// Attachment contents as an owned byte buffer wrapped in a `Cursor`.
	// NOTE(review): `Cursor` is imported from the crate root; presumably a re-export of a
	// standard cursor type — confirm against the crate root.
	pub data: Cursor<Vec<u8>>,
	/// File name that goes with `data`.
	pub name: String,
}

/// Pass any text here to be validated as HTML, breaks on validation errors
pub fn validate (text: &str) -> Result<&str> {
/// Pass any text here to be validated as not breaking from Telegram preformatted blocks
/// escape all HTML chars afterwards
pub fn validate (text: &str) -> Result<Cow<'_, str>> {
	// Technically full validation is not needed nor required here, all text after validation
	// is used in Telegram messages as RAW text enclosed in `pre`/`code` tags, so the only reason
	// for this check is to make sure there's no dangling closing tags in the text that might
	// break Telegram message formatting
	let fragment = Html::parse_fragment(text);
	if RE_CLOSING.is_match(text) {
		bail!("Telegram closing tag found.");
	} else {
		Ok(encode_text(text))
	if !fragment.errors.is_empty() {
		bail!(fragment.errors.join("\n"));
	}
	Ok(text)
}