Skip to main content

hashiverse_lib/tools/
plain_text_post.rs

1//! # Plain-text → hashiverse HTML conversion
2//!
3//! Hashiverse posts are stored and transmitted as a constrained subset of HTML (so that
4//! rich posts from the web client, API clients, and plain-text API clients are all the
5//! same format on the wire). This module provides the one-way convenience path for
6//! callers that have nothing but a string of text — mainly the Python client, plain-text
7//! API integrations, and quick CLI posts.
8//!
9//! The output is the same HTML shape produced by the Tiptap editor in the web client:
10//! HTML-escaped body, `#hashtag` tokens rewritten as `<hashtag>` elements, `@<64-hex-id>`
11//! mentions rewritten as `<mention>` elements, and literal newlines turned into `<br>`.
12//! `submit_post()` then parses the result into the canonical on-wire representation.
13
14/// Converts a plain-text post into well-formed HTML that `submit_post()` can parse.
15///
16/// - HTML-escapes `<`, `>`, `&`, `"` in the input to prevent injection
17/// - Converts `#hashtag` patterns into `<hashtag hashtag="...">` elements
18/// - Converts `@<64-hex-char-id>` patterns into `<mention client_id="...">` elements
19/// - Converts newlines into `<br>` tags
20pub fn convert_text_to_hashiverse_html(text: &str) -> String {
21    let escaped = html_escape(text);
22    let chars: Vec<char> = escaped.chars().collect();
23    let len = chars.len();
24    let mut output = String::with_capacity(escaped.len() * 2);
25    let mut i = 0;
26
27    while i < len {
28        match chars[i] {
29            '#' => {
30                let start = i + 1;
31                let mut end = start;
32                while end < len && chars[end].is_alphanumeric() {
33                    end += 1;
34                }
35                if end > start {
36                    let hashtag_text: String = chars[start..end].iter().collect();
37                    let hashtag_lower = hashtag_text.to_lowercase();
38                    output.push_str(&format!(
39                        "<hashtag hashtag=\"{}\"><span class=\"plugin-hashtag-left\">#</span><span class=\"plugin-hashtag-right\">{}</span></hashtag>",
40                        hashtag_lower, hashtag_text
41                    ));
42                    i = end;
43                } else {
44                    output.push('#');
45                    i += 1;
46                }
47            }
48            '@' => {
49                let start = i + 1;
50                let mut end = start;
51                while end < len && end - start < 64 && is_hex_char(chars[end]) {
52                    end += 1;
53                }
54                let hex_len = end - start;
55                // Must be exactly 64 hex chars, and the next char (if any) must NOT be hex
56                // to avoid matching a prefix of a longer hex string
57                if hex_len == 64 && (end >= len || !is_hex_char(chars[end])) {
58                    let hex_string: String = chars[start..end].iter().collect();
59                    output.push_str(&format!("<mention client_id=\"{}\"></mention>", hex_string));
60                    i = end;
61                } else {
62                    output.push('@');
63                    i += 1;
64                }
65            }
66            '\n' => {
67                output.push_str("<br>");
68                i += 1;
69            }
70            '\r' => {
71                // Skip carriage returns — \r\n is handled by skipping \r and emitting <br> on \n
72                i += 1;
73            }
74            c => {
75                output.push(c);
76                i += 1;
77            }
78        }
79    }
80
81    output
82}
83
/// Replaces the four HTML-significant characters `&`, `<`, `>` and `"` with their
/// named entities and copies every other character through untouched.
fn html_escape(text: &str) -> String {
    // Over-allocate by roughly 10% so a few entities do not immediately force a regrow.
    let mut out = String::with_capacity(11 * text.len() / 10);

    for ch in text.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            _ => {
                out.push(ch);
                continue;
            }
        };
        out.push_str(entity);
    }

    out
}
98
/// Reports whether `c` is an ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`).
fn is_hex_char(c: char) -> bool {
    char::is_ascii_hexdigit(&c)
}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the conversion under test.
    fn render(text: &str) -> String {
        convert_text_to_hashiverse_html(text)
    }

    /// The exact markup expected for a mention of `client_id`.
    fn mention_markup(client_id: &str) -> String {
        format!("<mention client_id=\"{}\"></mention>", client_id)
    }

    /// Parses `html` with scraper and collects the `attribute` value of every
    /// `element` it selects, in the order scraper yields them.
    fn attribute_values(html: &str, element: &str, attribute: &str) -> Vec<String> {
        let fragment = scraper::Html::parse_fragment(html);
        let selector = scraper::Selector::parse(element).unwrap();
        fragment
            .select(&selector)
            .filter_map(|node| node.attr(attribute))
            .map(String::from)
            .collect()
    }

    // --- Hashtags ---

    #[test]
    fn test_hashtag_at_start() {
        let html = render("#rust is great");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains("<span class=\"plugin-hashtag-right\">rust</span>"));
        assert!(html.ends_with(" is great"));
    }

    #[test]
    fn test_hashtag_at_end() {
        let html = render("hello #rust");
        assert!(html.starts_with("hello "));
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
    }

    #[test]
    fn test_hashtag_in_middle() {
        let html = render("I love #rust programming");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains(" programming"));
    }

    #[test]
    fn test_multiple_hashtags() {
        let html = render("#rust and #golang");
        for expected in ["<hashtag hashtag=\"rust\">", "<hashtag hashtag=\"golang\">"] {
            assert!(html.contains(expected));
        }
    }

    #[test]
    fn test_adjacent_hashtags() {
        let html = render("#rust#golang");
        for expected in ["<hashtag hashtag=\"rust\">", "<hashtag hashtag=\"golang\">"] {
            assert!(html.contains(expected));
        }
    }

    #[test]
    fn test_hashtag_case_lowered_in_attribute() {
        let html = render("#RuStLang");
        assert!(html.contains("hashtag=\"rustlang\""));
        // The visible text keeps the casing the author typed.
        assert!(html.contains("<span class=\"plugin-hashtag-right\">RuStLang</span>"));
    }

    #[test]
    fn test_bare_hash_no_alphanumeric() {
        assert_eq!(render("# alone"), "# alone");
    }

    #[test]
    fn test_hash_at_end_of_string() {
        assert_eq!(render("test #"), "test #");
    }

    #[test]
    fn test_unicode_hashtag() {
        let html = render("#日本語");
        assert!(html.contains("hashtag=\"日本語\""));
        assert!(html.contains("<span class=\"plugin-hashtag-right\">日本語</span>"));
    }

    #[test]
    fn test_hashtag_with_numbers() {
        assert!(render("#web3").contains("hashtag=\"web3\""));
    }

    #[test]
    fn test_hashtag_terminated_by_punctuation() {
        let html = render("#rust, nice");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains("</hashtag>, nice"));
    }

    // --- Mentions ---

    #[test]
    fn test_valid_mention() {
        let hex_id = "a".repeat(64);
        let html = render(&format!("hello @{} world", hex_id));
        assert!(html.contains(&mention_markup(&hex_id)));
        assert!(html.starts_with("hello "));
        assert!(html.ends_with(" world"));
    }

    #[test]
    fn test_mention_mixed_case_hex() {
        let hex_id = "aAbBcCdDeEfF0011223344556677889900112233445566778899aAbBcCdDeEfF";
        assert_eq!(hex_id.len(), 64);
        let html = render(&format!("@{}", hex_id));
        assert!(html.contains(&mention_markup(hex_id)));
    }

    #[test]
    fn test_mention_too_short() {
        let html = render("@abcdef");
        assert_eq!(html, "@abcdef");
        assert!(!html.contains("<mention"));
    }

    #[test]
    fn test_mention_non_hex_after_at() {
        assert_eq!(render("@hello"), "@hello");
    }

    #[test]
    fn test_bare_at() {
        assert_eq!(render("@"), "@");
    }

    #[test]
    fn test_at_end_of_string() {
        assert_eq!(render("test @"), "test @");
    }

    #[test]
    fn test_mention_65_hex_chars_not_matched() {
        // With 65 hex digits the 64-digit prefix is followed by another hex digit,
        // so nothing may be turned into a mention.
        let html = render(&format!("@{}", "a".repeat(65)));
        assert!(!html.contains("<mention"));
    }

    #[test]
    fn test_mention_64_hex_then_non_hex() {
        let hex_id = "b".repeat(64);
        let html = render(&format!("@{}xyz", hex_id));
        assert!(html.contains(&mention_markup(&hex_id)));
        assert!(html.ends_with("xyz"));
    }

    // --- HTML escaping ---

    #[test]
    fn test_html_injection_escaped() {
        let html = render("<script>alert(1)</script>");
        assert!(html.contains("&lt;script&gt;"));
        assert!(!html.contains("<script>"));
    }

    #[test]
    fn test_ampersand_escaped() {
        assert_eq!(render("AT&T"), "AT&amp;T");
    }

    #[test]
    fn test_quotes_escaped() {
        assert!(render("he said \"hello\"").contains("&quot;"));
    }

    // --- Newlines ---

    #[test]
    fn test_newline_to_br() {
        assert_eq!(render("line1\nline2"), "line1<br>line2");
    }

    #[test]
    fn test_crlf_to_br() {
        assert_eq!(render("line1\r\nline2"), "line1<br>line2");
    }

    #[test]
    fn test_bare_cr_skipped() {
        assert_eq!(render("line1\rline2"), "line1line2");
    }

    // --- Combined input ---

    #[test]
    fn test_combined_post() {
        let hex_id = "c".repeat(64);
        let html = render(&format!(
            "Hello #hashiverse from @{}!\nGreat to be here.",
            hex_id
        ));
        assert!(html.contains("<hashtag hashtag=\"hashiverse\">"));
        assert!(html.contains(&mention_markup(&hex_id)));
        assert!(html.contains("<br>"));
        assert!(html.contains("Great to be here."));
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(render(""), "");
    }

    #[test]
    fn test_plain_text_no_specials() {
        assert_eq!(render("just a normal post"), "just a normal post");
    }

    // --- Round trips: scraper must be able to read the tags back out of the output ---

    #[test]
    fn test_round_trip_hashtag_extraction() {
        let html = render("I love #Rust and #golang");
        let hashtags = attribute_values(&html, "hashtag", "hashtag");
        assert_eq!(hashtags, vec!["rust", "golang"]);
    }

    #[test]
    fn test_round_trip_mention_extraction() {
        let hex_id = "d".repeat(64);
        let html = render(&format!("hello @{}", hex_id));
        let client_ids = attribute_values(&html, "mention", "client_id");
        assert_eq!(client_ids, vec![hex_id.as_str()]);
    }

    #[test]
    fn test_round_trip_combined() {
        let hex_id = "e".repeat(64);
        let html = render(&format!("#hashiverse post by @{} about #Rust", hex_id));

        let hashtags = attribute_values(&html, "hashtag", "hashtag");
        assert_eq!(hashtags, vec!["hashiverse", "rust"]);

        let client_ids = attribute_values(&html, "mention", "client_id");
        assert_eq!(client_ids, vec![hex_id.as_str()]);
    }
}