2023-05-15 18:33:14 +00:00
|
|
|
/*
|
|
|
|
Program: Bibliofile
|
|
|
|
Language: Rustc 1.69.0
|
|
|
|
IDE: CLion
|
|
|
|
Operating system: Fedora 38/WSL
|
|
|
|
Purpose: This module is meant to process HTML-formatted text and return it as strings.
|
2023-05-19 03:01:14 +00:00
|
|
|
Last edited: 5/18/23
|
2023-05-15 18:33:14 +00:00
|
|
|
*/
|
|
|
|
|
2023-05-19 23:15:48 +00:00
|
|
|
|
|
|
|
use regex::Regex;
|
2023-05-19 03:01:14 +00:00
|
|
|
use epub::archive;
|
2023-05-19 23:15:48 +00:00
|
|
|
use soup::{NodeExt, QueryBuilderExt, Soup};
|
2023-05-15 18:33:14 +00:00
|
|
|
|
|
|
|
|
2023-05-19 03:01:14 +00:00
|
|
|
pub fn main(content: String) -> String{
|
|
|
|
|
|
|
|
println!("IF YOU CAN READ THIS, I HAVE ENTERED THE HTML MODULE");
|
2023-05-19 23:15:48 +00:00
|
|
|
|
|
|
|
|
2023-05-19 03:01:14 +00:00
|
|
|
let mut str_content = content;
|
2023-05-19 23:15:48 +00:00
|
|
|
let soup = Soup::new(&str_content);
|
|
|
|
let results = soup.tag(true)
|
|
|
|
.find_all()
|
|
|
|
.map(|tag| tag.name().to_string())
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
assert_eq!(results, vec![
|
|
|
|
"html".to_string(),
|
|
|
|
"head".to_string(),
|
|
|
|
"body".to_string(),
|
|
|
|
"p".to_string(),
|
|
|
|
"b".to_string(),
|
|
|
|
]);
|
|
|
|
|
|
|
|
return results.join("\n");
|
2023-05-15 18:33:14 +00:00
|
|
|
}
|