fix: fix mikan web extractors
This commit is contained in:
@@ -1,3 +1,11 @@
|
||||
pub mod styles;
|
||||
|
||||
pub use styles::parse_style_attr;
|
||||
use html_escape::decode_html_entities;
|
||||
use itertools::Itertools;
|
||||
use scraper::ElementRef;
|
||||
pub use styles::{extract_background_image_src_from_style_attr, extract_style_from_attr};
|
||||
|
||||
pub fn extract_inner_text_from_element_ref(el: ElementRef<'_>) -> String {
|
||||
let raw_text = el.text().collect_vec().join(",");
|
||||
decode_html_entities(&raw_text).trim().to_string()
|
||||
}
|
||||
|
||||
@@ -1,6 +1,45 @@
|
||||
use lightningcss::declaration::DeclarationBlock;
|
||||
use lightningcss::{
|
||||
declaration::DeclarationBlock, properties::Property, values::image::Image as CSSImage,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
pub fn parse_style_attr(style_attr: &str) -> Option<DeclarationBlock> {
|
||||
use crate::extract::media::extract_image_src_from_str;
|
||||
|
||||
pub fn extract_style_from_attr(style_attr: &str) -> Option<DeclarationBlock> {
|
||||
let result = DeclarationBlock::parse_string(style_attr, Default::default()).ok()?;
|
||||
Some(result)
|
||||
}
|
||||
|
||||
pub fn extract_background_image_src_from_style_attr(
|
||||
style_attr: &str,
|
||||
base_url: &Url,
|
||||
) -> Option<Url> {
|
||||
extract_style_from_attr(style_attr).and_then(|style| {
|
||||
style.iter().find_map(|(prop, _)| {
|
||||
match prop {
|
||||
Property::BackgroundImage(images) => {
|
||||
for img in images {
|
||||
if let CSSImage::Url(path) = img {
|
||||
if let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Property::Background(backgrounds) => {
|
||||
for bg in backgrounds {
|
||||
if let CSSImage::Url(path) = &bg.image {
|
||||
if let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user