fix: fix mikan web extractors
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
use std::{borrow::Cow, error::Error as StdError};
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@@ -16,4 +18,19 @@ pub enum ExtractError {
|
||||
MikanRssFormatError { url: String },
|
||||
#[error("Parse mikan rss item format error, {reason}")]
|
||||
MikanRssItemFormatError { reason: String },
|
||||
#[error("Missing field {field} in extracting meta")]
|
||||
MikanMetaMissingFieldError {
|
||||
field: Cow<'static, str>,
|
||||
#[source]
|
||||
source: Option<Box<dyn StdError + Send + Sync>>,
|
||||
},
|
||||
}
|
||||
|
||||
impl ExtractError {
|
||||
pub fn from_mikan_meta_missing_field(field: Cow<'static, str>) -> Self {
|
||||
Self::MikanMetaMissingFieldError {
|
||||
field,
|
||||
source: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
pub mod styles;
|
||||
|
||||
pub use styles::parse_style_attr;
|
||||
use html_escape::decode_html_entities;
|
||||
use itertools::Itertools;
|
||||
use scraper::ElementRef;
|
||||
pub use styles::{extract_background_image_src_from_style_attr, extract_style_from_attr};
|
||||
|
||||
pub fn extract_inner_text_from_element_ref(el: ElementRef<'_>) -> String {
|
||||
let raw_text = el.text().collect_vec().join(",");
|
||||
decode_html_entities(&raw_text).trim().to_string()
|
||||
}
|
||||
|
||||
@@ -1,6 +1,45 @@
|
||||
use lightningcss::declaration::DeclarationBlock;
|
||||
use lightningcss::{
|
||||
declaration::DeclarationBlock, properties::Property, values::image::Image as CSSImage,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
pub fn parse_style_attr(style_attr: &str) -> Option<DeclarationBlock> {
|
||||
use crate::extract::media::extract_image_src_from_str;
|
||||
|
||||
pub fn extract_style_from_attr(style_attr: &str) -> Option<DeclarationBlock> {
|
||||
let result = DeclarationBlock::parse_string(style_attr, Default::default()).ok()?;
|
||||
Some(result)
|
||||
}
|
||||
|
||||
pub fn extract_background_image_src_from_style_attr(
|
||||
style_attr: &str,
|
||||
base_url: &Url,
|
||||
) -> Option<Url> {
|
||||
extract_style_from_attr(style_attr).and_then(|style| {
|
||||
style.iter().find_map(|(prop, _)| {
|
||||
match prop {
|
||||
Property::BackgroundImage(images) => {
|
||||
for img in images {
|
||||
if let CSSImage::Url(path) = img {
|
||||
if let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Property::Background(backgrounds) => {
|
||||
for bg in backgrounds {
|
||||
if let CSSImage::Url(path) = &bg.image {
|
||||
if let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
8
apps/recorder/src/extract/media/mod.rs
Normal file
8
apps/recorder/src/extract/media/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
use url::Url;
|
||||
|
||||
/// Resolves `image_src` against `base_url` and strips the query string and fragment.
///
/// Returns `None` when `image_src` cannot be joined onto `base_url`.
pub fn extract_image_src_from_str(image_src: &str, base_url: &Url) -> Option<Url> {
    base_url.join(image_src).ok().map(|mut resolved| {
        resolved.set_query(None);
        resolved.set_fragment(None);
        resolved
    })
}
|
||||
@@ -3,15 +3,17 @@ use std::ops::Deref;
|
||||
use async_trait::async_trait;
|
||||
use loco_rs::app::{AppContext, Initializer};
|
||||
use once_cell::sync::OnceCell;
|
||||
use url::Url;
|
||||
|
||||
use super::AppMikanConfig;
|
||||
use crate::{config::AppConfigExt, fetch::HttpClient};
|
||||
|
||||
static APP_MIKAN_CLIENT: OnceCell<AppMikanClient> = OnceCell::new();
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AppMikanClient {
|
||||
http_client: HttpClient,
|
||||
base_url: String,
|
||||
base_url: Url,
|
||||
}
|
||||
|
||||
impl AppMikanClient {
|
||||
@@ -31,7 +33,7 @@ impl AppMikanClient {
|
||||
.expect("AppMikanClient is not initialized")
|
||||
}
|
||||
|
||||
pub fn base_url(&self) -> &str {
|
||||
pub fn base_url(&self) -> &Url {
|
||||
&self.base_url
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
|
||||
use crate::fetch::HttpClientConfig;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct AppMikanConfig {
|
||||
pub http_client: HttpClientConfig,
|
||||
pub base_url: String,
|
||||
pub base_url: Url,
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
pub mod client;
|
||||
pub mod config;
|
||||
pub mod constants;
|
||||
pub mod rss_parser;
|
||||
pub mod web_parser;
|
||||
pub mod rss_extract;
|
||||
pub mod web_extract;
|
||||
|
||||
pub use client::{AppMikanClient, AppMikanClientInitializer};
|
||||
pub use config::AppMikanConfig;
|
||||
pub use constants::MIKAN_BUCKET_KEY;
|
||||
pub use rss_parser::{
|
||||
build_mikan_bangumi_rss_link, build_mikan_subscriber_aggregation_rss_link,
|
||||
parse_mikan_bangumi_id_from_rss_link, parse_mikan_rss_channel_from_rss_link,
|
||||
parse_mikan_rss_items_from_rss_link, parse_mikan_subscriber_aggregation_id_from_rss_link,
|
||||
pub use rss_extract::{
|
||||
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssLink,
|
||||
MikanRssChannel, MikanRssItem, MikanSubscriberAggregationRssChannel,
|
||||
MikanSubscriberAggregationRssLink,
|
||||
MikanSubscriberAggregationRssLink, build_mikan_bangumi_rss_link,
|
||||
build_mikan_subscriber_aggregation_rss_link, extract_mikan_bangumi_id_from_rss_link,
|
||||
extract_mikan_subscriber_aggregation_id_from_rss_link, parse_mikan_rss_channel_from_rss_link,
|
||||
parse_mikan_rss_items_from_rss_link,
|
||||
};
|
||||
pub use web_parser::{
|
||||
build_mikan_bangumi_homepage, build_mikan_episode_homepage,
|
||||
parse_mikan_bangumi_meta_from_mikan_homepage, parse_mikan_episode_meta_from_mikan_homepage,
|
||||
MikanBangumiMeta, MikanEpisodeMeta,
|
||||
pub use web_extract::{
|
||||
MikanBangumiMeta, MikanEpisodeMeta, build_mikan_bangumi_homepage, build_mikan_episode_homepage,
|
||||
extract_mikan_bangumi_meta_from_bangumi_homepage,
|
||||
extract_mikan_episode_meta_from_episode_homepage,
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use chrono::DateTime;
|
||||
use color_eyre::eyre;
|
||||
use itertools::Itertools;
|
||||
use reqwest::IntoUrl;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -10,8 +11,8 @@ use crate::{
|
||||
extract::{
|
||||
errors::ExtractError,
|
||||
mikan::{
|
||||
web_parser::{parse_mikan_episode_id_from_homepage, MikanEpisodeHomepage},
|
||||
AppMikanClient,
|
||||
web_extract::{MikanEpisodeHomepage, parse_mikan_episode_id_from_homepage},
|
||||
},
|
||||
},
|
||||
fetch::bytes::fetch_bytes,
|
||||
@@ -163,11 +164,11 @@ pub struct MikanSubscriberAggregationRssLink {
|
||||
}
|
||||
|
||||
pub fn build_mikan_bangumi_rss_link(
|
||||
mikan_base_url: &str,
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_bangumi_id: &str,
|
||||
mikan_fansub_id: Option<&str>,
|
||||
) -> color_eyre::eyre::Result<Url> {
|
||||
let mut url = Url::parse(mikan_base_url)?;
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path("/RSS/Bangumi");
|
||||
url.query_pairs_mut()
|
||||
.append_pair("bangumiId", mikan_bangumi_id);
|
||||
@@ -181,7 +182,7 @@ pub fn build_mikan_bangumi_rss_link(
|
||||
pub fn build_mikan_subscriber_aggregation_rss_link(
|
||||
mikan_base_url: &str,
|
||||
mikan_aggregation_id: &str,
|
||||
) -> color_eyre::eyre::Result<Url> {
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = Url::parse(mikan_base_url)?;
|
||||
url.set_path("/RSS/MyBangumi");
|
||||
url.query_pairs_mut()
|
||||
@@ -189,7 +190,7 @@ pub fn build_mikan_subscriber_aggregation_rss_link(
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn parse_mikan_bangumi_id_from_rss_link(url: &Url) -> Option<MikanBangumiRssLink> {
|
||||
pub fn extract_mikan_bangumi_id_from_rss_link(url: &Url) -> Option<MikanBangumiRssLink> {
|
||||
if url.path() == "/RSS/Bangumi" {
|
||||
url.query_pairs()
|
||||
.find(|(k, _)| k == "bangumiId")
|
||||
@@ -205,7 +206,7 @@ pub fn parse_mikan_bangumi_id_from_rss_link(url: &Url) -> Option<MikanBangumiRss
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_mikan_subscriber_aggregation_id_from_rss_link(
|
||||
pub fn extract_mikan_subscriber_aggregation_id_from_rss_link(
|
||||
url: &Url,
|
||||
) -> Option<MikanSubscriberAggregationRssLink> {
|
||||
if url.path() == "/RSS/MyBangumi" {
|
||||
@@ -222,7 +223,7 @@ pub fn parse_mikan_subscriber_aggregation_id_from_rss_link(
|
||||
pub async fn parse_mikan_rss_items_from_rss_link(
|
||||
client: Option<&AppMikanClient>,
|
||||
url: impl IntoUrl,
|
||||
) -> color_eyre::eyre::Result<Vec<MikanRssItem>> {
|
||||
) -> eyre::Result<Vec<MikanRssItem>> {
|
||||
let channel = parse_mikan_rss_channel_from_rss_link(client, url).await?;
|
||||
|
||||
Ok(channel.into_items())
|
||||
@@ -231,7 +232,7 @@ pub async fn parse_mikan_rss_items_from_rss_link(
|
||||
pub async fn parse_mikan_rss_channel_from_rss_link(
|
||||
client: Option<&AppMikanClient>,
|
||||
url: impl IntoUrl,
|
||||
) -> color_eyre::eyre::Result<MikanRssChannel> {
|
||||
) -> eyre::Result<MikanRssChannel> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let bytes = fetch_bytes(http_client, url.as_str()).await?;
|
||||
|
||||
@@ -242,7 +243,7 @@ pub async fn parse_mikan_rss_channel_from_rss_link(
|
||||
if let Some(MikanBangumiRssLink {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
}) = parse_mikan_bangumi_id_from_rss_link(&channel_link)
|
||||
}) = extract_mikan_bangumi_id_from_rss_link(&channel_link)
|
||||
{
|
||||
let channel_name = channel.title().replace("Mikan Project - ", "");
|
||||
|
||||
@@ -274,7 +275,7 @@ pub async fn parse_mikan_rss_channel_from_rss_link(
|
||||
} else if let Some(MikanSubscriberAggregationRssLink {
|
||||
mikan_aggregation_id,
|
||||
..
|
||||
}) = parse_mikan_subscriber_aggregation_id_from_rss_link(&channel_link)
|
||||
}) = extract_mikan_subscriber_aggregation_id_from_rss_link(&channel_link)
|
||||
{
|
||||
let items = channel
|
||||
.items
|
||||
@@ -304,8 +305,8 @@ mod tests {
|
||||
|
||||
use crate::{
|
||||
extract::mikan::{
|
||||
parse_mikan_rss_channel_from_rss_link, MikanBangumiAggregationRssChannel,
|
||||
MikanBangumiRssChannel, MikanRssChannel,
|
||||
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanRssChannel,
|
||||
parse_mikan_rss_channel_from_rss_link,
|
||||
},
|
||||
sync::core::BITTORRENT_MIME_TYPE,
|
||||
};
|
||||
@@ -333,10 +334,12 @@ mod tests {
|
||||
|
||||
assert_eq!(first_sub_item.mime, BITTORRENT_MIME_TYPE);
|
||||
|
||||
assert!(&first_sub_item
|
||||
.homepage
|
||||
.as_str()
|
||||
.starts_with("https://mikanani.me/Home/Episode"));
|
||||
assert!(
|
||||
&first_sub_item
|
||||
.homepage
|
||||
.as_str()
|
||||
.starts_with("https://mikanani.me/Home/Episode")
|
||||
);
|
||||
|
||||
let name = first_sub_item.title.as_str();
|
||||
assert!(name.contains("葬送的芙莉莲"));
|
||||
644
apps/recorder/src/extract/mikan/web_extract.rs
Normal file
644
apps/recorder/src/extract/mikan/web_extract.rs
Normal file
@@ -0,0 +1,644 @@
|
||||
use std::{borrow::Cow, ops::Deref};
|
||||
|
||||
use bytes::Bytes;
|
||||
use color_eyre::eyre;
|
||||
use loco_rs::app::AppContext;
|
||||
use reqwest::IntoUrl;
|
||||
use scraper::{Html, Selector};
|
||||
use tracing::instrument;
|
||||
use url::Url;
|
||||
|
||||
use super::{
|
||||
AppMikanClient, MIKAN_BUCKET_KEY, MikanBangumiRssLink, extract_mikan_bangumi_id_from_rss_link,
|
||||
};
|
||||
use crate::{
|
||||
app::AppContextExt,
|
||||
dal::DalContentCategory,
|
||||
extract::{
|
||||
errors::ExtractError,
|
||||
html::{extract_background_image_src_from_style_attr, extract_inner_text_from_element_ref},
|
||||
media::extract_image_src_from_str,
|
||||
},
|
||||
fetch::{html::fetch_html, image::fetch_image},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanEpisodeMeta {
|
||||
pub homepage: Url,
|
||||
pub origin_poster_src: Option<Url>,
|
||||
pub bangumi_title: String,
|
||||
pub episode_title: String,
|
||||
pub fansub: String,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: String,
|
||||
pub mikan_episode_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanBangumiMeta {
|
||||
pub homepage: Url,
|
||||
pub origin_poster_src: Option<Url>,
|
||||
pub bangumi_title: String,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: Option<String>,
|
||||
pub fansub: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanBangumiPosterMeta {
|
||||
pub origin_poster_src: Url,
|
||||
pub poster_data: Option<Bytes>,
|
||||
pub poster_src: Option<String>,
|
||||
}
|
||||
|
||||
/// Identifier parsed from a `/Home/Episode/{id}` homepage URL.
#[derive(Debug, Clone, PartialEq)]
pub struct MikanEpisodeHomepage {
    /// Mikan episode id.
    pub mikan_episode_id: String,
}
|
||||
|
||||
/// Identifiers parsed from a `/Home/Bangumi/{id}#{fansub_id}` homepage URL.
#[derive(Debug, Clone, PartialEq)]
pub struct MikanBangumiHomepage {
    /// Mikan bangumi id, taken from the URL path.
    pub mikan_bangumi_id: String,
    /// Mikan fansub id, taken from the URL fragment when present.
    pub mikan_fansub_id: Option<String>,
}
|
||||
|
||||
pub fn build_mikan_bangumi_homepage(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_bangumi_id: &str,
|
||||
mikan_fansub_id: Option<&str>,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path(&format!("/Home/Bangumi/{mikan_bangumi_id}"));
|
||||
url.set_fragment(mikan_fansub_id);
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn build_mikan_episode_homepage(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_episode_id: &str,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path(&format!("/Home/Episode/{mikan_episode_id}"));
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn build_mikan_bangumi_expand_info_url(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_bangumi_id: &str,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path("/ExpandBangumi");
|
||||
url.query_pairs_mut()
|
||||
.append_pair("bangumiId", mikan_bangumi_id)
|
||||
.append_pair("showSubscribed", "true");
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn parse_mikan_bangumi_id_from_homepage(url: &Url) -> Option<MikanBangumiHomepage> {
|
||||
if url.path().starts_with("/Home/Bangumi/") {
|
||||
let mikan_bangumi_id = url.path().replace("/Home/Bangumi/", "");
|
||||
|
||||
Some(MikanBangumiHomepage {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id: url.fragment().map(String::from),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_mikan_episode_id_from_homepage(url: &Url) -> Option<MikanEpisodeHomepage> {
|
||||
if url.path().starts_with("/Home/Episode/") {
|
||||
let mikan_episode_id = url.path().replace("/Home/Episode/", "");
|
||||
Some(MikanEpisodeHomepage { mikan_episode_id })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn extract_mikan_poster_meta_from_src(
|
||||
client: Option<&AppMikanClient>,
|
||||
origin_poster_src_url: Url,
|
||||
) -> eyre::Result<MikanBangumiPosterMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let poster_data = fetch_image(http_client, origin_poster_src_url.clone()).await?;
|
||||
Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src: origin_poster_src_url,
|
||||
poster_data: Some(poster_data),
|
||||
poster_src: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn extract_mikan_bangumi_poster_meta_from_src_with_cache(
|
||||
ctx: &AppContext,
|
||||
origin_poster_src_url: Url,
|
||||
subscriber_id: i32,
|
||||
) -> eyre::Result<MikanBangumiPosterMeta> {
|
||||
let dal_client = ctx.get_dal_client();
|
||||
let mikan_client = ctx.get_mikan_client();
|
||||
if let Some(poster_src) = dal_client
|
||||
.exists_object(
|
||||
DalContentCategory::Image,
|
||||
subscriber_id,
|
||||
Some(MIKAN_BUCKET_KEY),
|
||||
&origin_poster_src_url.path().replace("/images/Bangumi/", ""),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
return Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src: origin_poster_src_url,
|
||||
poster_data: None,
|
||||
poster_src: Some(poster_src.to_string()),
|
||||
});
|
||||
}
|
||||
|
||||
let poster_data =
|
||||
fetch_image(Some(mikan_client.deref()), origin_poster_src_url.clone()).await?;
|
||||
|
||||
let poster_str = dal_client
|
||||
.store_object(
|
||||
DalContentCategory::Image,
|
||||
subscriber_id,
|
||||
Some(MIKAN_BUCKET_KEY),
|
||||
&origin_poster_src_url.path().replace("/images/Bangumi/", ""),
|
||||
poster_data.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src: origin_poster_src_url,
|
||||
poster_data: Some(poster_data),
|
||||
poster_src: Some(poster_str.to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(mikan_episode_homepage_url = mikan_episode_homepage_url.as_str()))]
|
||||
pub async fn extract_mikan_episode_meta_from_episode_homepage(
|
||||
client: Option<&AppMikanClient>,
|
||||
mikan_episode_homepage_url: Url,
|
||||
) -> eyre::Result<MikanEpisodeMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let mikan_base_url = Url::parse(&mikan_episode_homepage_url.origin().unicode_serialization())?;
|
||||
let content = fetch_html(http_client, mikan_episode_homepage_url.as_str()).await?;
|
||||
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let bangumi_title_selector =
|
||||
&Selector::parse(".bangumi-title > a[href^='/Home/Bangumi/']").unwrap();
|
||||
let mikan_bangumi_id_selector =
|
||||
&Selector::parse(".bangumi-title > a.mikan-rss[data-original-title='RSS']").unwrap();
|
||||
let bangumi_poster_selector = &Selector::parse(".bangumi-poster").unwrap();
|
||||
|
||||
let bangumi_title = html
|
||||
.select(bangumi_title_selector)
|
||||
.next()
|
||||
.map(extract_inner_text_from_element_ref)
|
||||
.ok_or_else(|| ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("bangumi_title")))
|
||||
.inspect_err(|error| {
|
||||
tracing::warn!(error = %error);
|
||||
})?;
|
||||
|
||||
let MikanBangumiRssLink {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
..
|
||||
} = html
|
||||
.select(mikan_bangumi_id_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.and_then(|s| mikan_episode_homepage_url.join(s).ok())
|
||||
.and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_link(&rss_link_url))
|
||||
.ok_or_else(|| {
|
||||
ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_bangumi_id"))
|
||||
})
|
||||
.inspect_err(|error| tracing::error!(error = %error))?;
|
||||
|
||||
let mikan_fansub_id = mikan_fansub_id
|
||||
.ok_or_else(|| {
|
||||
ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_fansub_id"))
|
||||
})
|
||||
.inspect_err(|error| tracing::error!(error = %error))?;
|
||||
|
||||
let episode_title = html
|
||||
.select(&Selector::parse("title").unwrap())
|
||||
.next()
|
||||
.map(extract_inner_text_from_element_ref)
|
||||
.ok_or_else(|| ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("episode_title")))
|
||||
.inspect_err(|error| {
|
||||
tracing::warn!(error = %error);
|
||||
})?;
|
||||
|
||||
let MikanEpisodeHomepage {
|
||||
mikan_episode_id, ..
|
||||
} = parse_mikan_episode_id_from_homepage(&mikan_episode_homepage_url)
|
||||
.ok_or_else(|| {
|
||||
ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_episode_id"))
|
||||
})
|
||||
.inspect_err(|error| {
|
||||
tracing::warn!(error = %error);
|
||||
})?;
|
||||
|
||||
let fansub_name = html
|
||||
.select(
|
||||
&Selector::parse(".bangumi-info a.magnet-link-wrap[href^='/Home/PublishGroup/']")
|
||||
.unwrap(),
|
||||
)
|
||||
.next()
|
||||
.map(extract_inner_text_from_element_ref)
|
||||
.ok_or_else(|| ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("fansub_name")))
|
||||
.inspect_err(|error| {
|
||||
tracing::warn!(error = %error);
|
||||
})?;
|
||||
|
||||
let origin_poster_src = html.select(bangumi_poster_selector).next().and_then(|el| {
|
||||
el.value()
|
||||
.attr("data-src")
|
||||
.and_then(|data_src| extract_image_src_from_str(data_src, &mikan_base_url))
|
||||
.or_else(|| {
|
||||
el.value().attr("style").and_then(|style| {
|
||||
extract_background_image_src_from_style_attr(style, &mikan_base_url)
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
tracing::trace!(
|
||||
bangumi_title,
|
||||
mikan_bangumi_id,
|
||||
episode_title,
|
||||
mikan_episode_id,
|
||||
origin_poster_src = origin_poster_src.as_ref().map(|url| url.as_str()),
|
||||
fansub_name,
|
||||
mikan_fansub_id,
|
||||
"mikan episode meta extracted"
|
||||
);
|
||||
|
||||
Ok(MikanEpisodeMeta {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
bangumi_title,
|
||||
episode_title,
|
||||
homepage: mikan_episode_homepage_url,
|
||||
origin_poster_src,
|
||||
fansub: fansub_name,
|
||||
mikan_episode_id,
|
||||
})
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(mikan_bangumi_homepage_url = mikan_bangumi_homepage_url.as_str()))]
|
||||
pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage(
|
||||
client: Option<&AppMikanClient>,
|
||||
mikan_bangumi_homepage_url: Url,
|
||||
) -> eyre::Result<MikanBangumiMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let mikan_base_url = Url::parse(&mikan_bangumi_homepage_url.origin().unicode_serialization())?;
|
||||
let content = fetch_html(http_client, mikan_bangumi_homepage_url.as_str()).await?;
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let bangumi_title_selector = &Selector::parse(".bangumi-title").unwrap();
|
||||
let mikan_bangumi_id_selector =
|
||||
&Selector::parse(".bangumi-title > .mikan-rss[data-original-title='RSS']").unwrap();
|
||||
let bangumi_poster_selector = &Selector::parse(".bangumi-poster").unwrap();
|
||||
|
||||
let bangumi_title = html
|
||||
.select(bangumi_title_selector)
|
||||
.next()
|
||||
.map(extract_inner_text_from_element_ref)
|
||||
.ok_or_else(|| ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("bangumi_title")))
|
||||
.inspect_err(|error| tracing::warn!(error = %error))?;
|
||||
|
||||
let mikan_bangumi_id = html
|
||||
.select(mikan_bangumi_id_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.and_then(|s| mikan_bangumi_homepage_url.join(s).ok())
|
||||
.and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_link(&rss_link_url))
|
||||
.map(
|
||||
|MikanBangumiRssLink {
|
||||
mikan_bangumi_id, ..
|
||||
}| mikan_bangumi_id,
|
||||
)
|
||||
.ok_or_else(|| {
|
||||
ExtractError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_bangumi_id"))
|
||||
})
|
||||
.inspect_err(|error| tracing::error!(error = %error))?;
|
||||
|
||||
let origin_poster_src = html.select(bangumi_poster_selector).next().and_then(|el| {
|
||||
el.value()
|
||||
.attr("data-src")
|
||||
.and_then(|data_src| extract_image_src_from_str(data_src, &mikan_base_url))
|
||||
.or_else(|| {
|
||||
el.value().attr("style").and_then(|style| {
|
||||
extract_background_image_src_from_style_attr(style, &mikan_base_url)
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
let (mikan_fansub_id, fansub_name) = mikan_bangumi_homepage_url
|
||||
.fragment()
|
||||
.and_then(|id| {
|
||||
html.select(
|
||||
&Selector::parse(&format!("a.subgroup-name[data-anchor='#{}']", id)).unwrap(),
|
||||
)
|
||||
.next()
|
||||
.map(extract_inner_text_from_element_ref)
|
||||
.map(|fansub_name| (id.to_string(), fansub_name))
|
||||
})
|
||||
.unzip();
|
||||
|
||||
tracing::trace!(
|
||||
bangumi_title,
|
||||
mikan_bangumi_id,
|
||||
origin_poster_src = origin_poster_src.as_ref().map(|url| url.as_str()),
|
||||
fansub_name,
|
||||
mikan_fansub_id,
|
||||
"mikan bangumi meta extracted"
|
||||
);
|
||||
|
||||
Ok(MikanBangumiMeta {
|
||||
homepage: mikan_bangumi_homepage_url,
|
||||
bangumi_title,
|
||||
origin_poster_src,
|
||||
mikan_bangumi_id,
|
||||
fansub: fansub_name,
|
||||
mikan_fansub_id,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* @logined-required
|
||||
*/
|
||||
#[instrument(skip_all, fields(my_bangumi_page_url = my_bangumi_page_url.as_str()))]
|
||||
pub async fn extract_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
client: Option<&AppMikanClient>,
|
||||
my_bangumi_page_url: Url,
|
||||
) -> eyre::Result<Vec<MikanBangumiMeta>> {
|
||||
let http_client = client.map(|c| c.deref());
|
||||
let mikan_base_url = Url::parse(&my_bangumi_page_url.origin().unicode_serialization())?;
|
||||
|
||||
let content = fetch_html(http_client, my_bangumi_page_url.clone()).await?;
|
||||
|
||||
let bangumi_container_selector = &Selector::parse(".sk-bangumi .an-ul>li").unwrap();
|
||||
let bangumi_info_selector = &Selector::parse(".an-info a.an-text").unwrap();
|
||||
let bangumi_poster_selector =
|
||||
&Selector::parse("span[data-src][data-bangumiid], span[data-bangumiid][style]").unwrap();
|
||||
let fansub_container_selector =
|
||||
&Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap();
|
||||
let fansub_title_selector = &Selector::parse(".tag-res-name[title]").unwrap();
|
||||
let fansub_id_selector =
|
||||
&Selector::parse(".active[data-subtitlegroupid][data-bangumiid]").unwrap();
|
||||
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let mut bangumi_list = vec![];
|
||||
|
||||
for bangumi_elem in html.select(bangumi_container_selector) {
|
||||
let title_and_href_elem = bangumi_elem.select(bangumi_info_selector).next();
|
||||
let poster_elem = bangumi_elem.select(bangumi_poster_selector).next();
|
||||
if let (Some(bangumi_home_page_url), Some(bangumi_title)) = (
|
||||
title_and_href_elem.and_then(|elem| elem.attr("href")),
|
||||
title_and_href_elem.and_then(|elem| elem.attr("title")),
|
||||
) {
|
||||
let origin_poster_src = poster_elem.and_then(|ele| {
|
||||
ele.attr("data-src")
|
||||
.and_then(|data_src| extract_image_src_from_str(data_src, &mikan_base_url))
|
||||
.or_else(|| {
|
||||
ele.attr("style").and_then(|style| {
|
||||
extract_background_image_src_from_style_attr(style, &mikan_base_url)
|
||||
})
|
||||
})
|
||||
});
|
||||
let bangumi_home_page_url = my_bangumi_page_url.join(bangumi_home_page_url)?;
|
||||
if let Some(MikanBangumiHomepage {
|
||||
ref mikan_bangumi_id,
|
||||
..
|
||||
}) = parse_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)
|
||||
{
|
||||
if let Some(origin_poster_src) = origin_poster_src.as_ref() {
|
||||
tracing::trace!(
|
||||
origin_poster_src = origin_poster_src.as_str(),
|
||||
bangumi_title,
|
||||
mikan_bangumi_id,
|
||||
"bangumi info extracted"
|
||||
);
|
||||
} else {
|
||||
tracing::warn!(
|
||||
bangumi_title,
|
||||
mikan_bangumi_id,
|
||||
"bangumi info extracted, but failed to extract poster_src"
|
||||
);
|
||||
}
|
||||
let bangumi_expand_info_url =
|
||||
build_mikan_bangumi_expand_info_url(mikan_base_url.clone(), mikan_bangumi_id)?;
|
||||
let bangumi_expand_info_content =
|
||||
fetch_html(http_client, bangumi_expand_info_url).await?;
|
||||
let bangumi_expand_info_fragment =
|
||||
Html::parse_fragment(&bangumi_expand_info_content);
|
||||
for fansub_info in bangumi_expand_info_fragment.select(fansub_container_selector) {
|
||||
if let (Some(fansub_name), Some(mikan_fansub_id)) = (
|
||||
fansub_info
|
||||
.select(fansub_title_selector)
|
||||
.next()
|
||||
.and_then(|ele| ele.attr("title")),
|
||||
fansub_info
|
||||
.select(fansub_id_selector)
|
||||
.next()
|
||||
.and_then(|ele| ele.attr("data-subtitlegroupid")),
|
||||
) {
|
||||
tracing::trace!(
|
||||
fansub_name = &fansub_name,
|
||||
mikan_fansub_id,
|
||||
"subscribed fansub extracted"
|
||||
);
|
||||
bangumi_list.push(MikanBangumiMeta {
|
||||
homepage: build_mikan_bangumi_homepage(
|
||||
mikan_base_url.clone(),
|
||||
mikan_bangumi_id.as_str(),
|
||||
Some(mikan_fansub_id),
|
||||
)?,
|
||||
bangumi_title: bangumi_title.to_string(),
|
||||
mikan_bangumi_id: mikan_bangumi_id.to_string(),
|
||||
mikan_fansub_id: Some(mikan_fansub_id.to_string()),
|
||||
fansub: Some(fansub_name.to_string()),
|
||||
origin_poster_src: origin_poster_src.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bangumi_list)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
#![allow(unused_variables)]
|
||||
use color_eyre::eyre;
|
||||
use rstest::{fixture, rstest};
|
||||
use tracing::Level;
|
||||
use url::Url;
|
||||
use zune_image::{codecs::ImageFormat, image::Image};
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
extract::mikan::web_extract::extract_mikan_bangumis_meta_from_my_bangumi_page,
|
||||
test_utils::{mikan::build_testing_mikan_client, tracing::init_testing_tracing},
|
||||
};
|
||||
|
||||
#[fixture]
|
||||
fn before_each() {
|
||||
init_testing_tracing(Level::INFO);
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[tokio::test]
|
||||
async fn test_extract_mikan_poster_from_src(before_each: ()) -> eyre::Result<()> {
|
||||
let mut mikan_server = mockito::Server::new_async().await;
|
||||
let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
let mikan_client = build_testing_mikan_client(mikan_base_url.clone())?;
|
||||
|
||||
let bangumi_poster_url = mikan_base_url.join("/images/Bangumi/202309/5ce9fed1.jpg")?;
|
||||
|
||||
let bangumi_poster_mock = mikan_server
|
||||
.mock("GET", bangumi_poster_url.path())
|
||||
.with_body_from_file("tests/resources/mikan/Bangumi-202309-5ce9fed1.jpg")
|
||||
.create_async()
|
||||
.await;
|
||||
|
||||
let bgm_poster =
|
||||
extract_mikan_poster_meta_from_src(Some(&mikan_client), bangumi_poster_url).await?;
|
||||
bangumi_poster_mock.expect(1);
|
||||
let u8_data = bgm_poster.poster_data.expect("should have poster data");
|
||||
let image = Image::read(u8_data.to_vec(), Default::default());
|
||||
assert!(
|
||||
image.is_ok_and(|img| img
|
||||
.metadata()
|
||||
.get_image_format()
|
||||
.is_some_and(|fmt| matches!(fmt, ImageFormat::JPEG))),
|
||||
"should start with valid jpeg data magic number"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[tokio::test]
|
||||
async fn test_extract_mikan_episode(before_each: ()) -> eyre::Result<()> {
|
||||
let mut mikan_server = mockito::Server::new_async().await;
|
||||
let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
let mikan_client = build_testing_mikan_client(mikan_base_url.clone())?;
|
||||
|
||||
let episode_homepage_url =
|
||||
mikan_base_url.join("/Home/Episode/475184dce83ea2b82902592a5ac3343f6d54b36a")?;
|
||||
|
||||
let episode_homepage_mock = mikan_server
|
||||
.mock("GET", episode_homepage_url.path())
|
||||
.with_body_from_file(
|
||||
"tests/resources/mikan/Episode-475184dce83ea2b82902592a5ac3343f6d54b36a.htm",
|
||||
)
|
||||
.create_async()
|
||||
.await;
|
||||
|
||||
let ep_meta = extract_mikan_episode_meta_from_episode_homepage(
|
||||
Some(&mikan_client),
|
||||
episode_homepage_url.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(ep_meta.homepage, episode_homepage_url);
|
||||
assert_eq!(ep_meta.bangumi_title, "葬送的芙莉莲");
|
||||
assert_eq!(
|
||||
ep_meta
|
||||
.origin_poster_src
|
||||
.as_ref()
|
||||
.map(|s| s.path().to_string()),
|
||||
Some(String::from("/images/Bangumi/202309/5ce9fed1.jpg"))
|
||||
);
|
||||
assert_eq!(ep_meta.fansub, "LoliHouse");
|
||||
assert_eq!(ep_meta.mikan_fansub_id, "370");
|
||||
assert_eq!(ep_meta.mikan_bangumi_id, "3141");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[tokio::test]
|
||||
async fn test_extract_mikan_bangumi_meta_from_bangumi_homepage(
|
||||
before_each: (),
|
||||
) -> eyre::Result<()> {
|
||||
let mut mikan_server = mockito::Server::new_async().await;
|
||||
let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
let mikan_client = build_testing_mikan_client(mikan_base_url.clone())?;
|
||||
|
||||
let bangumi_homepage_url = mikan_base_url.join("/Home/Bangumi/3416#370")?;
|
||||
|
||||
let bangumi_homepage_mock = mikan_server
|
||||
.mock("GET", bangumi_homepage_url.path())
|
||||
.with_body_from_file("tests/resources/mikan/Bangumi-3416-370.htm")
|
||||
.create_async()
|
||||
.await;
|
||||
|
||||
let bgm_meta = extract_mikan_bangumi_meta_from_bangumi_homepage(
|
||||
Some(&mikan_client),
|
||||
bangumi_homepage_url.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(bgm_meta.homepage, bangumi_homepage_url);
|
||||
assert_eq!(bgm_meta.bangumi_title, "叹气的亡灵想隐退");
|
||||
assert_eq!(
|
||||
bgm_meta
|
||||
.origin_poster_src
|
||||
.as_ref()
|
||||
.map(|s| s.path().to_string()),
|
||||
Some(String::from("/images/Bangumi/202410/480ef127.jpg"))
|
||||
);
|
||||
assert_eq!(bgm_meta.fansub, Some(String::from("LoliHouse")));
|
||||
assert_eq!(bgm_meta.mikan_fansub_id, Some(String::from("370")));
|
||||
assert_eq!(bgm_meta.mikan_bangumi_id, "3416");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[tokio::test]
|
||||
async fn test_extract_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
before_each: (),
|
||||
) -> eyre::Result<()> {
|
||||
let mut mikan_server = mockito::Server::new_async().await;
|
||||
|
||||
let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
|
||||
let mikan_client = build_testing_mikan_client(mikan_base_url.clone())?;
|
||||
|
||||
let my_bangumi_page_url = mikan_base_url.join("/Home/MyBangumi")?;
|
||||
|
||||
let mock_my_bangumi = mikan_server
|
||||
.mock("GET", my_bangumi_page_url.path())
|
||||
.with_body_from_file("tests/resources/mikan/MyBangumi.htm")
|
||||
.create_async()
|
||||
.await;
|
||||
|
||||
let mock_expand_bangumi = mikan_server
|
||||
.mock("GET", "/ExpandBangumi")
|
||||
.match_query(mockito::Matcher::Any)
|
||||
.with_body_from_file("tests/resources/mikan/ExpandBangumi.htm")
|
||||
.create_async()
|
||||
.await;
|
||||
|
||||
let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
Some(&mikan_client),
|
||||
my_bangumi_page_url,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert!(!bangumi_metas.is_empty());
|
||||
|
||||
assert!(bangumi_metas[0].origin_poster_src.is_some());
|
||||
|
||||
mock_my_bangumi.expect(1);
|
||||
mock_expand_bangumi.expect(bangumi_metas.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,595 +0,0 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use bytes::Bytes;
|
||||
use color_eyre::eyre::{self, ContextCompat};
|
||||
use html_escape::decode_html_entities;
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use lightningcss::{properties::Property, values::image::Image as CSSImage};
|
||||
use loco_rs::app::AppContext;
|
||||
use regex::Regex;
|
||||
use reqwest::IntoUrl;
|
||||
use scraper::Html;
|
||||
use url::Url;
|
||||
|
||||
use super::{
|
||||
AppMikanClient, MIKAN_BUCKET_KEY, MikanBangumiRssLink, parse_mikan_bangumi_id_from_rss_link,
|
||||
};
|
||||
use crate::{
|
||||
app::AppContextExt,
|
||||
dal::DalContentCategory,
|
||||
extract::html::parse_style_attr,
|
||||
fetch::{html::fetch_html, image::fetch_image},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanEpisodeMeta {
|
||||
pub homepage: Url,
|
||||
pub origin_poster_src: Option<Url>,
|
||||
pub bangumi_title: String,
|
||||
pub episode_title: String,
|
||||
pub fansub: String,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: String,
|
||||
pub mikan_episode_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanBangumiMeta {
|
||||
pub homepage: Url,
|
||||
pub origin_poster_src: Option<Url>,
|
||||
pub bangumi_title: String,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: Option<String>,
|
||||
pub fansub: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanBangumiPosterMeta {
|
||||
pub origin_poster_src: Url,
|
||||
pub poster_data: Option<Bytes>,
|
||||
pub poster_src: Option<String>,
|
||||
}
|
||||
|
||||
/// Identifier parsed out of a `/Home/Episode/<id>` URL.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MikanEpisodeHomepage {
    /// The part of the URL path that follows `/Home/Episode/`.
    pub mikan_episode_id: String,
}
|
||||
|
||||
/// Identifiers parsed out of a `/Home/Bangumi/<id>#<fansub>` URL.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MikanBangumiHomepage {
    /// The part of the URL path that follows `/Home/Bangumi/`.
    pub mikan_bangumi_id: String,
    /// The URL fragment, when present.
    pub mikan_fansub_id: Option<String>,
}
|
||||
|
||||
lazy_static! {
    /// Matches a season marker in a title: a "第" followed (greedily) by any
    /// characters up to a "季". Compiled once on first use; the pattern is a
    /// constant, so the `unwrap` cannot fail at runtime.
    static ref MIKAN_TITLE_SEASON: Regex = Regex::new("第.*季").unwrap();
}
|
||||
|
||||
pub fn build_mikan_bangumi_homepage(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_bangumi_id: &str,
|
||||
mikan_fansub_id: Option<&str>,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path(&format!("/Home/Bangumi/{mikan_bangumi_id}"));
|
||||
url.set_fragment(mikan_fansub_id);
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn build_mikan_episode_homepage(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_episode_id: &str,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path(&format!("/Home/Episode/{mikan_episode_id}"));
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn build_mikan_bangumi_expand_info_url(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
mikan_bangumi_id: &str,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = mikan_base_url.into_url()?;
|
||||
url.set_path("/ExpandBangumi");
|
||||
url.query_pairs_mut()
|
||||
.append_pair("bangumiId", mikan_bangumi_id)
|
||||
.append_pair("showSubscribed", "true");
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn parse_mikan_bangumi_id_from_homepage(url: &Url) -> Option<MikanBangumiHomepage> {
|
||||
if url.path().starts_with("/Home/Bangumi/") {
|
||||
let mikan_bangumi_id = url.path().replace("/Home/Bangumi/", "");
|
||||
|
||||
Some(MikanBangumiHomepage {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id: url.fragment().map(String::from),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_mikan_episode_id_from_homepage(url: &Url) -> Option<MikanEpisodeHomepage> {
|
||||
if url.path().starts_with("/Home/Episode/") {
|
||||
let mikan_episode_id = url.path().replace("/Home/Episode/", "");
|
||||
Some(MikanEpisodeHomepage { mikan_episode_id })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_bangumi_poster_from_origin_poster_src(
|
||||
client: Option<&AppMikanClient>,
|
||||
origin_poster_src_url: Url,
|
||||
) -> eyre::Result<MikanBangumiPosterMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let poster_data = fetch_image(http_client, origin_poster_src_url.clone()).await?;
|
||||
Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src: origin_poster_src_url,
|
||||
poster_data: Some(poster_data),
|
||||
poster_src: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_bangumi_poster_from_origin_poster_src_with_cache(
|
||||
ctx: &AppContext,
|
||||
origin_poster_src_url: Url,
|
||||
subscriber_id: i32,
|
||||
) -> eyre::Result<MikanBangumiPosterMeta> {
|
||||
let dal_client = ctx.get_dal_client();
|
||||
let mikan_client = ctx.get_mikan_client();
|
||||
if let Some(poster_src) = dal_client
|
||||
.exists_object(
|
||||
DalContentCategory::Image,
|
||||
subscriber_id,
|
||||
Some(MIKAN_BUCKET_KEY),
|
||||
&origin_poster_src_url.path().replace("/images/Bangumi/", ""),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
return Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src: origin_poster_src_url,
|
||||
poster_data: None,
|
||||
poster_src: Some(poster_src.to_string()),
|
||||
});
|
||||
}
|
||||
|
||||
let poster_data =
|
||||
fetch_image(Some(mikan_client.deref()), origin_poster_src_url.clone()).await?;
|
||||
|
||||
let poster_str = dal_client
|
||||
.store_object(
|
||||
DalContentCategory::Image,
|
||||
subscriber_id,
|
||||
Some(MIKAN_BUCKET_KEY),
|
||||
&origin_poster_src_url.path().replace("/images/Bangumi/", ""),
|
||||
poster_data.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src: origin_poster_src_url,
|
||||
poster_data: Some(poster_data),
|
||||
poster_src: Some(poster_str.to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse_mikan_origin_poster_src_from_style_attr(
|
||||
mikan_base_url: impl IntoUrl,
|
||||
style_attr: &str,
|
||||
) -> Option<Url> {
|
||||
let base_url = mikan_base_url.into_url().ok()?;
|
||||
parse_style_attr(style_attr)
|
||||
.and_then(|style| {
|
||||
style.iter().find_map(|(prop, _)| {
|
||||
match prop {
|
||||
Property::BackgroundImage(images) => {
|
||||
for img in images {
|
||||
if let CSSImage::Url(path) = img {
|
||||
if let Ok(url) = base_url.join(path.url.trim()) {
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Property::Background(backgrounds) => {
|
||||
for bg in backgrounds {
|
||||
if let CSSImage::Url(path) = &bg.image {
|
||||
if let Ok(url) = base_url.join(path.url.trim()) {
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
.map(|mut poster_str| {
|
||||
poster_str.set_query(None);
|
||||
poster_str.set_fragment(None);
|
||||
poster_str
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_bangumi_meta_from_mikan_homepage(
|
||||
client: Option<&AppMikanClient>,
|
||||
mikan_bangumi_homepage_url: Url,
|
||||
) -> eyre::Result<MikanBangumiMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let mikan_base_url = mikan_bangumi_homepage_url.origin().unicode_serialization();
|
||||
let content = fetch_html(http_client, mikan_bangumi_homepage_url.as_str()).await?;
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let bangumi_fansubs = html
|
||||
.select(&scraper::Selector::parse(".subgroup-text").unwrap())
|
||||
.filter_map(|el| {
|
||||
if let (Some(fansub_id), Some(fansub_name)) = (
|
||||
el.value()
|
||||
.attr("id")
|
||||
.map(|s| decode_html_entities(s).trim().to_string()),
|
||||
el.select(&scraper::Selector::parse("a:nth-child(1)").unwrap())
|
||||
.next()
|
||||
.map(|child| {
|
||||
let mut s = String::from(
|
||||
child
|
||||
.prev_sibling()
|
||||
.and_then(|t| t.value().as_text())
|
||||
.map(|s| s.trim())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
s.extend(child.text());
|
||||
decode_html_entities(&s).trim().to_string()
|
||||
}),
|
||||
) {
|
||||
Some((fansub_id, fansub_name))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
let fansub_info = mikan_bangumi_homepage_url.fragment().and_then(|b| {
|
||||
bangumi_fansubs
|
||||
.iter()
|
||||
.find_map(|(id, name)| if id == b { Some((id, name)) } else { None })
|
||||
});
|
||||
|
||||
let bangumi_title = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.and_then(|title| if title.is_empty() { None } else { Some(title) })
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!(
|
||||
"Missing mikan bangumi official title for {}",
|
||||
mikan_bangumi_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
let MikanBangumiRssLink {
|
||||
mikan_bangumi_id, ..
|
||||
} = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title > .mikan-rss").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.as_ref()
|
||||
.and_then(|s| mikan_bangumi_homepage_url.join(s).ok())
|
||||
.and_then(|rss_link_url| parse_mikan_bangumi_id_from_rss_link(&rss_link_url))
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!(
|
||||
"Missing mikan bangumi rss link or error format for {}",
|
||||
mikan_bangumi_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
let origin_poster_src = html
|
||||
.select(&scraper::Selector::parse(".bangumi-poster").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("style"))
|
||||
.and_then(|style_attr| {
|
||||
parse_mikan_origin_poster_src_from_style_attr(&mikan_base_url, style_attr)
|
||||
});
|
||||
|
||||
Ok(MikanBangumiMeta {
|
||||
homepage: mikan_bangumi_homepage_url,
|
||||
bangumi_title,
|
||||
origin_poster_src,
|
||||
mikan_bangumi_id,
|
||||
fansub: fansub_info.map(|s| s.1.to_string()),
|
||||
mikan_fansub_id: fansub_info.map(|s| s.0.to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_episode_meta_from_mikan_homepage(
|
||||
client: Option<&AppMikanClient>,
|
||||
mikan_episode_homepage_url: Url,
|
||||
) -> eyre::Result<MikanEpisodeMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let mikan_base_url = mikan_episode_homepage_url.origin().unicode_serialization();
|
||||
let content = fetch_html(http_client, mikan_episode_homepage_url.as_str()).await?;
|
||||
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let bangumi_title = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.and_then(|title| if title.is_empty() { None } else { Some(title) })
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!(
|
||||
"Missing mikan bangumi official title for {}",
|
||||
mikan_episode_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
let episode_title = html
|
||||
.select(&scraper::Selector::parse("title").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.replace(" - Mikan Project", "")
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.and_then(|title| if title.is_empty() { None } else { Some(title) })
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!(
|
||||
"Missing mikan episode official title for {}",
|
||||
mikan_episode_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
let (mikan_bangumi_id, mikan_fansub_id) = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title > .mikan-rss").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.as_ref()
|
||||
.and_then(|s| mikan_episode_homepage_url.join(s).ok())
|
||||
.and_then(|rss_link_url| parse_mikan_bangumi_id_from_rss_link(&rss_link_url))
|
||||
.and_then(
|
||||
|MikanBangumiRssLink {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
..
|
||||
}| {
|
||||
mikan_fansub_id.map(|mikan_fansub_id| (mikan_bangumi_id, mikan_fansub_id))
|
||||
},
|
||||
)
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!(
|
||||
"Missing mikan bangumi rss link or error format for {}",
|
||||
mikan_episode_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
let fansub = html
|
||||
.select(&scraper::Selector::parse(".bangumi-info>.magnet-link-wrap").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!(
|
||||
"Missing mikan bangumi fansub name for {}",
|
||||
mikan_episode_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
let origin_poster_src = html
|
||||
.select(&scraper::Selector::parse(".bangumi-poster").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("style"))
|
||||
.and_then(|s| parse_mikan_origin_poster_src_from_style_attr(mikan_base_url, s));
|
||||
|
||||
let MikanEpisodeHomepage {
|
||||
mikan_episode_id, ..
|
||||
} = parse_mikan_episode_id_from_homepage(&mikan_episode_homepage_url).wrap_err_with(|| {
|
||||
format!(
|
||||
"Failed to extract mikan_episode_id from {}",
|
||||
&mikan_episode_homepage_url
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(MikanEpisodeMeta {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
bangumi_title,
|
||||
episode_title,
|
||||
homepage: mikan_episode_homepage_url,
|
||||
origin_poster_src,
|
||||
fansub,
|
||||
mikan_episode_id,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* @logined-required
|
||||
*/
|
||||
pub async fn parse_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
client: Option<&AppMikanClient>,
|
||||
my_bangumi_page_url: Url,
|
||||
) -> eyre::Result<Vec<MikanBangumiMeta>> {
|
||||
let http_client = client.map(|c| c.deref());
|
||||
let mikan_base_url = my_bangumi_page_url.origin().unicode_serialization();
|
||||
|
||||
let content = fetch_html(http_client, my_bangumi_page_url.clone()).await?;
|
||||
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let mut bangumi_list = vec![];
|
||||
for bangumi_elem in
|
||||
html.select(&scraper::Selector::parse(".sk-bangumi .an-info a.an-text").unwrap())
|
||||
{
|
||||
if let (Some(bangumi_home_page_url), Some(bangumi_title)) =
|
||||
(bangumi_elem.attr("href"), bangumi_elem.attr("title"))
|
||||
{
|
||||
let origin_poster_src = bangumi_elem
|
||||
.prev_sibling()
|
||||
.and_then(|ele| ele.value().as_element())
|
||||
.and_then(|ele| ele.attr("style"))
|
||||
.and_then(|style_attr| {
|
||||
parse_mikan_origin_poster_src_from_style_attr(
|
||||
mikan_base_url.clone(),
|
||||
style_attr,
|
||||
)
|
||||
});
|
||||
let bangumi_home_page_url = my_bangumi_page_url.join(bangumi_home_page_url)?;
|
||||
if let Some(MikanBangumiHomepage {
|
||||
ref mikan_bangumi_id,
|
||||
..
|
||||
}) = parse_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)
|
||||
{
|
||||
let bangumi_expand_info_url =
|
||||
build_mikan_bangumi_expand_info_url(mikan_base_url.clone(), mikan_bangumi_id)?;
|
||||
let bangumi_expand_info_content =
|
||||
fetch_html(http_client, bangumi_expand_info_url).await?;
|
||||
let bangumi_expand_info_fragment =
|
||||
Html::parse_fragment(&bangumi_expand_info_content);
|
||||
for fansub_info in bangumi_expand_info_fragment.select(
|
||||
&scraper::Selector::parse("js-expand_bangumi-subgroup.js-subscribed").unwrap(),
|
||||
) {
|
||||
if let (Some(fansub_name), Some(mikan_fansub_id)) = (
|
||||
fansub_info
|
||||
.select(&scraper::Selector::parse(".tag-res-name[title]").unwrap())
|
||||
.next()
|
||||
.and_then(|ele| ele.attr("title")),
|
||||
fansub_info
|
||||
.select(
|
||||
&scraper::Selector::parse(
|
||||
".active[data-subtitlegroupid][data-bangumiid]",
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.next()
|
||||
.and_then(|ele| ele.attr("data-subtitlegroupid")),
|
||||
) {
|
||||
bangumi_list.push(MikanBangumiMeta {
|
||||
homepage: build_mikan_bangumi_homepage(
|
||||
mikan_base_url.clone(),
|
||||
mikan_bangumi_id.as_str(),
|
||||
Some(mikan_fansub_id),
|
||||
)?,
|
||||
bangumi_title: bangumi_title.to_string(),
|
||||
mikan_bangumi_id: mikan_bangumi_id.to_string(),
|
||||
mikan_fansub_id: Some(mikan_fansub_id.to_string()),
|
||||
fansub: Some(fansub_name.to_string()),
|
||||
origin_poster_src: origin_poster_src.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bangumi_list)
|
||||
}
|
||||
|
||||
// NOTE: these tests request pages from https://mikanani.me directly, so they
// need network access and depend on the live site's content.
#[cfg(test)]
mod test {
    use std::assert_matches::assert_matches;

    use color_eyre::eyre;
    use url::Url;
    use zune_image::{codecs::ImageFormat, image::Image};

    use super::{
        parse_mikan_bangumi_meta_from_mikan_homepage,
        parse_mikan_bangumi_poster_from_origin_poster_src,
        parse_mikan_episode_meta_from_mikan_homepage,
    };

    /// Extracts an episode page, checks every field, then downloads the
    /// poster and checks that it decodes as a JPEG.
    #[tokio::test]
    async fn test_parse_mikan_episode() -> eyre::Result<()> {
        let url_str = "https://mikanani.me/Home/Episode/475184dce83ea2b82902592a5ac3343f6d54b36a";
        let url = Url::parse(url_str)?;

        let ep_meta = parse_mikan_episode_meta_from_mikan_homepage(None, url.clone()).await?;

        assert_eq!(ep_meta.homepage, url);
        assert_eq!(ep_meta.bangumi_title, "葬送的芙莉莲");
        assert_eq!(
            ep_meta.origin_poster_src,
            Some(Url::parse(
                "https://mikanani.me/images/Bangumi/202309/5ce9fed1.jpg"
            )?)
        );
        assert_eq!(ep_meta.fansub, "LoliHouse");
        assert_eq!(ep_meta.mikan_fansub_id, "370");
        assert_eq!(ep_meta.mikan_bangumi_id, "3141");

        assert_matches!(ep_meta.origin_poster_src, Some(..));

        let bgm_poster = parse_mikan_bangumi_poster_from_origin_poster_src(
            None,
            ep_meta.origin_poster_src.unwrap(),
        )
        .await?;
        let u8_data = bgm_poster.poster_data.expect("should have poster data");
        let image = Image::read(u8_data.to_vec(), Default::default());
        assert!(
            image.is_ok_and(|img| img
                .metadata()
                .get_image_format()
                .is_some_and(|fmt| matches!(fmt, ImageFormat::JPEG))),
            "should start with valid jpeg data magic number"
        );

        Ok(())
    }

    /// Extracts a bangumi page addressed with a fansub fragment and checks
    /// every field, including that the fragment survives in `homepage`.
    #[tokio::test]
    async fn test_parse_mikan_bangumi() -> eyre::Result<()> {
        let url_str = "https://mikanani.me/Home/Bangumi/3416#370";
        let url = Url::parse(url_str)?;

        let bgm_meta = parse_mikan_bangumi_meta_from_mikan_homepage(None, url.clone()).await?;

        assert_eq!(bgm_meta.homepage, url);
        assert_eq!(bgm_meta.bangumi_title, "叹气的亡灵想隐退");
        assert_eq!(
            bgm_meta.origin_poster_src,
            Some(Url::parse(
                "https://mikanani.me/images/Bangumi/202410/480ef127.jpg"
            )?)
        );
        assert_eq!(bgm_meta.fansub, Some(String::from("LoliHouse")));
        assert_eq!(bgm_meta.mikan_fansub_id, Some(String::from("370")));
        assert_eq!(bgm_meta.mikan_bangumi_id, "3416");

        assert_eq!(
            bgm_meta.homepage.as_str(),
            "https://mikanani.me/Home/Bangumi/3416#370"
        );

        Ok(())
    }
}
|
||||
@@ -2,6 +2,7 @@ pub mod defs;
|
||||
pub mod errors;
|
||||
pub mod html;
|
||||
pub mod http;
|
||||
pub mod media;
|
||||
pub mod mikan;
|
||||
pub mod rawname;
|
||||
pub mod torrent;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::{ops::Deref, sync::Arc, time::Duration};
|
||||
use std::{fmt::Debug, ops::Deref, sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::http::{self, Extensions};
|
||||
@@ -11,7 +11,7 @@ use reqwest::{ClientBuilder, Request, Response};
|
||||
use reqwest_middleware::{
|
||||
ClientBuilder as ClientWithMiddlewareBuilder, ClientWithMiddleware, Next,
|
||||
};
|
||||
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
|
||||
use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff};
|
||||
use reqwest_tracing::TracingMiddleware;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::serde_as;
|
||||
@@ -101,6 +101,14 @@ pub struct HttpClient {
|
||||
pub config: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl Debug for HttpClient {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("HttpClient")
|
||||
.field("config", &self.config)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HttpClient> for ClientWithMiddleware {
|
||||
fn from(val: HttpClient) -> Self {
|
||||
val.client
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
#![feature(duration_constructors, assert_matches, unboxed_closures)]
|
||||
#![feature(
|
||||
duration_constructors,
|
||||
assert_matches,
|
||||
unboxed_closures,
|
||||
impl_trait_in_bindings
|
||||
)]
|
||||
|
||||
pub mod app;
|
||||
pub mod auth;
|
||||
|
||||
@@ -201,7 +201,7 @@ impl ActiveModel {
|
||||
.ok()
|
||||
.unwrap_or_default();
|
||||
let homepage = build_mikan_episode_homepage(
|
||||
ctx.get_mikan_client().base_url(),
|
||||
ctx.get_mikan_client().base_url().clone(),
|
||||
&item.mikan_episode_id,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -12,11 +12,11 @@ use crate::{
|
||||
extract::{
|
||||
mikan::{
|
||||
build_mikan_bangumi_homepage, build_mikan_bangumi_rss_link,
|
||||
parse_mikan_bangumi_meta_from_mikan_homepage,
|
||||
parse_mikan_episode_meta_from_mikan_homepage, parse_mikan_rss_channel_from_rss_link,
|
||||
web_parser::{
|
||||
MikanBangumiPosterMeta,
|
||||
parse_mikan_bangumi_poster_from_origin_poster_src_with_cache,
|
||||
extract_mikan_bangumi_meta_from_bangumi_homepage,
|
||||
extract_mikan_episode_meta_from_episode_homepage,
|
||||
parse_mikan_rss_channel_from_rss_link,
|
||||
web_extract::{
|
||||
MikanBangumiPosterMeta, extract_mikan_bangumi_poster_meta_from_src_with_cache,
|
||||
},
|
||||
},
|
||||
rawname::extract_season_from_title_body,
|
||||
@@ -256,7 +256,7 @@ impl Model {
|
||||
let mut new_metas = vec![];
|
||||
for new_rss_item in new_rss_items.iter() {
|
||||
new_metas.push(
|
||||
parse_mikan_episode_meta_from_mikan_homepage(
|
||||
extract_mikan_episode_meta_from_episode_homepage(
|
||||
Some(mikan_client),
|
||||
new_rss_item.homepage.clone(),
|
||||
)
|
||||
@@ -272,12 +272,12 @@ impl Model {
|
||||
{
|
||||
let mikan_base_url = ctx.get_mikan_client().base_url();
|
||||
let bgm_homepage = build_mikan_bangumi_homepage(
|
||||
mikan_base_url,
|
||||
mikan_base_url.clone(),
|
||||
&mikan_bangumi_id,
|
||||
Some(&mikan_fansub_id),
|
||||
)?;
|
||||
let bgm_rss_link = build_mikan_bangumi_rss_link(
|
||||
mikan_base_url,
|
||||
mikan_base_url.clone(),
|
||||
&mikan_bangumi_id,
|
||||
Some(&mikan_fansub_id),
|
||||
)?;
|
||||
@@ -289,7 +289,7 @@ impl Model {
|
||||
mikan_bangumi_id.to_string(),
|
||||
mikan_fansub_id.to_string(),
|
||||
async |am| -> color_eyre::eyre::Result<()> {
|
||||
let bgm_meta = parse_mikan_bangumi_meta_from_mikan_homepage(
|
||||
let bgm_meta = extract_mikan_bangumi_meta_from_bangumi_homepage(
|
||||
Some(mikan_client),
|
||||
bgm_homepage.clone(),
|
||||
)
|
||||
@@ -306,9 +306,9 @@ impl Model {
|
||||
am.fansub = ActiveValue::Set(bgm_meta.fansub);
|
||||
if let Some(origin_poster_src) = bgm_meta.origin_poster_src {
|
||||
if let MikanBangumiPosterMeta {
|
||||
poster_src: Some(poster_src),
|
||||
..
|
||||
} = parse_mikan_bangumi_poster_from_origin_poster_src_with_cache(
|
||||
poster_src: Some(poster_src),
|
||||
..
|
||||
} = extract_mikan_bangumi_poster_meta_from_src_with_cache(
|
||||
ctx,
|
||||
origin_poster_src,
|
||||
self.subscriber_id,
|
||||
@@ -318,7 +318,7 @@ impl Model {
|
||||
am.poster_link = ActiveValue::Set(Some(poster_src))
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
.await?,
|
||||
|
||||
17
apps/recorder/src/test_utils/mikan.rs
Normal file
17
apps/recorder/src/test_utils/mikan.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
use color_eyre::eyre;
|
||||
use reqwest::IntoUrl;
|
||||
|
||||
use crate::{
|
||||
extract::mikan::{AppMikanClient, AppMikanConfig},
|
||||
fetch::HttpClientConfig,
|
||||
};
|
||||
|
||||
pub fn build_testing_mikan_client(base_mikan_url: impl IntoUrl) -> eyre::Result<AppMikanClient> {
|
||||
let mikan_client = AppMikanClient::new(AppMikanConfig {
|
||||
http_client: HttpClientConfig {
|
||||
..Default::default()
|
||||
},
|
||||
base_url: base_mikan_url.into_url()?,
|
||||
})?;
|
||||
Ok(mikan_client)
|
||||
}
|
||||
@@ -1,2 +1,4 @@
|
||||
pub mod mikan;
|
||||
#[cfg(feature = "testcontainers")]
|
||||
pub mod testcontainers;
|
||||
pub mod tracing;
|
||||
|
||||
12
apps/recorder/src/test_utils/tracing.rs
Normal file
12
apps/recorder/src/test_utils/tracing.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
use tracing::Level;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
pub fn init_testing_tracing(level: Level) {
|
||||
let crate_name = env!("CARGO_PKG_NAME");
|
||||
let filter = EnvFilter::new(format!(
|
||||
"{}[]={}",
|
||||
crate_name,
|
||||
level.as_str().to_lowercase()
|
||||
));
|
||||
tracing_subscriber::fmt().with_env_filter(filter).init();
|
||||
}
|
||||
Reference in New Issue
Block a user