feat: return mikan bangumi metas from my bangumi page as a stream

This commit is contained in:
2025-02-27 01:03:32 +08:00
parent 6887b2a069
commit c0707d17bb
5 changed files with 292 additions and 290 deletions

View File

@@ -42,7 +42,7 @@ axum = "0.8"
uuid = { version = "1.6.0", features = ["v4"] }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
sea-orm-migration = { version = "1.1", features = ["runtime-tokio-rustls"] }
reqwest = { version = "0.12", features = [
reqwest = { version = "0.12", default-features = false, features = [
"charset",
"http2",
"json",
@@ -100,7 +100,7 @@ axum-extra = "0.10.0"
tower-http = "0.6.2"
serde_yaml = "0.9.34"
tera = "1.20.0"
openidconnect = "4"
openidconnect = { version = "4", features = ["rustls-tls"] }
http-cache-reqwest = { version = "0.15", features = [
"manager-cacache",
"manager-moka",
@@ -117,6 +117,7 @@ nom = "8.0.0"
secrecy = { version = "0.10.3", features = ["serde"] }
http = "1.2.0"
cookie = "0.18.1"
async-stream = "0.3.6"
[dev-dependencies]
serial_test = "3"

View File

@@ -10,16 +10,16 @@ use async_trait::async_trait;
pub use ext::AppContextExt;
use itertools::Itertools;
use loco_rs::{
Result,
app::{AppContext, Hooks},
boot::{create_app, BootResult, StartMode},
boot::{BootResult, StartMode, create_app},
cache,
config::Config,
controller::{middleware, middleware::MiddlewareLayer, AppRoutes},
controller::{AppRoutes, middleware, middleware::MiddlewareLayer},
db::truncate_table,
environment::Environment,
prelude::*,
task::Tasks,
Result,
};
use once_cell::sync::OnceCell;

View File

@@ -1,6 +1,8 @@
use std::borrow::Cow;
use async_stream::try_stream;
use bytes::Bytes;
use futures::Stream;
use itertools::Itertools;
use loco_rs::app::AppContext;
use scraper::{Html, Selector};
@@ -357,134 +359,140 @@ pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage(
* @logined-required
*/
#[instrument(skip_all, fields(my_bangumi_page_url = my_bangumi_page_url.as_str()))]
pub async fn extract_mikan_bangumis_meta_from_my_bangumi_page(
pub fn extract_mikan_bangumis_meta_from_my_bangumi_page(
http_client: &AppMikanClient,
my_bangumi_page_url: Url,
) -> Result<Vec<MikanBangumiMeta>, RecorderError> {
let mikan_base_url = Url::parse(&my_bangumi_page_url.origin().unicode_serialization())?;
) -> impl Stream<Item = Result<MikanBangumiMeta, RecorderError>> {
try_stream! {
let mikan_base_url = Url::parse(&my_bangumi_page_url.origin().unicode_serialization())?;
let content = fetch_html(http_client, my_bangumi_page_url.clone()).await?;
let content = fetch_html(http_client, my_bangumi_page_url.clone()).await?;
let fansub_container_selector =
&Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap();
let fansub_title_selector = &Selector::parse(".tag-res-name[title]").unwrap();
let fansub_id_selector =
&Selector::parse(".active[data-subtitlegroupid][data-bangumiid]").unwrap();
let bangumi_iters = {
let bangumi_container_selector = &Selector::parse(".sk-bangumi .an-ul>li").unwrap();
let bangumi_info_selector = &Selector::parse(".an-info a.an-text").unwrap();
let bangumi_poster_selector =
&Selector::parse("span[data-src][data-bangumiid], span[data-bangumiid][style]")
.unwrap();
let html = Html::parse_document(&content);
let fansub_container_selector =
&Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap();
let fansub_title_selector = &Selector::parse(".tag-res-name[title]").unwrap();
let fansub_id_selector =
&Selector::parse(".active[data-subtitlegroupid][data-bangumiid]").unwrap();
html.select(bangumi_container_selector)
.filter_map(|bangumi_elem| {
let title_and_href_elem = bangumi_elem.select(bangumi_info_selector).next();
let poster_elem = bangumi_elem.select(bangumi_poster_selector).next();
if let (Some(bangumi_home_page_url), Some(bangumi_title)) = (
title_and_href_elem.and_then(|elem| elem.attr("href")),
title_and_href_elem.and_then(|elem| elem.attr("title")),
) {
let origin_poster_src = poster_elem.and_then(|ele| {
ele.attr("data-src")
.and_then(|data_src| {
extract_image_src_from_str(data_src, &mikan_base_url)
})
.or_else(|| {
ele.attr("style").and_then(|style| {
extract_background_image_src_from_style_attr(
style,
&mikan_base_url,
)
let bangumi_iters = {
let html = Html::parse_document(&content);
html.select(bangumi_container_selector)
.filter_map(|bangumi_elem| {
let title_and_href_elem = bangumi_elem.select(bangumi_info_selector).next();
let poster_elem = bangumi_elem.select(bangumi_poster_selector).next();
if let (Some(bangumi_home_page_url), Some(bangumi_title)) = (
title_and_href_elem.and_then(|elem| elem.attr("href")),
title_and_href_elem.and_then(|elem| elem.attr("title")),
) {
let origin_poster_src = poster_elem.and_then(|ele| {
ele.attr("data-src")
.and_then(|data_src| {
extract_image_src_from_str(data_src, &mikan_base_url)
})
})
});
let bangumi_title = bangumi_title.to_string();
let bangumi_home_page_url =
my_bangumi_page_url.join(bangumi_home_page_url).ok()?;
let MikanBangumiHomepage {
mikan_bangumi_id, ..
} = extract_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)?;
if let Some(origin_poster_src) = origin_poster_src.as_ref() {
tracing::trace!(
origin_poster_src = origin_poster_src.as_str(),
.or_else(|| {
ele.attr("style").and_then(|style| {
extract_background_image_src_from_style_attr(
style,
&mikan_base_url,
)
})
})
});
let bangumi_title = bangumi_title.to_string();
let bangumi_home_page_url =
my_bangumi_page_url.join(bangumi_home_page_url).ok()?;
let MikanBangumiHomepage {
mikan_bangumi_id, ..
} = extract_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)?;
if let Some(origin_poster_src) = origin_poster_src.as_ref() {
tracing::trace!(
origin_poster_src = origin_poster_src.as_str(),
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted"
);
} else {
tracing::warn!(
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted, but failed to extract poster_src"
);
}
let bangumi_expand_info_url = build_mikan_bangumi_expand_info_url(
mikan_base_url.clone(),
&mikan_bangumi_id,
);
Some((
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted"
);
bangumi_expand_info_url,
origin_poster_src,
))
} else {
tracing::warn!(
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted, but failed to extract poster_src"
);
None
}
let bangumi_expand_info_url = build_mikan_bangumi_expand_info_url(
mikan_base_url.clone(),
&mikan_bangumi_id,
);
Some((
bangumi_title,
mikan_bangumi_id,
bangumi_expand_info_url,
origin_poster_src,
))
} else {
None
}
})
.collect_vec()
};
})
.collect_vec()
};
let mut bangumi_list = vec![];
for (bangumi_title, mikan_bangumi_id, bangumi_expand_info_url, origin_poster_src) in
for (bangumi_title, mikan_bangumi_id, bangumi_expand_info_url, origin_poster_src) in
bangumi_iters
{
let bangumi_expand_info_content = fetch_html(http_client, bangumi_expand_info_url).await?;
let bangumi_expand_info_fragment = Html::parse_fragment(&bangumi_expand_info_content);
for fansub_info in bangumi_expand_info_fragment.select(fansub_container_selector) {
if let (Some(fansub_name), Some(mikan_fansub_id)) = (
fansub_info
.select(fansub_title_selector)
.next()
.and_then(|ele| ele.attr("title")),
fansub_info
.select(fansub_id_selector)
.next()
.and_then(|ele| ele.attr("data-subtitlegroupid")),
) {
{
if let Some((fansub_name, mikan_fansub_id)) = {
let bangumi_expand_info_content = fetch_html(http_client, bangumi_expand_info_url).await?;
let bangumi_expand_info_fragment = Html::parse_fragment(&bangumi_expand_info_content);
bangumi_expand_info_fragment.select(fansub_container_selector).next().and_then(|fansub_info| {
if let (Some(fansub_name), Some(mikan_fansub_id)) = (
fansub_info
.select(fansub_title_selector)
.next()
.and_then(|ele| ele.attr("title"))
.map(String::from),
fansub_info
.select(fansub_id_selector)
.next()
.and_then(|ele| ele.attr("data-subtitlegroupid"))
.map(String::from)
) {
Some((fansub_name, mikan_fansub_id))
} else {
None
}
})
} {
tracing::trace!(
fansub_name = &fansub_name,
fansub_name,
mikan_fansub_id,
"subscribed fansub extracted"
);
bangumi_list.push(MikanBangumiMeta {
yield MikanBangumiMeta {
homepage: build_mikan_bangumi_homepage(
mikan_base_url.clone(),
mikan_bangumi_id.as_str(),
Some(mikan_fansub_id),
&mikan_bangumi_id,
Some(&mikan_fansub_id),
),
bangumi_title: bangumi_title.to_string(),
mikan_bangumi_id: mikan_bangumi_id.to_string(),
mikan_fansub_id: Some(mikan_fansub_id.to_string()),
fansub: Some(fansub_name.to_string()),
mikan_fansub_id: Some(mikan_fansub_id),
fansub: Some(fansub_name),
origin_poster_src: origin_poster_src.clone(),
})
};
}
}
}
Ok(bangumi_list)
}
#[cfg(test)]
mod test {
#![allow(unused_variables)]
use color_eyre::eyre;
use futures::{TryStreamExt, pin_mut};
use http::header;
use rstest::{fixture, rstest};
use secrecy::SecretString;
@@ -639,8 +647,11 @@ mod test {
let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page(
&mikan_client,
my_bangumi_page_url.clone(),
)
.await?;
);
pin_mut!(bangumi_metas);
let bangumi_metas = bangumi_metas.try_collect::<Vec<_>>().await?;
assert!(bangumi_metas.is_empty());
@@ -679,11 +690,13 @@ mod test {
Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0",
)),
})?;
let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page(
&mikan_client_with_cookie,
my_bangumi_page_url,
)
.await?;
);
pin_mut!(bangumi_metas);
let bangumi_metas = bangumi_metas.try_collect::<Vec<_>>().await?;
assert!(!bangumi_metas.is_empty());

View File

@@ -1,9 +1,9 @@
use futures::{TryStreamExt, pin_mut};
use loco_rs::prelude::*;
use crate::{
app::AppContextExt,
extract::mikan::{
MikanAuthSecrecy, web_extract::extract_mikan_bangumis_meta_from_my_bangumi_page,
MikanAuthSecrecy
},
};
@@ -31,15 +31,21 @@ impl Task for CreateMikanRSSFromMyBangumiTask {
.get_mikan_client()
.fork_with_auth(self.auth_secrecy.clone())?;
// TODO: the extracted bangumi metas are not consumed yet
let _bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page(
&mikan_client,
mikan_client
.base_url()
.join("/Home/MyBangumi")
.map_err(loco_rs::Error::wrap)?,
)
.await?;
{
    // Build the stream of bangumi metas extracted from the "/Home/MyBangumi"
    // page, using the auth-forked client created above. The extractor is
    // called by its full path so this block does not depend on a `use` line
    // for it.
    let bangumi_metas =
        crate::extract::mikan::web_extract::extract_mikan_bangumis_meta_from_my_bangumi_page(
            &mikan_client,
            mikan_client
                .base_url()
                .join("/Home/MyBangumi")
                .map_err(loco_rs::Error::wrap)?,
        );
    // Pin the stream on the stack, then drain it; the first `Err` item
    // aborts the collect and is propagated by `?`.
    pin_mut!(bangumi_metas);
    let _bangumi_metas = bangumi_metas.try_collect::<Vec<_>>().await?;
    // TODO: hand the collected metas to a consumer. The previous revision
    // ended this block with an unfinished `tokio::sync::broadcast::` path,
    // which did not parse; presumably a broadcast channel was intended —
    // confirm before wiring it up.
}
Ok(())
}