feat: add replay-stream-tasks pattern support

This commit is contained in:
2025-03-08 16:43:00 +08:00
parent e66573b315
commit f94e175082
47 changed files with 989 additions and 318 deletions

View File

@@ -1,7 +1,7 @@
use std::ops::Deref;
use std::{fmt::Debug, ops::Deref};
use reqwest_middleware::ClientWithMiddleware;
use secrecy::{ExposeSecret, SecretString};
use serde::{Deserialize, Serialize};
use url::Url;
use super::MikanConfig;
@@ -10,15 +10,24 @@ use crate::{
fetch::{HttpClient, HttpClientTrait, client::HttpClientCookiesAuth},
};
#[derive(Debug, Default, Clone)]
#[derive(Default, Clone, Deserialize, Serialize)]
pub struct MikanAuthSecrecy {
pub cookie: SecretString,
pub cookie: String,
pub user_agent: Option<String>,
}
impl Debug for MikanAuthSecrecy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("MikanAuthSecrecy")
.field("cookie", &String::from("[secrecy]"))
.field("user_agent", &String::from("[secrecy]"))
.finish()
}
}
impl MikanAuthSecrecy {
pub fn into_cookie_auth(self, url: &Url) -> Result<HttpClientCookiesAuth, RError> {
HttpClientCookiesAuth::from_cookies(self.cookie.expose_secret(), url, self.user_agent)
HttpClientCookiesAuth::from_cookies(&self.cookie, url, self.user_agent)
}
}
@@ -38,9 +47,13 @@ impl MikanClient {
})
}
pub fn fork_with_auth(&self, secrecy: MikanAuthSecrecy) -> Result<Self, RError> {
let cookie_auth = secrecy.into_cookie_auth(&self.base_url)?;
let fork = self.http_client.fork().attach_secrecy(cookie_auth);
pub fn fork_with_auth(&self, secrecy: Option<MikanAuthSecrecy>) -> Result<Self, RError> {
let mut fork = self.http_client.fork();
if let Some(secrecy) = secrecy {
let cookie_auth = secrecy.into_cookie_auth(&self.base_url)?;
fork = fork.attach_secrecy(cookie_auth);
}
Ok(Self {
http_client: HttpClient::from_fork(fork)?,

View File

@@ -1,18 +1,20 @@
use std::borrow::Cow;
use std::{borrow::Cow, sync::Arc};
use async_stream::try_stream;
use bytes::Bytes;
use futures::Stream;
use itertools::Itertools;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use tracing::instrument;
use url::Url;
use super::{
MIKAN_BUCKET_KEY, MikanBangumiRssLink, MikanClient, extract_mikan_bangumi_id_from_rss_link,
MIKAN_BUCKET_KEY, MikanAuthSecrecy, MikanBangumiRssLink, MikanClient,
extract_mikan_bangumi_id_from_rss_link,
};
use crate::{
app::AppContext,
app::AppContextTrait,
errors::{RError, RResult},
extract::{
html::{extract_background_image_src_from_style_attr, extract_inner_text_from_element_ref},
@@ -20,6 +22,7 @@ use crate::{
},
fetch::{html::fetch_html, image::fetch_image},
storage::StorageContentCategory,
tasks::core::{StandardStreamTaskReplayLayout, StreamTaskRunnerTrait},
};
#[derive(Clone, Debug, PartialEq)]
@@ -34,7 +37,7 @@ pub struct MikanEpisodeMeta {
pub mikan_episode_id: String,
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct MikanBangumiMeta {
pub homepage: Url,
pub origin_poster_src: Option<Url>,
@@ -123,12 +126,12 @@ pub async fn extract_mikan_poster_meta_from_src(
}
pub async fn extract_mikan_bangumi_poster_meta_from_src_with_cache(
ctx: &AppContext,
ctx: &dyn AppContextTrait,
origin_poster_src_url: Url,
subscriber_id: i32,
) -> RResult<MikanBangumiPosterMeta> {
let dal_client = &ctx.storage;
let mikan_client = &ctx.mikan;
let dal_client = ctx.storage();
let mikan_client = ctx.mikan();
if let Some(poster_src) = dal_client
.exists_object(
StorageContentCategory::Image,
@@ -346,126 +349,141 @@ pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage(
})
}
/**
* @logined-required
*/
#[instrument(skip_all, fields(my_bangumi_page_url = my_bangumi_page_url.as_str()))]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExtractMikanBangumisMetaFromMyBangumiRequest {
pub my_bangumi_page_url: Url,
pub auth_securcy: Option<MikanAuthSecrecy>,
}
pub type ExtractMikanBangumisMetaFromMyBangumiTask =
StandardStreamTaskReplayLayout<ExtractMikanBangumisMetaFromMyBangumiRequest, MikanBangumiMeta>;
#[instrument(skip_all, fields(my_bangumi_page_url, auth_securcy = ?auth_securcy, history = history.len()))]
pub fn extract_mikan_bangumis_meta_from_my_bangumi_page(
http_client: &MikanClient,
context: Arc<dyn AppContextTrait>,
my_bangumi_page_url: Url,
) -> impl Stream<Item = Result<MikanBangumiMeta, RError>> {
auth_securcy: Option<MikanAuthSecrecy>,
history: &[Arc<RResult<MikanBangumiMeta>>],
) -> impl Stream<Item = RResult<MikanBangumiMeta>> {
try_stream! {
let http_client = &context.mikan().fork_with_auth(auth_securcy.clone())?;
let mikan_base_url = Url::parse(&my_bangumi_page_url.origin().unicode_serialization())?;
let content = fetch_html(http_client, my_bangumi_page_url.clone()).await?;
let bangumi_container_selector = &Selector::parse(".sk-bangumi .an-ul>li").unwrap();
let bangumi_info_selector = &Selector::parse(".an-info a.an-text").unwrap();
let bangumi_poster_selector =
&Selector::parse("span[data-src][data-bangumiid], span[data-bangumiid][style]")
.unwrap();
let fansub_container_selector =
&Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap();
&Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap();
let fansub_title_selector = &Selector::parse(".tag-res-name[title]").unwrap();
let fansub_id_selector =
&Selector::parse(".active[data-subtitlegroupid][data-bangumiid]").unwrap();
let bangumi_iters = {
let bangumi_items = {
let html = Html::parse_document(&content);
let bangumi_container_selector = &Selector::parse(".sk-bangumi .an-ul>li").unwrap();
let bangumi_info_selector = &Selector::parse(".an-info a.an-text").unwrap();
let bangumi_poster_selector =
&Selector::parse("span[data-src][data-bangumiid], span[data-bangumiid][style]")
.unwrap();
html.select(bangumi_container_selector)
.filter_map(|bangumi_elem| {
let title_and_href_elem = bangumi_elem.select(bangumi_info_selector).next();
let poster_elem = bangumi_elem.select(bangumi_poster_selector).next();
if let (Some(bangumi_home_page_url), Some(bangumi_title)) = (
title_and_href_elem.and_then(|elem| elem.attr("href")),
title_and_href_elem.and_then(|elem| elem.attr("title")),
) {
let origin_poster_src = poster_elem.and_then(|ele| {
ele.attr("data-src")
.and_then(|data_src| {
extract_image_src_from_str(data_src, &mikan_base_url)
.filter_map(|bangumi_elem| {
let title_and_href_elem =
bangumi_elem.select(bangumi_info_selector).next();
let poster_elem = bangumi_elem.select(bangumi_poster_selector).next();
if let (Some(bangumi_home_page_url), Some(bangumi_title)) = (
title_and_href_elem.and_then(|elem| elem.attr("href")),
title_and_href_elem.and_then(|elem| elem.attr("title")),
) {
let origin_poster_src = poster_elem.and_then(|ele| {
ele.attr("data-src")
.and_then(|data_src| {
extract_image_src_from_str(data_src, &mikan_base_url)
})
.or_else(|| {
ele.attr("style").and_then(|style| {
extract_background_image_src_from_style_attr(
style,
&mikan_base_url,
)
})
.or_else(|| {
ele.attr("style").and_then(|style| {
extract_background_image_src_from_style_attr(
style,
&mikan_base_url,
)
})
})
});
let bangumi_title = bangumi_title.to_string();
let bangumi_home_page_url =
my_bangumi_page_url.join(bangumi_home_page_url).ok()?;
let MikanBangumiHomepage {
mikan_bangumi_id, ..
} = extract_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)?;
if let Some(origin_poster_src) = origin_poster_src.as_ref() {
tracing::trace!(
origin_poster_src = origin_poster_src.as_str(),
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted"
);
} else {
tracing::warn!(
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted, but failed to extract poster_src"
);
}
let bangumi_expand_info_url = build_mikan_bangumi_expand_info_url(
mikan_base_url.clone(),
&mikan_bangumi_id,
);
Some((
})
});
let bangumi_title = bangumi_title.to_string();
let bangumi_home_page_url =
my_bangumi_page_url.join(bangumi_home_page_url).ok()?;
let MikanBangumiHomepage {
mikan_bangumi_id, ..
} = extract_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)?;
if let Some(origin_poster_src) = origin_poster_src.as_ref() {
tracing::trace!(
origin_poster_src = origin_poster_src.as_str(),
bangumi_title,
mikan_bangumi_id,
bangumi_expand_info_url,
origin_poster_src,
))
"bangumi info extracted"
);
} else {
None
tracing::warn!(
bangumi_title,
mikan_bangumi_id,
"bangumi info extracted, but failed to extract poster_src"
);
}
})
.collect_vec()
};
for (bangumi_title, mikan_bangumi_id, bangumi_expand_info_url, origin_poster_src) in
bangumi_iters
{
if let Some((fansub_name, mikan_fansub_id)) = {
let bangumi_expand_info_content = fetch_html(http_client, bangumi_expand_info_url).await?;
let bangumi_expand_info_fragment = Html::parse_fragment(&bangumi_expand_info_content);
bangumi_expand_info_fragment.select(fansub_container_selector).next().and_then(|fansub_info| {
if let (Some(fansub_name), Some(mikan_fansub_id)) = (
fansub_info
.select(fansub_title_selector)
.next()
.and_then(|ele| ele.attr("title"))
.map(String::from),
fansub_info
.select(fansub_id_selector)
.next()
.and_then(|ele| ele.attr("data-subtitlegroupid"))
.map(String::from)
) {
Some((fansub_name, mikan_fansub_id))
} else {
None
}
})
} {
tracing::trace!(
fansub_name,
mikan_fansub_id,
"subscribed fansub extracted"
);
yield MikanBangumiMeta {
homepage: build_mikan_bangumi_homepage(
let bangumi_expand_info_url = build_mikan_bangumi_expand_info_url(
mikan_base_url.clone(),
&mikan_bangumi_id,
);
Some((
bangumi_title,
mikan_bangumi_id,
bangumi_expand_info_url,
origin_poster_src,
))
} else {
None
}
})
.collect_vec()
};
for (idx, (bangumi_title, mikan_bangumi_id, bangumi_expand_info_url, origin_poster_src)) in
bangumi_items.iter().enumerate()
{
if history.get(idx).is_some() {
continue;
} else if let Some((fansub_name, mikan_fansub_id)) = {
let bangumi_expand_info_content =
fetch_html(http_client, bangumi_expand_info_url.clone()).await?;
let bangumi_expand_info_fragment =
Html::parse_fragment(&bangumi_expand_info_content);
bangumi_expand_info_fragment
.select(fansub_container_selector)
.next()
.and_then(|fansub_info| {
if let (Some(fansub_name), Some(mikan_fansub_id)) = (
fansub_info
.select(fansub_title_selector)
.next()
.and_then(|ele| ele.attr("title"))
.map(String::from),
fansub_info
.select(fansub_id_selector)
.next()
.and_then(|ele| ele.attr("data-subtitlegroupid"))
.map(String::from),
) {
Some((fansub_name, mikan_fansub_id))
} else {
None
}
})
} {
tracing::trace!(fansub_name, mikan_fansub_id, "subscribed fansub extracted");
let item = MikanBangumiMeta {
homepage: build_mikan_bangumi_homepage(
mikan_base_url.clone(),
mikan_bangumi_id,
Some(&mikan_fansub_id),
),
bangumi_title: bangumi_title.to_string(),
@@ -474,11 +492,28 @@ pub fn extract_mikan_bangumis_meta_from_my_bangumi_page(
fansub: Some(fansub_name),
origin_poster_src: origin_poster_src.clone(),
};
yield item;
}
}
}
}
impl StreamTaskRunnerTrait for ExtractMikanBangumisMetaFromMyBangumiTask {
fn run(
context: Arc<dyn AppContextTrait>,
request: &Self::Request,
history: &[Arc<RResult<Self::Item>>],
) -> impl Stream<Item = RResult<Self::Item>> {
let context = context.clone();
extract_mikan_bangumis_meta_from_my_bangumi_page(
context,
request.my_bangumi_page_url.clone(),
request.auth_securcy.clone(),
history,
)
}
}
#[cfg(test)]
mod test {
#![allow(unused_variables)]
@@ -486,22 +521,19 @@ mod test {
use futures::{TryStreamExt, pin_mut};
use http::header;
use rstest::{fixture, rstest};
use secrecy::SecretString;
use tracing::Level;
use url::Url;
use zune_image::{codecs::ImageFormat, image::Image};
use super::*;
use crate::{
extract::mikan::{
MikanAuthSecrecy, web_extract::extract_mikan_bangumis_meta_from_my_bangumi_page,
},
test_utils::{mikan::build_testing_mikan_client, tracing::init_testing_tracing},
use crate::test_utils::{
app::UnitTestAppContext, mikan::build_testing_mikan_client,
tracing::try_init_testing_tracing,
};
#[fixture]
fn before_each() {
init_testing_tracing(Level::INFO);
try_init_testing_tracing(Level::INFO);
}
#[rstest]
@@ -625,7 +657,11 @@ mod test {
let my_bangumi_page_url = mikan_base_url.join("/Home/MyBangumi")?;
let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?;
let context = Arc::new(
UnitTestAppContext::builder()
.mikan(build_testing_mikan_client(mikan_base_url.clone()).await?)
.build(),
);
{
let my_bangumi_without_cookie_mock = mikan_server
@@ -636,8 +672,10 @@ mod test {
.await;
let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page(
&mikan_client,
context.clone(),
my_bangumi_page_url.clone(),
None,
&[],
);
pin_mut!(bangumi_metas);
@@ -671,8 +709,8 @@ mod test {
.create_async()
.await;
let mikan_client_with_cookie = mikan_client.fork_with_auth(MikanAuthSecrecy {
cookie: SecretString::from(
let auth_secrecy = Some(MikanAuthSecrecy {
cookie: String::from(
"mikan-announcement=1; .AspNetCore.Antiforgery.abc=abc; \
.AspNetCore.Identity.Application=abc; ",
),
@@ -680,11 +718,13 @@ mod test {
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like \
Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0",
)),
})?;
});
let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page(
&mikan_client_with_cookie,
context.clone(),
my_bangumi_page_url,
auth_secrecy,
&[],
);
pin_mut!(bangumi_metas);
let bangumi_metas = bangumi_metas.try_collect::<Vec<_>>().await?;