fix: fix scrape mikan season bangumi list
This commit is contained in:
@@ -2,7 +2,9 @@ use std::{fmt::Debug, ops::Deref, sync::Arc};
|
||||
|
||||
use fetch::{HttpClient, HttpClientTrait};
|
||||
use maplit::hashmap;
|
||||
use sea_orm::{ActiveModelTrait, ActiveValue::Set, DbErr, TryIntoModel};
|
||||
use sea_orm::{
|
||||
ActiveModelTrait, ActiveValue::Set, ColumnTrait, DbErr, EntityTrait, QueryFilter, TryIntoModel,
|
||||
};
|
||||
use url::Url;
|
||||
use util::OptDynErr;
|
||||
|
||||
@@ -137,7 +139,7 @@ impl MikanClient {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn save_credential(
|
||||
pub async fn submit_credential_form(
|
||||
&self,
|
||||
ctx: Arc<dyn AppContextTrait>,
|
||||
subscriber_id: i32,
|
||||
@@ -159,49 +161,67 @@ impl MikanClient {
|
||||
Ok(credential)
|
||||
}
|
||||
|
||||
pub async fn sync_credential_cookies(
|
||||
&self,
|
||||
ctx: Arc<dyn AppContextTrait>,
|
||||
credential_id: i32,
|
||||
) -> RecorderResult<()> {
|
||||
let cookies = self.http_client.save_cookie_store_to_json()?;
|
||||
if let Some(cookies) = cookies {
|
||||
let am = credential_3rd::ActiveModel {
|
||||
cookies: Set(Some(cookies)),
|
||||
..Default::default()
|
||||
}
|
||||
.try_encrypt(ctx.clone())
|
||||
.await?;
|
||||
|
||||
credential_3rd::Entity::update_many()
|
||||
.set(am)
|
||||
.filter(credential_3rd::Column::Id.eq(credential_id))
|
||||
.exec(ctx.db())
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn fork_with_credential(
|
||||
&self,
|
||||
ctx: Arc<dyn AppContextTrait>,
|
||||
credential_id: Option<i32>,
|
||||
credential_id: i32,
|
||||
) -> RecorderResult<Self> {
|
||||
let mut fork = self.http_client.fork();
|
||||
let mut userpass_credential_opt = None;
|
||||
|
||||
if let Some(credential_id) = credential_id {
|
||||
let credential = credential_3rd::Model::find_by_id(ctx.clone(), credential_id).await?;
|
||||
if let Some(credential) = credential {
|
||||
if credential.credential_type != Credential3rdType::Mikan {
|
||||
return Err(RecorderError::Credential3rdError {
|
||||
message: "credential is not a mikan credential".to_string(),
|
||||
source: None.into(),
|
||||
});
|
||||
}
|
||||
|
||||
let userpass_credential: UserPassCredential =
|
||||
credential.try_into_userpass_credential(ctx)?;
|
||||
|
||||
if let Some(cookies) = userpass_credential.cookies.as_ref() {
|
||||
fork = fork.attach_cookies(cookies)?;
|
||||
}
|
||||
|
||||
if let Some(user_agent) = userpass_credential.user_agent.as_ref() {
|
||||
fork = fork.attach_user_agent(user_agent);
|
||||
}
|
||||
|
||||
userpass_credential_opt = Some(userpass_credential);
|
||||
} else {
|
||||
return Err(RecorderError::from_db_record_not_found(
|
||||
DbErr::RecordNotFound(format!("credential={credential_id} not found")),
|
||||
));
|
||||
let credential = credential_3rd::Model::find_by_id(ctx.clone(), credential_id).await?;
|
||||
if let Some(credential) = credential {
|
||||
if credential.credential_type != Credential3rdType::Mikan {
|
||||
return Err(RecorderError::Credential3rdError {
|
||||
message: "credential is not a mikan credential".to_string(),
|
||||
source: None.into(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
http_client: HttpClient::from_fork(fork)?,
|
||||
base_url: self.base_url.clone(),
|
||||
origin_url: self.origin_url.clone(),
|
||||
userpass_credential: userpass_credential_opt,
|
||||
})
|
||||
let userpass_credential: UserPassCredential =
|
||||
credential.try_into_userpass_credential(ctx)?;
|
||||
|
||||
fork = fork.attach_cookies(userpass_credential.cookies.as_deref())?;
|
||||
|
||||
if let Some(user_agent) = userpass_credential.user_agent.as_ref() {
|
||||
fork = fork.attach_user_agent(user_agent);
|
||||
}
|
||||
|
||||
let userpass_credential_opt = Some(userpass_credential);
|
||||
|
||||
Ok(Self {
|
||||
http_client: HttpClient::from_fork(fork)?,
|
||||
base_url: self.base_url.clone(),
|
||||
origin_url: self.origin_url.clone(),
|
||||
userpass_credential: userpass_credential_opt,
|
||||
})
|
||||
} else {
|
||||
Err(RecorderError::from_db_record_not_found(
|
||||
DbErr::RecordNotFound(format!("credential={credential_id} not found")),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn base_url(&self) -> &Url {
|
||||
|
||||
@@ -2,5 +2,7 @@ pub const MIKAN_POSTER_BUCKET_KEY: &str = "mikan_poster";
|
||||
pub const MIKAN_UNKNOWN_FANSUB_NAME: &str = "生肉/不明字幕";
|
||||
pub const MIKAN_UNKNOWN_FANSUB_ID: &str = "202";
|
||||
pub const MIKAN_LOGIN_PAGE_PATH: &str = "/Account/Login";
|
||||
pub const MIKAN_LOGIN_PAGE_SEARCH: &str = "?ReturnUrl=%2F";
|
||||
pub const MIKAN_LOGIN_PAGE_SEARCH: &str = "ReturnUrl=%2F";
|
||||
pub const MIKAN_ACCOUNT_MANAGE_PAGE_PATH: &str = "/Account/Manage";
|
||||
pub const MIKAN_SEASON_FLOW_PAGE_PATH: &str = "/Home/BangumiCoverFlow";
|
||||
pub const MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH: &str = "/Home/ExpandBangumi";
|
||||
|
||||
@@ -8,8 +8,9 @@ mod web;
|
||||
pub use client::MikanClient;
|
||||
pub use config::MikanConfig;
|
||||
pub use constants::{
|
||||
MIKAN_ACCOUNT_MANAGE_PAGE_PATH, MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH,
|
||||
MIKAN_POSTER_BUCKET_KEY, MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME,
|
||||
MIKAN_ACCOUNT_MANAGE_PAGE_PATH, MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH,
|
||||
MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH, MIKAN_POSTER_BUCKET_KEY,
|
||||
MIKAN_SEASON_FLOW_PAGE_PATH, MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME,
|
||||
};
|
||||
pub use credential::MikanCredentialForm;
|
||||
pub use rss::{
|
||||
|
||||
@@ -10,7 +10,8 @@ use tracing::instrument;
|
||||
use url::Url;
|
||||
|
||||
use super::{
|
||||
MIKAN_POSTER_BUCKET_KEY, MikanBangumiRssUrlMeta, MikanClient,
|
||||
MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH, MIKAN_POSTER_BUCKET_KEY,
|
||||
MIKAN_SEASON_FLOW_PAGE_PATH, MikanBangumiRssUrlMeta, MikanClient,
|
||||
extract_mikan_bangumi_id_from_rss_url,
|
||||
};
|
||||
use crate::{
|
||||
@@ -183,7 +184,7 @@ pub fn build_mikan_season_flow_url(
|
||||
season_str: MikanSeasonStr,
|
||||
) -> Url {
|
||||
let mut url = mikan_base_url;
|
||||
url.set_path("/Home/BangumiCoverFlow");
|
||||
url.set_path(MIKAN_SEASON_FLOW_PAGE_PATH);
|
||||
url.query_pairs_mut()
|
||||
.append_pair("year", &year.to_string())
|
||||
.append_pair("seasonStr", &season_str.to_string());
|
||||
@@ -201,7 +202,7 @@ pub fn build_mikan_bangumi_expand_subscribed_url(
|
||||
mikan_bangumi_id: &str,
|
||||
) -> Url {
|
||||
let mut url = mikan_base_url;
|
||||
url.set_path("/ExpandBangumi");
|
||||
url.set_path(MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH);
|
||||
url.query_pairs_mut()
|
||||
.append_pair("bangumiId", mikan_bangumi_id)
|
||||
.append_pair("showSubscribed", "true");
|
||||
@@ -651,7 +652,7 @@ pub async fn scrape_mikan_bangumi_meta_list_from_season_flow_url(
|
||||
credential_id: i32,
|
||||
) -> RecorderResult<Vec<MikanBangumiMeta>> {
|
||||
let mikan_client = mikan_client
|
||||
.fork_with_credential(ctx.clone(), Some(credential_id))
|
||||
.fork_with_credential(ctx.clone(), credential_id)
|
||||
.await?;
|
||||
|
||||
let mikan_base_url = mikan_client.base_url();
|
||||
@@ -671,6 +672,10 @@ pub async fn scrape_mikan_bangumi_meta_list_from_season_flow_url(
|
||||
|
||||
let mut bangumi_metas = vec![];
|
||||
|
||||
mikan_client
|
||||
.sync_credential_cookies(ctx.clone(), credential_id)
|
||||
.await?;
|
||||
|
||||
for bangumi_index in bangumi_indices_meta {
|
||||
let bangumi_title = bangumi_index.bangumi_title.clone();
|
||||
let bangumi_expand_subscribed_fragment_url = build_mikan_bangumi_expand_subscribed_url(
|
||||
@@ -696,6 +701,10 @@ pub async fn scrape_mikan_bangumi_meta_list_from_season_flow_url(
|
||||
bangumi_metas.push(bangumi_meta);
|
||||
}
|
||||
|
||||
mikan_client
|
||||
.sync_credential_cookies(ctx, credential_id)
|
||||
.await?;
|
||||
|
||||
Ok(bangumi_metas)
|
||||
}
|
||||
|
||||
@@ -704,7 +713,6 @@ mod test {
|
||||
#![allow(unused_variables)]
|
||||
use std::fs;
|
||||
|
||||
use fetch::get_random_ua;
|
||||
use rstest::{fixture, rstest};
|
||||
use tracing::Level;
|
||||
use url::Url;
|
||||
@@ -712,11 +720,16 @@ mod test {
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
extract::mikan::MikanCredentialForm,
|
||||
extract::mikan::{MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH, MIKAN_SEASON_FLOW_PAGE_PATH},
|
||||
test_utils::{
|
||||
app::UnitTestAppContext, crypto::build_testing_crypto_service,
|
||||
database::build_testing_database_service, mikan::build_testing_mikan_client,
|
||||
storage::build_testing_storage_service, tracing::try_init_testing_tracing,
|
||||
app::UnitTestAppContext,
|
||||
crypto::build_testing_crypto_service,
|
||||
database::build_testing_database_service,
|
||||
mikan::{
|
||||
MikanMockServer, build_testing_mikan_client, build_testing_mikan_credential_form,
|
||||
},
|
||||
storage::build_testing_storage_service,
|
||||
tracing::try_init_testing_tracing,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -932,8 +945,8 @@ mod test {
|
||||
async fn test_scrape_mikan_bangumi_meta_list_from_season_flow_url(
|
||||
before_each: (),
|
||||
) -> RecorderResult<()> {
|
||||
let mut mikan_server = mockito::Server::new_async().await;
|
||||
let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
let mut mikan_server = MikanMockServer::new().await?;
|
||||
let mikan_base_url = mikan_server.base_url().clone();
|
||||
|
||||
let app_ctx = {
|
||||
let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?;
|
||||
@@ -950,20 +963,50 @@ mod test {
|
||||
|
||||
let mikan_client = app_ctx.mikan();
|
||||
|
||||
let login_mock = mikan_server.mock_get_login_page();
|
||||
|
||||
let season_flow_noauth_mock = mikan_server
|
||||
.server
|
||||
.mock("GET", MIKAN_SEASON_FLOW_PAGE_PATH)
|
||||
.match_query(mockito::Matcher::Any)
|
||||
.match_request(|req| !MikanMockServer::get_has_auth_matcher()(req))
|
||||
.with_status(200)
|
||||
.with_body_from_file("tests/resources/mikan/BangumiCoverFlow-2025-spring-noauth.html")
|
||||
.create();
|
||||
|
||||
let season_flow_mock = mikan_server
|
||||
.server
|
||||
.mock("GET", MIKAN_SEASON_FLOW_PAGE_PATH)
|
||||
.match_query(mockito::Matcher::Any)
|
||||
.match_request(|req| MikanMockServer::get_has_auth_matcher()(req))
|
||||
.with_status(200)
|
||||
.with_body_from_file("tests/resources/mikan/BangumiCoverFlow-2025-spring.html")
|
||||
.create();
|
||||
|
||||
let bangumi_subscribed_noauth_mock = mikan_server
|
||||
.server
|
||||
.mock("GET", MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH)
|
||||
.match_query(mockito::Matcher::Any)
|
||||
.match_request(|req| !MikanMockServer::get_has_auth_matcher()(req))
|
||||
.with_status(200)
|
||||
.with_body_from_file("tests/resources/mikan/ExpandBangumi-3599-noauth.html")
|
||||
.create();
|
||||
|
||||
let bangumi_subscribed_mock = mikan_server
|
||||
.server
|
||||
.mock("GET", MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH)
|
||||
.match_query(mockito::Matcher::Any)
|
||||
.match_request(|req| MikanMockServer::get_has_auth_matcher()(req))
|
||||
.with_status(200)
|
||||
.with_body_from_file("tests/resources/mikan/ExpandBangumi-3599.html")
|
||||
.create();
|
||||
|
||||
let credential = mikan_client
|
||||
.save_credential(
|
||||
app_ctx.clone(),
|
||||
1,
|
||||
MikanCredentialForm {
|
||||
username: String::from("test_username"),
|
||||
password: String::from("test_password"),
|
||||
user_agent: get_random_ua().to_string(),
|
||||
},
|
||||
)
|
||||
.submit_credential_form(app_ctx.clone(), 1, build_testing_mikan_credential_form())
|
||||
.await?;
|
||||
|
||||
let mikan_season_flow_url =
|
||||
build_mikan_season_flow_url(mikan_base_url, 2025, MikanSeasonStr::Spring);
|
||||
build_mikan_season_flow_url(mikan_base_url.clone(), 2025, MikanSeasonStr::Spring);
|
||||
|
||||
let bangumi_meta_list = scrape_mikan_bangumi_meta_list_from_season_flow_url(
|
||||
mikan_client,
|
||||
@@ -975,6 +1018,26 @@ mod test {
|
||||
|
||||
assert!(!bangumi_meta_list.is_empty());
|
||||
|
||||
let bangumi = bangumi_meta_list.first().unwrap();
|
||||
|
||||
assert!(
|
||||
bangumi
|
||||
.homepage
|
||||
.to_string()
|
||||
.ends_with("/Home/Bangumi/3288#370"),
|
||||
);
|
||||
assert_eq!(bangumi.bangumi_title, "吉伊卡哇");
|
||||
assert_eq!(bangumi.mikan_bangumi_id, "3288");
|
||||
assert!(
|
||||
bangumi
|
||||
.origin_poster_src
|
||||
.as_ref()
|
||||
.map_or(String::new(), |u| u.to_string())
|
||||
.ends_with("/images/Bangumi/202204/d8ef46c0.jpg")
|
||||
);
|
||||
assert_eq!(bangumi.mikan_fansub_id, String::from("370"));
|
||||
assert_eq!(bangumi.fansub, String::from("LoliHouse"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -987,8 +1050,9 @@ mod test {
|
||||
let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?;
|
||||
|
||||
let episode_homepage_url =
|
||||
mikan_base_url.join("/Home/Episode/475184dce83ea2b82902592a5ac3343f6d54b36a")?;
|
||||
let episode_homepage_url = mikan_base_url
|
||||
.clone()
|
||||
.join("/Home/Episode/475184dce83ea2b82902592a5ac3343f6d54b36a")?;
|
||||
|
||||
let episode_homepage_mock = mikan_server
|
||||
.mock("GET", episode_homepage_url.path())
|
||||
@@ -1058,101 +1122,4 @@ mod test {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// #[rstest]
|
||||
// #[tokio::test]
|
||||
// async fn test_extract_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
// before_each: (),
|
||||
// ) -> RecorderResult<()> {
|
||||
// let mut mikan_server = mockito::Server::new_async().await;
|
||||
|
||||
// let mikan_base_url = Url::parse(&mikan_server.url())?;
|
||||
|
||||
// let my_bangumi_page_url = mikan_base_url.join("/Home/MyBangumi")?;
|
||||
|
||||
// let context = Arc::new(
|
||||
// UnitTestAppContext::builder()
|
||||
//
|
||||
// .mikan(build_testing_mikan_client(mikan_base_url.clone()).await?)
|
||||
// .build(),
|
||||
// );
|
||||
|
||||
// {
|
||||
// let my_bangumi_without_cookie_mock = mikan_server
|
||||
// .mock("GET", my_bangumi_page_url.path())
|
||||
// .match_header(header::COOKIE, mockito::Matcher::Missing)
|
||||
//
|
||||
// .with_body_from_file("tests/resources/mikan/MyBangumi-noauth.htm")
|
||||
// .create_async()
|
||||
// .await;
|
||||
|
||||
// let bangumi_metas =
|
||||
// extract_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
// context.clone(), my_bangumi_page_url.clone(),
|
||||
// None,
|
||||
// &[],
|
||||
// );
|
||||
|
||||
// pin_mut!(bangumi_metas);
|
||||
|
||||
// let bangumi_metas = bangumi_metas.try_collect::<Vec<_>>().await?;
|
||||
|
||||
// assert!(bangumi_metas.is_empty());
|
||||
|
||||
// assert!(my_bangumi_without_cookie_mock.matched_async().await);
|
||||
// }
|
||||
// {
|
||||
// let my_bangumi_with_cookie_mock = mikan_server
|
||||
// .mock("GET", my_bangumi_page_url.path())
|
||||
// .match_header(
|
||||
// header::COOKIE,
|
||||
// mockito::Matcher::AllOf(vec![
|
||||
//
|
||||
// mockito::Matcher::Regex(String::from(".*\\.AspNetCore\\.Antiforgery.*")),
|
||||
// mockito::Matcher::Regex(String::from(
|
||||
// ".*\\.AspNetCore\\.Identity\\.Application.*",
|
||||
// )),
|
||||
// ]),
|
||||
// )
|
||||
// .with_body_from_file("tests/resources/mikan/MyBangumi.htm")
|
||||
// .create_async()
|
||||
// .await;
|
||||
|
||||
// let expand_bangumi_mock = mikan_server
|
||||
// .mock("GET", "/ExpandBangumi")
|
||||
// .match_query(mockito::Matcher::Any)
|
||||
//
|
||||
// .with_body_from_file("tests/resources/mikan/ExpandBangumi.htm")
|
||||
// .create_async()
|
||||
// .await;
|
||||
|
||||
// let auth_secrecy = Some(MikanCredentialForm {
|
||||
// username: String::from("test_username"),
|
||||
// password: String::from("test_password"),
|
||||
// user_agent: String::from(
|
||||
// "Mozilla/5.0 (Windows NT 10.0; Win64; x64)
|
||||
// AppleWebKit/537.36 (KHTML, like \ Gecko)
|
||||
// Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0", ),
|
||||
// });
|
||||
|
||||
// let bangumi_metas =
|
||||
// extract_mikan_bangumis_meta_from_my_bangumi_page(
|
||||
// context.clone(), my_bangumi_page_url,
|
||||
// auth_secrecy,
|
||||
// &[],
|
||||
// );
|
||||
// pin_mut!(bangumi_metas);
|
||||
// let bangumi_metas = bangumi_metas.try_collect::<Vec<_>>().await?;
|
||||
|
||||
// assert!(!bangumi_metas.is_empty());
|
||||
|
||||
// assert!(bangumi_metas[0].origin_poster_src.is_some());
|
||||
|
||||
// assert!(my_bangumi_with_cookie_mock.matched_async().await);
|
||||
|
||||
// expand_bangumi_mock.expect(bangumi_metas.len());
|
||||
// }
|
||||
|
||||
// Ok(())
|
||||
// }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user