fix: fix testsuite
This commit is contained in:
249
apps/recorder/examples/mikan_doppel_season_subscription.rs
Normal file
249
apps/recorder/examples/mikan_doppel_season_subscription.rs
Normal file
@@ -0,0 +1,249 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use color_eyre::{Result, eyre::OptionExt};
|
||||
use fetch::{FetchError, HttpClientConfig, fetch_bytes, fetch_html, fetch_image, reqwest};
|
||||
use inquire::{Password, Text, validator::Validation};
|
||||
use recorder::{
|
||||
crypto::UserPassCredential,
|
||||
extract::mikan::{
|
||||
MikanClient, MikanConfig, MikanRssItem, build_mikan_bangumi_expand_subscribed_url,
|
||||
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
|
||||
extract_mikan_bangumi_meta_from_expand_subscribed_fragment,
|
||||
},
|
||||
test_utils::mikan::{MikanDoppelMeta, MikanDoppelPath},
|
||||
};
|
||||
use scraper::Html;
|
||||
use tokio::fs;
|
||||
use url::Url;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_max_level(tracing::Level::INFO)
|
||||
.init();
|
||||
std::env::set_current_dir(std::path::Path::new("apps/recorder"))?;
|
||||
|
||||
let mikan_scrape_client = MikanClient::from_config(MikanConfig {
|
||||
http_client: HttpClientConfig {
|
||||
exponential_backoff_max_retries: Some(3),
|
||||
leaky_bucket_max_tokens: Some(2),
|
||||
leaky_bucket_initial_tokens: Some(0),
|
||||
leaky_bucket_refill_tokens: Some(1),
|
||||
leaky_bucket_refill_interval: Some(Duration::from_millis(1000)),
|
||||
user_agent: Some(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \
|
||||
Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
|
||||
.to_string(),
|
||||
),
|
||||
..Default::default()
|
||||
},
|
||||
base_url: Url::parse("https://mikanani.me")?,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let username_validator = |input: &str| {
|
||||
if input.trim().is_empty() {
|
||||
Ok(Validation::Invalid("Username cannot be empty".into()))
|
||||
} else {
|
||||
Ok(Validation::Valid)
|
||||
}
|
||||
};
|
||||
let password_validator = |input: &str| {
|
||||
if input.trim().is_empty() {
|
||||
Ok(Validation::Invalid("Password cannot be empty".into()))
|
||||
} else {
|
||||
Ok(Validation::Valid)
|
||||
}
|
||||
};
|
||||
let username = Text::new("Please enter your mikan username:")
|
||||
.with_validator(username_validator)
|
||||
.prompt()?;
|
||||
let password = Password::new("Please enter your mikan password:")
|
||||
.without_confirmation()
|
||||
.with_display_mode(inquire::PasswordDisplayMode::Masked)
|
||||
.with_validator(password_validator)
|
||||
.prompt()?;
|
||||
|
||||
let mikan_scrape_client = mikan_scrape_client
|
||||
.fork_with_credential(UserPassCredential {
|
||||
username,
|
||||
password,
|
||||
user_agent: None,
|
||||
cookies: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
tracing::info!("Checking if logged in...");
|
||||
if !mikan_scrape_client.has_login().await? {
|
||||
tracing::info!("Logging in to mikan...");
|
||||
mikan_scrape_client.login().await?;
|
||||
tracing::info!("Logged in to mikan");
|
||||
}
|
||||
|
||||
let mikan_base_url = mikan_scrape_client.base_url().clone();
|
||||
tracing::info!("Scraping season subscription...");
|
||||
let season_subscription =
|
||||
fs::read("tests/resources/mikan/BangumiCoverFlow-2025-spring.html").await?;
|
||||
let html = Html::parse_fragment(String::from_utf8(season_subscription)?.as_str());
|
||||
let bangumi_index_list =
|
||||
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url);
|
||||
|
||||
for bangumi_index in bangumi_index_list {
|
||||
let bangumi_meta = {
|
||||
let bangumi_expand_subscribed_url = build_mikan_bangumi_expand_subscribed_url(
|
||||
mikan_base_url.clone(),
|
||||
bangumi_index.mikan_bangumi_id.as_ref(),
|
||||
);
|
||||
let bangumi_expand_subscribed_doppel_path =
|
||||
MikanDoppelPath::new(bangumi_expand_subscribed_url.clone());
|
||||
tracing::info!(
|
||||
bangumi_title = bangumi_index.bangumi_title,
|
||||
"Scraping bangumi expand subscribed..."
|
||||
);
|
||||
let bangumi_expand_subscribed_data =
|
||||
if !bangumi_expand_subscribed_doppel_path.exists_any() {
|
||||
let bangumi_expand_subscribed_data =
|
||||
fetch_html(&mikan_scrape_client, bangumi_expand_subscribed_url).await?;
|
||||
bangumi_expand_subscribed_doppel_path.write(&bangumi_expand_subscribed_data)?;
|
||||
tracing::info!(
|
||||
bangumi_title = bangumi_index.bangumi_title,
|
||||
"Bangumi expand subscribed saved"
|
||||
);
|
||||
bangumi_expand_subscribed_data
|
||||
} else {
|
||||
tracing::info!(
|
||||
bangumi_title = bangumi_index.bangumi_title,
|
||||
"Bangumi expand subscribed already exists"
|
||||
);
|
||||
String::from_utf8(bangumi_expand_subscribed_doppel_path.read()?)?
|
||||
};
|
||||
|
||||
let html = Html::parse_fragment(&bangumi_expand_subscribed_data);
|
||||
extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
|
||||
&html,
|
||||
bangumi_index.clone(),
|
||||
mikan_base_url.clone(),
|
||||
)
|
||||
.ok_or_eyre(format!(
|
||||
"Failed to extract bangumi meta from expand subscribed fragment: {:?}",
|
||||
bangumi_index.bangumi_title
|
||||
))
|
||||
}?;
|
||||
{
|
||||
if let Some(poster_url) = bangumi_meta.origin_poster_src.as_ref() {
|
||||
let poster_doppel_path = MikanDoppelPath::new(poster_url.clone());
|
||||
tracing::info!(
|
||||
title = bangumi_meta.bangumi_title,
|
||||
"Scraping bangumi poster..."
|
||||
);
|
||||
if !poster_doppel_path.exists_any() {
|
||||
let poster_data = fetch_image(&mikan_scrape_client, poster_url.clone()).await?;
|
||||
poster_doppel_path.write(&poster_data)?;
|
||||
tracing::info!(title = bangumi_meta.bangumi_title, "Bangumi poster saved");
|
||||
} else {
|
||||
tracing::info!(
|
||||
title = bangumi_meta.bangumi_title,
|
||||
"Bangumi poster already exists"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
let bangumi_homepage_url = bangumi_meta
|
||||
.bangumi_hash()
|
||||
.build_homepage_url(mikan_base_url.clone());
|
||||
let bangumi_homepage_doppel_path = MikanDoppelPath::new(bangumi_homepage_url.clone());
|
||||
tracing::info!(
|
||||
title = bangumi_meta.bangumi_title,
|
||||
"Scraping bangumi homepage..."
|
||||
);
|
||||
if !bangumi_homepage_doppel_path.exists_any() {
|
||||
let bangumi_homepage_data =
|
||||
fetch_html(&mikan_scrape_client, bangumi_homepage_url).await?;
|
||||
bangumi_homepage_doppel_path.write(&bangumi_homepage_data)?;
|
||||
tracing::info!(title = bangumi_meta.bangumi_title, "Bangumi homepage saved");
|
||||
} else {
|
||||
tracing::info!(
|
||||
title = bangumi_meta.bangumi_title,
|
||||
"Bangumi homepage already exists"
|
||||
);
|
||||
}
|
||||
}
|
||||
let rss_items = {
|
||||
let bangumi_rss_url = bangumi_meta
|
||||
.bangumi_hash()
|
||||
.build_rss_url(mikan_base_url.clone());
|
||||
let bangumi_rss_doppel_path = MikanDoppelPath::new(bangumi_rss_url.clone());
|
||||
tracing::info!(
|
||||
title = bangumi_meta.bangumi_title,
|
||||
"Scraping bangumi rss..."
|
||||
);
|
||||
let bangumi_rss_data = if !bangumi_rss_doppel_path.exists_any() {
|
||||
let bangumi_rss_data = fetch_html(&mikan_scrape_client, bangumi_rss_url).await?;
|
||||
bangumi_rss_doppel_path.write(&bangumi_rss_data)?;
|
||||
tracing::info!(title = bangumi_meta.bangumi_title, "Bangumi rss saved");
|
||||
bangumi_rss_data
|
||||
} else {
|
||||
tracing::info!(
|
||||
title = bangumi_meta.bangumi_title,
|
||||
"Bangumi rss already exists"
|
||||
);
|
||||
String::from_utf8(bangumi_rss_doppel_path.read()?)?
|
||||
};
|
||||
let rss_items = rss::Channel::read_from(bangumi_rss_data.as_bytes())?.items;
|
||||
rss_items
|
||||
.into_iter()
|
||||
.map(MikanRssItem::try_from)
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}?;
|
||||
for rss_item in rss_items {
|
||||
{
|
||||
let episode_homepage_url = rss_item.homepage;
|
||||
let episode_homepage_doppel_path =
|
||||
MikanDoppelPath::new(episode_homepage_url.clone());
|
||||
tracing::info!(title = rss_item.title, "Scraping episode...");
|
||||
if !episode_homepage_doppel_path.exists_any() {
|
||||
let episode_homepage_data =
|
||||
fetch_html(&mikan_scrape_client, episode_homepage_url).await?;
|
||||
episode_homepage_doppel_path.write(&episode_homepage_data)?;
|
||||
tracing::info!(title = rss_item.title, "Episode saved");
|
||||
} else {
|
||||
tracing::info!(title = rss_item.title, "Episode already exists");
|
||||
};
|
||||
}
|
||||
{
|
||||
let episode_torrent_url = rss_item.url;
|
||||
let episode_torrent_doppel_path = MikanDoppelPath::new(episode_torrent_url.clone());
|
||||
tracing::info!(title = rss_item.title, "Scraping episode torrent...");
|
||||
if !episode_torrent_doppel_path.exists_any() {
|
||||
match fetch_bytes(&mikan_scrape_client, episode_torrent_url).await {
|
||||
Ok(episode_torrent_data) => {
|
||||
episode_torrent_doppel_path.write(&episode_torrent_data)?;
|
||||
tracing::info!(title = rss_item.title, "Episode torrent saved");
|
||||
}
|
||||
Err(e) => {
|
||||
if let FetchError::ReqwestError { source } = &e
|
||||
&& source
|
||||
.status()
|
||||
.is_some_and(|status| status == reqwest::StatusCode::NOT_FOUND)
|
||||
{
|
||||
tracing::warn!(
|
||||
title = rss_item.title,
|
||||
"Episode torrent not found, maybe deleted since new version"
|
||||
);
|
||||
episode_torrent_doppel_path
|
||||
.write_meta(MikanDoppelMeta { status: 404 })?;
|
||||
} else {
|
||||
Err(e)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::info!(title = rss_item.title, "Episode torrent already exists");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!("Scraping season subscription done");
|
||||
Ok(())
|
||||
}
|
||||
140
apps/recorder/examples/mikan_doppel_subscriber_subscription.rs
Normal file
140
apps/recorder/examples/mikan_doppel_subscriber_subscription.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use fetch::{FetchError, HttpClientConfig, fetch_bytes, fetch_html, fetch_image, reqwest};
|
||||
use recorder::{
|
||||
errors::RecorderResult,
|
||||
extract::mikan::{
|
||||
MikanClient, MikanConfig, MikanRssItem,
|
||||
extract_mikan_episode_meta_from_episode_homepage_html,
|
||||
},
|
||||
test_utils::mikan::{MikanDoppelMeta, MikanDoppelPath},
|
||||
};
|
||||
use scraper::Html;
|
||||
use tokio::fs;
|
||||
use url::Url;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> RecorderResult<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_max_level(tracing::Level::INFO)
|
||||
.init();
|
||||
std::env::set_current_dir(std::path::Path::new("apps/recorder"))?;
|
||||
|
||||
let mikan_scrape_client = MikanClient::from_config(MikanConfig {
|
||||
http_client: HttpClientConfig {
|
||||
exponential_backoff_max_retries: Some(3),
|
||||
leaky_bucket_max_tokens: Some(2),
|
||||
leaky_bucket_initial_tokens: Some(0),
|
||||
leaky_bucket_refill_tokens: Some(1),
|
||||
leaky_bucket_refill_interval: Some(Duration::from_millis(500)),
|
||||
user_agent: Some(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \
|
||||
Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
|
||||
.to_string(),
|
||||
),
|
||||
..Default::default()
|
||||
},
|
||||
base_url: Url::parse("https://mikanani.me")?,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mikan_base_url = mikan_scrape_client.base_url().clone();
|
||||
tracing::info!("Scraping subscriber subscription...");
|
||||
let subscriber_subscription =
|
||||
fs::read("tests/resources/mikan/MyBangumi-2025-spring.rss").await?;
|
||||
let channel = rss::Channel::read_from(&subscriber_subscription[..])?;
|
||||
let rss_items: Vec<MikanRssItem> = channel
|
||||
.items
|
||||
.into_iter()
|
||||
.map(MikanRssItem::try_from)
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
for rss_item in rss_items {
|
||||
let episode_homepage_meta = {
|
||||
tracing::info!(title = rss_item.title, "Scraping episode homepage...");
|
||||
let episode_homepage_url = rss_item.homepage;
|
||||
let episode_homepage_doppel_path = MikanDoppelPath::new(episode_homepage_url.clone());
|
||||
let episode_homepage_data = if !episode_homepage_doppel_path.exists_any() {
|
||||
let episode_homepage_data =
|
||||
fetch_html(&mikan_scrape_client, episode_homepage_url.clone()).await?;
|
||||
episode_homepage_doppel_path.write(&episode_homepage_data)?;
|
||||
tracing::info!(title = rss_item.title, "Episode homepage saved");
|
||||
episode_homepage_data
|
||||
} else {
|
||||
tracing::info!(title = rss_item.title, "Episode homepage already exists");
|
||||
String::from_utf8(episode_homepage_doppel_path.read()?)?
|
||||
};
|
||||
let html = Html::parse_document(&episode_homepage_data);
|
||||
extract_mikan_episode_meta_from_episode_homepage_html(
|
||||
&html,
|
||||
mikan_base_url.clone(),
|
||||
episode_homepage_url,
|
||||
)
|
||||
}?;
|
||||
|
||||
{
|
||||
let episode_torrent_url = rss_item.url;
|
||||
let episode_torrent_doppel_path = MikanDoppelPath::new(episode_torrent_url.clone());
|
||||
tracing::info!(title = rss_item.title, "Scraping episode torrent...");
|
||||
if !episode_torrent_doppel_path.exists_any() {
|
||||
match fetch_bytes(&mikan_scrape_client, episode_torrent_url).await {
|
||||
Ok(episode_torrent_data) => {
|
||||
episode_torrent_doppel_path.write(&episode_torrent_data)?;
|
||||
tracing::info!(title = rss_item.title, "Episode torrent saved");
|
||||
}
|
||||
Err(e) => {
|
||||
if let FetchError::ReqwestError { source } = &e
|
||||
&& source
|
||||
.status()
|
||||
.is_some_and(|status| status == reqwest::StatusCode::NOT_FOUND)
|
||||
{
|
||||
tracing::warn!(
|
||||
title = rss_item.title,
|
||||
"Episode torrent not found, maybe deleted since new version"
|
||||
);
|
||||
episode_torrent_doppel_path
|
||||
.write_meta(MikanDoppelMeta { status: 404 })?;
|
||||
} else {
|
||||
Err(e)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(title = rss_item.title, "Episode torrent saved");
|
||||
} else {
|
||||
tracing::info!(title = rss_item.title, "Episode torrent already exists");
|
||||
}
|
||||
}
|
||||
{
|
||||
if let Some(episode_poster_url) = episode_homepage_meta.origin_poster_src.as_ref() {
|
||||
let episode_poster_doppel_path = MikanDoppelPath::new(episode_poster_url.clone());
|
||||
tracing::info!(title = rss_item.title, "Scraping episode poster...");
|
||||
if !episode_poster_doppel_path.exists_any() {
|
||||
let episode_poster_data =
|
||||
fetch_image(&mikan_scrape_client, episode_poster_url.clone()).await?;
|
||||
episode_poster_doppel_path.write(&episode_poster_data)?;
|
||||
tracing::info!(title = rss_item.title, "Episode poster saved");
|
||||
} else {
|
||||
tracing::info!(title = rss_item.title, "Episode poster already exists");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let bangumi_homepage_url = episode_homepage_meta
|
||||
.bangumi_hash()
|
||||
.build_homepage_url(mikan_base_url.clone());
|
||||
let bangumi_homepage_doppel_path = MikanDoppelPath::new(bangumi_homepage_url.clone());
|
||||
tracing::info!(title = rss_item.title, "Scraping bangumi homepage...");
|
||||
if !bangumi_homepage_doppel_path.exists_any() {
|
||||
let bangumi_homepage_data =
|
||||
fetch_html(&mikan_scrape_client, bangumi_homepage_url).await?;
|
||||
bangumi_homepage_doppel_path.write(&bangumi_homepage_data)?;
|
||||
tracing::info!(title = rss_item.title, "Bangumi homepage saved");
|
||||
} else {
|
||||
tracing::info!(title = rss_item.title, "Bangumi homepage already exists");
|
||||
};
|
||||
}
|
||||
}
|
||||
tracing::info!("Scraping subscriber subscription done");
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user