feature: add new mikan scrapers

2025-05-03 04:23:33 +08:00
parent dbded94324
commit 3fe0538468
36 changed files with 1001 additions and 793 deletions

View File

@@ -3,7 +3,6 @@ use std::{fmt::Debug, ops::Deref, sync::Arc};
use fetch::{HttpClient, HttpClientTrait};
use maplit::hashmap;
use sea_orm::DbErr;
use secrecy::SecretBox;
use serde::{Deserialize, Serialize};
use url::Url;
use util::OptDynErr;
@@ -23,8 +22,6 @@ pub struct MikanCredentialForm {
pub user_agent: String,
}
pub type MikanAuthSecrecy = SecretBox<MikanCredentialForm>;
impl Debug for MikanCredentialForm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("MikanCredentialForm")
@@ -72,7 +69,7 @@ impl MikanClient {
Ok(false)
} else {
Err(RecorderError::Credential3rdError {
message: format!("mikan account check has login failed, status = {}", status),
message: format!("mikan account check has login failed, status = {status}"),
source: None.into(),
})
}
@@ -189,7 +186,7 @@ impl MikanClient {
userpass_credential_opt = Some(userpass_credential);
} else {
return Err(RecorderError::from_db_record_not_found(
DbErr::RecordNotFound(format!("credential={} not found", credential_id)),
DbErr::RecordNotFound(format!("credential={credential_id} not found")),
));
}
}
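Both hunks in this file only switch `format!` calls to inline format arguments (captured identifiers); the produced messages are unchanged. A minimal standalone check of that equivalence (the value 7 is illustrative):

```rust
fn main() {
    let credential_id = 7;
    // Positional argument vs. inline captured identifier: identical output.
    assert_eq!(
        format!("credential={} not found", credential_id),
        format!("credential={credential_id} not found"),
    );
}
```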

View File

@@ -1,4 +1,4 @@
pub const MIKAN_BUCKET_KEY: &str = "mikan";
pub const MIKAN_POSTER_BUCKET_KEY: &str = "mikan_poster";
pub const MIKAN_UNKNOWN_FANSUB_NAME: &str = "生肉/不明字幕";
pub const MIKAN_UNKNOWN_FANSUB_ID: &str = "202";
pub const MIKAN_LOGIN_PAGE_PATH: &str = "/Account/Login";

View File

@@ -1,23 +1,31 @@
pub mod client;
pub mod config;
pub mod constants;
pub mod rss_extract;
pub mod web_extract;
mod client;
mod config;
mod constants;
mod rss;
mod web;
pub use client::{MikanClient, MikanCredentialForm};
pub use config::MikanConfig;
pub use constants::MIKAN_BUCKET_KEY;
pub use rss_extract::{
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta,
MikanRssChannel, MikanRssItem, MikanSubscriberAggregationRssChannel,
MikanSubscriberAggregationRssUrlMeta, build_mikan_bangumi_rss_url,
build_mikan_subscriber_aggregation_rss_url, extract_mikan_bangumi_id_from_rss_url,
extract_mikan_rss_channel_from_rss_link, extract_mikan_subscriber_aggregation_id_from_rss_link,
pub use constants::{
MIKAN_ACCOUNT_MANAGE_PAGE_PATH, MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH,
MIKAN_POSTER_BUCKET_KEY, MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME,
};
pub use web_extract::{
MikanBangumiMeta, MikanEpisodeMeta, MikanSeasonStr, build_mikan_bangumi_homepage_url,
build_mikan_episode_homepage_url, build_mikan_season_flow_url,
extract_mikan_bangumi_indices_meta_from_season_flow_fragment,
extract_mikan_bangumi_meta_from_bangumi_homepage,
extract_mikan_episode_meta_from_episode_homepage,
pub use rss::{
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel,
MikanRssItem, MikanSubscriberAggregationRssUrlMeta, MikanSubscriberStreamRssChannel,
build_mikan_bangumi_rss_url, build_mikan_subscriber_aggregation_rss_url,
extract_mikan_bangumi_id_from_rss_url, extract_mikan_rss_channel_from_rss_link,
extract_mikan_subscriber_aggregation_id_from_rss_link,
};
pub use web::{
MikanBangumiHomepageUrlMeta, MikanBangumiIndexHomepageUrlMeta, MikanBangumiIndexMeta,
MikanBangumiMeta, MikanBangumiPosterMeta, MikanEpisodeHomepageUrlMeta, MikanEpisodeMeta,
MikanSeasonFlowUrlMeta, MikanSeasonStr, build_mikan_bangumi_expand_subscribed_url,
build_mikan_bangumi_homepage_url, build_mikan_episode_homepage_url,
build_mikan_season_flow_url, extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
extract_mikan_episode_meta_from_episode_homepage_html,
scrape_mikan_bangumi_meta_from_bangumi_homepage_url,
scrape_mikan_bangumi_meta_list_from_season_flow_url,
scrape_mikan_episode_meta_from_episode_homepage_url, scrape_mikan_poster_data_from_image_url,
scrape_mikan_poster_meta_from_image_url,
};
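With the submodules (`client`, `config`, `constants`, `rss`, `web`) now private, everything is consumed through the module root. A sketch of what a downstream import might look like after this flattening; the `crate::extract::mikan` path is taken from the other files in this commit, and the selection of names is illustrative:

```rust
use crate::extract::mikan::{
    // Formerly reached via mikan::rss_extract / mikan::web_extract.
    MikanBangumiIndexRssChannel, MikanClient, MikanEpisodeHomepageUrlMeta,
    MikanRssChannel, MikanSubscriberStreamRssChannel,
};
```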

View File

@@ -10,10 +10,7 @@ use url::Url;
use crate::{
errors::app_error::{RecorderError, RecorderResult},
extract::mikan::{
MikanClient,
web_extract::{MikanEpisodeHomepage, extract_mikan_episode_id_from_homepage_url},
},
extract::mikan::{MikanClient, MikanEpisodeHomepageUrlMeta},
};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -37,7 +34,7 @@ pub struct MikanBangumiRssChannel {
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanBangumiAggregationRssChannel {
pub struct MikanBangumiIndexRssChannel {
pub name: String,
pub url: Url,
pub mikan_bangumi_id: String,
@@ -45,7 +42,7 @@ pub struct MikanBangumiAggregationRssChannel {
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanSubscriberAggregationRssChannel {
pub struct MikanSubscriberStreamRssChannel {
pub mikan_aggregation_id: String,
pub url: Url,
pub items: Vec<MikanRssItem>,
@@ -54,46 +51,40 @@ pub struct MikanSubscriberAggregationRssChannel {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MikanRssChannel {
Bangumi(MikanBangumiRssChannel),
BangumiAggregation(MikanBangumiAggregationRssChannel),
SubscriberAggregation(MikanSubscriberAggregationRssChannel),
BangumiIndex(MikanBangumiIndexRssChannel),
SubscriberStream(MikanSubscriberStreamRssChannel),
}
impl MikanRssChannel {
pub fn items(&self) -> &[MikanRssItem] {
match &self {
Self::Bangumi(MikanBangumiRssChannel { items, .. })
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
items
}
| Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
| Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
}
}
pub fn into_items(self) -> Vec<MikanRssItem> {
match self {
Self::Bangumi(MikanBangumiRssChannel { items, .. })
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
items
}
| Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
| Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
}
}
pub fn name(&self) -> Option<&str> {
match &self {
Self::Bangumi(MikanBangumiRssChannel { name, .. })
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { name, .. }) => {
Some(name.as_str())
}
Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { .. }) => None,
| Self::BangumiIndex(MikanBangumiIndexRssChannel { name, .. }) => Some(name.as_str()),
Self::SubscriberStream(MikanSubscriberStreamRssChannel { .. }) => None,
}
}
pub fn url(&self) -> &Url {
match &self {
Self::Bangumi(MikanBangumiRssChannel { url, .. })
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { url, .. })
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { url, .. }) => url,
| Self::BangumiIndex(MikanBangumiIndexRssChannel { url, .. })
| Self::SubscriberStream(MikanSubscriberStreamRssChannel { url, .. }) => url,
}
}
}
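The renamed variants keep the same accessor surface, so callers can stay variant-agnostic. A small sketch of consuming the enum through the accessors above; the `summarize` helper and its import path are illustrative, not part of the commit:

```rust
use crate::extract::mikan::MikanRssChannel;

// Works for Bangumi, BangumiIndex and SubscriberStream channels alike;
// only SubscriberStream lacks a name, per the name() accessor above.
fn summarize(channel: &MikanRssChannel) -> String {
    let name = channel.name().unwrap_or("<subscriber stream>");
    format!("{name} @ {} ({} items)", channel.url(), channel.items().len())
}
```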
@@ -133,9 +124,9 @@ impl TryFrom<rss::Item> for MikanRssItem {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("homepage:link"))
})?;
let MikanEpisodeHomepage {
let MikanEpisodeHomepageUrlMeta {
mikan_episode_id, ..
} = extract_mikan_episode_id_from_homepage_url(&homepage).ok_or_else(|| {
} = MikanEpisodeHomepageUrlMeta::parse_url(&homepage).ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("mikan_episode_id"))
})?;
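The free function `extract_mikan_episode_id_from_homepage_url` is replaced by an associated constructor, `MikanEpisodeHomepageUrlMeta::parse_url`. Judging from the `ok_or_else` call above, it appears to return an `Option`; a minimal sketch of standalone use under that assumption (the helper, the `String` field type, and the wrapper function are assumptions for illustration):

```rust
use url::Url;

use crate::extract::mikan::MikanEpisodeHomepageUrlMeta;

fn episode_id_from(homepage: &Url) -> Option<String> {
    // parse_url is assumed to return Option<MikanEpisodeHomepageUrlMeta>,
    // matching how it is unwrapped with ok_or_else in the TryFrom impl above.
    MikanEpisodeHomepageUrlMeta::parse_url(homepage).map(|meta| meta.mikan_episode_id)
}
```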
@@ -278,17 +269,15 @@ pub async fn extract_mikan_rss_channel_from_rss_link(
channel_name,
channel_link = channel_link.as_str(),
mikan_bangumi_id,
"MikanBangumiAggregationRssChannel extracted"
"MikanBangumiIndexRssChannel extracted"
);
Ok(MikanRssChannel::BangumiAggregation(
MikanBangumiAggregationRssChannel {
name: channel_name,
mikan_bangumi_id,
url: channel_link,
items,
},
))
Ok(MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel {
name: channel_name,
mikan_bangumi_id,
url: channel_link,
items,
}))
}
} else if let Some(MikanSubscriberAggregationRssUrlMeta {
mikan_aggregation_id,
@@ -317,8 +306,8 @@ pub async fn extract_mikan_rss_channel_from_rss_link(
"MikanSubscriberAggregationRssChannel extracted"
);
Ok(MikanRssChannel::SubscriberAggregation(
MikanSubscriberAggregationRssChannel {
Ok(MikanRssChannel::SubscriberStream(
MikanSubscriberStreamRssChannel {
mikan_aggregation_id,
items,
url: channel_link,
@@ -342,7 +331,7 @@ mod tests {
use crate::{
errors::RecorderResult,
extract::mikan::{
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanRssChannel,
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanRssChannel,
extract_mikan_rss_channel_from_rss_link,
},
test_utils::mikan::build_testing_mikan_client,
@@ -413,7 +402,7 @@ mod tests {
assert_matches!(
&channel,
MikanRssChannel::BangumiAggregation(MikanBangumiAggregationRssChannel { .. })
MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel { .. })
);
assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));