feature: add new mikan scrapers
This commit is contained in:
@@ -3,7 +3,6 @@ use std::{fmt::Debug, ops::Deref, sync::Arc};
|
||||
use fetch::{HttpClient, HttpClientTrait};
|
||||
use maplit::hashmap;
|
||||
use sea_orm::DbErr;
|
||||
use secrecy::SecretBox;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
use util::OptDynErr;
|
||||
@@ -23,8 +22,6 @@ pub struct MikanCredentialForm {
|
||||
pub user_agent: String,
|
||||
}
|
||||
|
||||
pub type MikanAuthSecrecy = SecretBox<MikanCredentialForm>;
|
||||
|
||||
impl Debug for MikanCredentialForm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("MikanCredentialForm")
|
||||
@@ -72,7 +69,7 @@ impl MikanClient {
|
||||
Ok(false)
|
||||
} else {
|
||||
Err(RecorderError::Credential3rdError {
|
||||
message: format!("mikan account check has login failed, status = {}", status),
|
||||
message: format!("mikan account check has login failed, status = {status}"),
|
||||
source: None.into(),
|
||||
})
|
||||
}
|
||||
@@ -189,7 +186,7 @@ impl MikanClient {
|
||||
userpass_credential_opt = Some(userpass_credential);
|
||||
} else {
|
||||
return Err(RecorderError::from_db_record_not_found(
|
||||
DbErr::RecordNotFound(format!("credential={} not found", credential_id)),
|
||||
DbErr::RecordNotFound(format!("credential={credential_id} not found")),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
pub const MIKAN_BUCKET_KEY: &str = "mikan";
|
||||
pub const MIKAN_POSTER_BUCKET_KEY: &str = "mikan_poster";
|
||||
pub const MIKAN_UNKNOWN_FANSUB_NAME: &str = "生肉/不明字幕";
|
||||
pub const MIKAN_UNKNOWN_FANSUB_ID: &str = "202";
|
||||
pub const MIKAN_LOGIN_PAGE_PATH: &str = "/Account/Login";
|
||||
|
||||
@@ -1,23 +1,31 @@
|
||||
pub mod client;
|
||||
pub mod config;
|
||||
pub mod constants;
|
||||
pub mod rss_extract;
|
||||
pub mod web_extract;
|
||||
mod client;
|
||||
mod config;
|
||||
mod constants;
|
||||
mod rss;
|
||||
mod web;
|
||||
|
||||
pub use client::{MikanClient, MikanCredentialForm};
|
||||
pub use config::MikanConfig;
|
||||
pub use constants::MIKAN_BUCKET_KEY;
|
||||
pub use rss_extract::{
|
||||
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta,
|
||||
MikanRssChannel, MikanRssItem, MikanSubscriberAggregationRssChannel,
|
||||
MikanSubscriberAggregationRssUrlMeta, build_mikan_bangumi_rss_url,
|
||||
build_mikan_subscriber_aggregation_rss_url, extract_mikan_bangumi_id_from_rss_url,
|
||||
extract_mikan_rss_channel_from_rss_link, extract_mikan_subscriber_aggregation_id_from_rss_link,
|
||||
pub use constants::{
|
||||
MIKAN_ACCOUNT_MANAGE_PAGE_PATH, MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH,
|
||||
MIKAN_POSTER_BUCKET_KEY, MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME,
|
||||
};
|
||||
pub use web_extract::{
|
||||
MikanBangumiMeta, MikanEpisodeMeta, MikanSeasonStr, build_mikan_bangumi_homepage_url,
|
||||
build_mikan_episode_homepage_url, build_mikan_season_flow_url,
|
||||
extract_mikan_bangumi_indices_meta_from_season_flow_fragment,
|
||||
extract_mikan_bangumi_meta_from_bangumi_homepage,
|
||||
extract_mikan_episode_meta_from_episode_homepage,
|
||||
pub use rss::{
|
||||
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel,
|
||||
MikanRssItem, MikanSubscriberAggregationRssUrlMeta, MikanSubscriberStreamRssChannel,
|
||||
build_mikan_bangumi_rss_url, build_mikan_subscriber_aggregation_rss_url,
|
||||
extract_mikan_bangumi_id_from_rss_url, extract_mikan_rss_channel_from_rss_link,
|
||||
extract_mikan_subscriber_aggregation_id_from_rss_link,
|
||||
};
|
||||
pub use web::{
|
||||
MikanBangumiHomepageUrlMeta, MikanBangumiIndexHomepageUrlMeta, MikanBangumiIndexMeta,
|
||||
MikanBangumiMeta, MikanBangumiPosterMeta, MikanEpisodeHomepageUrlMeta, MikanEpisodeMeta,
|
||||
MikanSeasonFlowUrlMeta, MikanSeasonStr, build_mikan_bangumi_expand_subscribed_url,
|
||||
build_mikan_bangumi_homepage_url, build_mikan_episode_homepage_url,
|
||||
build_mikan_season_flow_url, extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
|
||||
extract_mikan_episode_meta_from_episode_homepage_html,
|
||||
scrape_mikan_bangumi_meta_from_bangumi_homepage_url,
|
||||
scrape_mikan_bangumi_meta_list_from_season_flow_url,
|
||||
scrape_mikan_episode_meta_from_episode_homepage_url, scrape_mikan_poster_data_from_image_url,
|
||||
scrape_mikan_poster_meta_from_image_url,
|
||||
};
|
||||
|
||||
@@ -10,10 +10,7 @@ use url::Url;
|
||||
|
||||
use crate::{
|
||||
errors::app_error::{RecorderError, RecorderResult},
|
||||
extract::mikan::{
|
||||
MikanClient,
|
||||
web_extract::{MikanEpisodeHomepage, extract_mikan_episode_id_from_homepage_url},
|
||||
},
|
||||
extract::mikan::{MikanClient, MikanEpisodeHomepageUrlMeta},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
@@ -37,7 +34,7 @@ pub struct MikanBangumiRssChannel {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct MikanBangumiAggregationRssChannel {
|
||||
pub struct MikanBangumiIndexRssChannel {
|
||||
pub name: String,
|
||||
pub url: Url,
|
||||
pub mikan_bangumi_id: String,
|
||||
@@ -45,7 +42,7 @@ pub struct MikanBangumiAggregationRssChannel {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct MikanSubscriberAggregationRssChannel {
|
||||
pub struct MikanSubscriberStreamRssChannel {
|
||||
pub mikan_aggregation_id: String,
|
||||
pub url: Url,
|
||||
pub items: Vec<MikanRssItem>,
|
||||
@@ -54,46 +51,40 @@ pub struct MikanSubscriberAggregationRssChannel {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum MikanRssChannel {
|
||||
Bangumi(MikanBangumiRssChannel),
|
||||
BangumiAggregation(MikanBangumiAggregationRssChannel),
|
||||
SubscriberAggregation(MikanSubscriberAggregationRssChannel),
|
||||
BangumiIndex(MikanBangumiIndexRssChannel),
|
||||
SubscriberStream(MikanSubscriberStreamRssChannel),
|
||||
}
|
||||
|
||||
impl MikanRssChannel {
|
||||
pub fn items(&self) -> &[MikanRssItem] {
|
||||
match &self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { items, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
|
||||
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
|
||||
items
|
||||
}
|
||||
| Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
|
||||
| Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_items(self) -> Vec<MikanRssItem> {
|
||||
match self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { items, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
|
||||
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
|
||||
items
|
||||
}
|
||||
| Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
|
||||
| Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> Option<&str> {
|
||||
match &self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { name, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { name, .. }) => {
|
||||
Some(name.as_str())
|
||||
}
|
||||
Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { .. }) => None,
|
||||
| Self::BangumiIndex(MikanBangumiIndexRssChannel { name, .. }) => Some(name.as_str()),
|
||||
Self::SubscriberStream(MikanSubscriberStreamRssChannel { .. }) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn url(&self) -> &Url {
|
||||
match &self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { url, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { url, .. })
|
||||
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { url, .. }) => url,
|
||||
| Self::BangumiIndex(MikanBangumiIndexRssChannel { url, .. })
|
||||
| Self::SubscriberStream(MikanSubscriberStreamRssChannel { url, .. }) => url,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -133,9 +124,9 @@ impl TryFrom<rss::Item> for MikanRssItem {
|
||||
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("homepage:link"))
|
||||
})?;
|
||||
|
||||
let MikanEpisodeHomepage {
|
||||
let MikanEpisodeHomepageUrlMeta {
|
||||
mikan_episode_id, ..
|
||||
} = extract_mikan_episode_id_from_homepage_url(&homepage).ok_or_else(|| {
|
||||
} = MikanEpisodeHomepageUrlMeta::parse_url(&homepage).ok_or_else(|| {
|
||||
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("mikan_episode_id"))
|
||||
})?;
|
||||
|
||||
@@ -278,17 +269,15 @@ pub async fn extract_mikan_rss_channel_from_rss_link(
|
||||
channel_name,
|
||||
channel_link = channel_link.as_str(),
|
||||
mikan_bangumi_id,
|
||||
"MikanBangumiAggregationRssChannel extracted"
|
||||
"MikanBangumiIndexRssChannel extracted"
|
||||
);
|
||||
|
||||
Ok(MikanRssChannel::BangumiAggregation(
|
||||
MikanBangumiAggregationRssChannel {
|
||||
name: channel_name,
|
||||
mikan_bangumi_id,
|
||||
url: channel_link,
|
||||
items,
|
||||
},
|
||||
))
|
||||
Ok(MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel {
|
||||
name: channel_name,
|
||||
mikan_bangumi_id,
|
||||
url: channel_link,
|
||||
items,
|
||||
}))
|
||||
}
|
||||
} else if let Some(MikanSubscriberAggregationRssUrlMeta {
|
||||
mikan_aggregation_id,
|
||||
@@ -317,8 +306,8 @@ pub async fn extract_mikan_rss_channel_from_rss_link(
|
||||
"MikanSubscriberAggregationRssChannel extracted"
|
||||
);
|
||||
|
||||
Ok(MikanRssChannel::SubscriberAggregation(
|
||||
MikanSubscriberAggregationRssChannel {
|
||||
Ok(MikanRssChannel::SubscriberStream(
|
||||
MikanSubscriberStreamRssChannel {
|
||||
mikan_aggregation_id,
|
||||
items,
|
||||
url: channel_link,
|
||||
@@ -342,7 +331,7 @@ mod tests {
|
||||
use crate::{
|
||||
errors::RecorderResult,
|
||||
extract::mikan::{
|
||||
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanRssChannel,
|
||||
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanRssChannel,
|
||||
extract_mikan_rss_channel_from_rss_link,
|
||||
},
|
||||
test_utils::mikan::build_testing_mikan_client,
|
||||
@@ -413,7 +402,7 @@ mod tests {
|
||||
|
||||
assert_matches!(
|
||||
&channel,
|
||||
MikanRssChannel::BangumiAggregation(MikanBangumiAggregationRssChannel { .. })
|
||||
MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel { .. })
|
||||
);
|
||||
|
||||
assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user