feature: add new mikan scrapers
@@ -108,7 +108,7 @@ pub fn parse_episode_media_meta_from_torrent(
     let media_name = torrent_path
         .file_name()
         .with_whatever_context::<_, _, RecorderError>(|| {
-            format!("failed to get file name of {}", torrent_path)
+            format!("failed to get file name of {torrent_path}")
         })?;
     let mut match_obj = None;
     for rule in TORRENT_EP_PARSE_RULES.iter() {
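This pattern recurs throughout the commit: Rust 2021's inline format arguments capture identifiers directly in the format string instead of passing them positionally. A minimal sketch of the before/after; `torrent_path` here is a plain `&str` stand-in, not the crate's actual type:

```rust
fn describe(torrent_path: &str) -> String {
    // Positional form: the argument is passed separately.
    let old = format!("failed to get file name of {}", torrent_path);
    // Inline capture: the identifier is resolved from the enclosing scope.
    // Only bare identifiers work; expressions like `{self.path}` do not.
    let new = format!("failed to get file name of {torrent_path}");
    assert_eq!(old, new);
    new
}
```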
@@ -141,7 +141,7 @@ pub fn parse_episode_media_meta_from_torrent(
         .unwrap_or(1);
     let extname = torrent_path
         .extension()
-        .map(|e| format!(".{}", e))
+        .map(|e| format!(".{e}"))
         .unwrap_or_default();
     Ok(TorrentEpisodeMediaMeta {
         fansub: fansub.map(|s| s.to_string()),
@@ -168,7 +168,7 @@ pub fn parse_episode_subtitle_meta_from_torrent(
     let media_name = torrent_path
         .file_name()
         .with_whatever_context::<_, _, RecorderError>(|| {
-            format!("failed to get file name of {}", torrent_path)
+            format!("failed to get file name of {torrent_path}")
         })?;

     let lang = get_subtitle_lang(media_name);
@@ -271,7 +271,7 @@ mod tests {
     pub fn test_torrent_ep_parser(raw_name: &str, expected: &str) {
         let extname = Path::new(raw_name)
             .extension()
-            .map(|e| format!(".{}", e))
+            .map(|e| format!(".{e}"))
             .unwrap_or_default()
             .to_lowercase();

@@ -19,21 +19,19 @@ pub fn extract_background_image_src_from_style_attr(
         match prop {
             Property::BackgroundImage(images) => {
                 for img in images {
-                    if let CSSImage::Url(path) = img {
-                        if let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
-                        {
-                            return Some(url);
-                        }
-                    }
+                    if let CSSImage::Url(path) = img
+                        && let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
+                    {
+                        return Some(url);
+                    }
                 }
             }
             Property::Background(backgrounds) => {
                 for bg in backgrounds {
-                    if let CSSImage::Url(path) = &bg.image {
-                        if let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
-                        {
-                            return Some(url);
-                        }
-                    }
+                    if let CSSImage::Url(path) = &bg.image
+                        && let Some(url) = extract_image_src_from_str(path.url.trim(), base_url)
+                    {
+                        return Some(url);
+                    }
                 }
             }
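These hunks (and several later ones) collapse nested `if let` blocks into let-chains (`if let … && let …`), which require the Rust 2024 edition. A self-contained sketch of the transformation, using hypothetical stand-ins rather than the crate's lightningcss types:

```rust
// Hypothetical stand-in for the CSS image value matched above.
enum Image {
    Url(String),
    None,
}

// Stand-in for `extract_image_src_from_str`: resolve protocol-relative URLs.
fn resolve(path: &str) -> Option<String> {
    path.strip_prefix("//").map(|rest| format!("https://{rest}"))
}

// Nested form (before): two indentation levels for one logical condition.
fn first_url_nested(images: &[Image]) -> Option<String> {
    for img in images {
        if let Image::Url(path) = img {
            if let Some(url) = resolve(path.trim()) {
                return Some(url);
            }
        }
    }
    None
}

// Let-chain form (after): both bindings in one `if`, same semantics.
fn first_url_chained(images: &[Image]) -> Option<String> {
    for img in images {
        if let Image::Url(path) = img
            && let Some(url) = resolve(path.trim())
        {
            return Some(url);
        }
    }
    None
}

fn main() {
    let imgs = vec![Image::None, Image::Url("//example.com/a.png".into())];
    assert_eq!(first_url_nested(&imgs), first_url_chained(&imgs));
}
```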
@@ -1,4 +1,4 @@
-use axum::http::{header, request::Parts, HeaderName, HeaderValue, Uri};
+use axum::http::{HeaderName, HeaderValue, Uri, header, request::Parts};
 use itertools::Itertools;
 use url::Url;

@@ -121,11 +121,7 @@ impl ForwardedRelatedInfo {
             .and_then(|s| s.to_str().ok())
             .and_then(|s| {
                 let l = s.split(",").map(|s| s.trim().to_string()).collect_vec();
-                if l.is_empty() {
-                    None
-                } else {
-                    Some(l)
-                }
+                if l.is_empty() { None } else { Some(l) }
             });

         let host = headers
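The collapsed `if l.is_empty() { None } else { Some(l) }` is rustfmt's single-line form for short branches, applied here to comma-separated forwarding-header parsing. A sketch of the idea as a standalone helper; the function name is illustrative, not the crate's API, and unlike the original it filters out empty entries so the `is_empty` guard is actually reachable:

```rust
use itertools::Itertools;

// Split a comma-separated header value (e.g. `X-Forwarded-For`)
// into trimmed, non-empty entries.
fn parse_list_header(value: &str) -> Option<Vec<String>> {
    let l = value
        .split(',')
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .map(str::to_string)
        .collect_vec();
    if l.is_empty() { None } else { Some(l) }
}

fn main() {
    assert_eq!(
        parse_list_header("1.1.1.1, 2.2.2.2").as_deref(),
        Some(&["1.1.1.1".to_string(), "2.2.2.2".to_string()][..])
    );
    assert_eq!(parse_list_header(" , "), None);
}
```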
@@ -165,7 +161,7 @@ impl ForwardedRelatedInfo {

     pub fn resolved_origin(&self) -> Option<Url> {
         if let (Some(protocol), Some(host)) = (self.resolved_protocol(), self.resolved_host()) {
-            let origin = format!("{}://{}", protocol, host);
+            let origin = format!("{protocol}://{host}");
             Url::parse(&origin).ok()
         } else {
             None
@@ -3,7 +3,7 @@ use url::Url;
 pub fn extract_image_src_from_str(image_src: &str, base_url: &Url) -> Option<Url> {
     let mut image_url = base_url.join(image_src).ok()?;
     if let Some((_, value)) = image_url.query_pairs().find(|(key, _)| key == "webp") {
-        image_url.set_query(Some(&format!("webp={}", value)));
+        image_url.set_query(Some(&format!("webp={value}")));
     } else {
         image_url.set_query(None);
     }
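This function normalizes an image URL so that only the `webp` query parameter survives and everything else is stripped. A sketch of the same normalization against the `url` crate; the helper name and example URL are assumptions, and the value is copied to an owned `String` before mutation so the borrow from `query_pairs()` ends first:

```rust
use url::Url;

// Keep only the `webp` pair (if present) and drop any other query params.
fn normalize_image_query(image_url: &mut Url) {
    // `query_pairs()` borrows the Url, so copy the value out before mutating.
    let webp = image_url
        .query_pairs()
        .find(|(key, _)| key == "webp")
        .map(|(_, value)| value.into_owned());
    match webp {
        Some(value) => image_url.set_query(Some(&format!("webp={value}"))),
        None => image_url.set_query(None),
    }
}

fn main() {
    let mut u = Url::parse("https://example.com/images/a.jpg?webp=1&h=400").unwrap();
    normalize_image_query(&mut u);
    assert_eq!(u.as_str(), "https://example.com/images/a.jpg?webp=1");
}
```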
@@ -3,7 +3,6 @@ use std::{fmt::Debug, ops::Deref, sync::Arc};
 use fetch::{HttpClient, HttpClientTrait};
 use maplit::hashmap;
 use sea_orm::DbErr;
-use secrecy::SecretBox;
 use serde::{Deserialize, Serialize};
 use url::Url;
 use util::OptDynErr;
@@ -23,8 +22,6 @@ pub struct MikanCredentialForm {
     pub user_agent: String,
 }

-pub type MikanAuthSecrecy = SecretBox<MikanCredentialForm>;
-
 impl Debug for MikanCredentialForm {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("MikanCredentialForm")
@@ -72,7 +69,7 @@ impl MikanClient {
             Ok(false)
         } else {
             Err(RecorderError::Credential3rdError {
-                message: format!("mikan account check has login failed, status = {}", status),
+                message: format!("mikan account check has login failed, status = {status}"),
                 source: None.into(),
             })
         }
@@ -189,7 +186,7 @@ impl MikanClient {
             userpass_credential_opt = Some(userpass_credential);
         } else {
             return Err(RecorderError::from_db_record_not_found(
-                DbErr::RecordNotFound(format!("credential={} not found", credential_id)),
+                DbErr::RecordNotFound(format!("credential={credential_id} not found")),
             ));
         }
     }
@@ -1,4 +1,4 @@
-pub const MIKAN_BUCKET_KEY: &str = "mikan";
+pub const MIKAN_POSTER_BUCKET_KEY: &str = "mikan_poster";
 pub const MIKAN_UNKNOWN_FANSUB_NAME: &str = "生肉/不明字幕";
 pub const MIKAN_UNKNOWN_FANSUB_ID: &str = "202";
 pub const MIKAN_LOGIN_PAGE_PATH: &str = "/Account/Login";
@@ -1,23 +1,31 @@
-pub mod client;
-pub mod config;
-pub mod constants;
-pub mod rss_extract;
-pub mod web_extract;
+mod client;
+mod config;
+mod constants;
+mod rss;
+mod web;

 pub use client::{MikanClient, MikanCredentialForm};
 pub use config::MikanConfig;
-pub use constants::MIKAN_BUCKET_KEY;
-pub use rss_extract::{
-    MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta,
-    MikanRssChannel, MikanRssItem, MikanSubscriberAggregationRssChannel,
-    MikanSubscriberAggregationRssUrlMeta, build_mikan_bangumi_rss_url,
-    build_mikan_subscriber_aggregation_rss_url, extract_mikan_bangumi_id_from_rss_url,
-    extract_mikan_rss_channel_from_rss_link, extract_mikan_subscriber_aggregation_id_from_rss_link,
+pub use constants::{
+    MIKAN_ACCOUNT_MANAGE_PAGE_PATH, MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH,
+    MIKAN_POSTER_BUCKET_KEY, MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME,
 };
-pub use web_extract::{
-    MikanBangumiMeta, MikanEpisodeMeta, MikanSeasonStr, build_mikan_bangumi_homepage_url,
-    build_mikan_episode_homepage_url, build_mikan_season_flow_url,
-    extract_mikan_bangumi_indices_meta_from_season_flow_fragment,
-    extract_mikan_bangumi_meta_from_bangumi_homepage,
-    extract_mikan_episode_meta_from_episode_homepage,
+pub use rss::{
+    MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel,
+    MikanRssItem, MikanSubscriberAggregationRssUrlMeta, MikanSubscriberStreamRssChannel,
+    build_mikan_bangumi_rss_url, build_mikan_subscriber_aggregation_rss_url,
+    extract_mikan_bangumi_id_from_rss_url, extract_mikan_rss_channel_from_rss_link,
+    extract_mikan_subscriber_aggregation_id_from_rss_link,
 };
+pub use web::{
+    MikanBangumiHomepageUrlMeta, MikanBangumiIndexHomepageUrlMeta, MikanBangumiIndexMeta,
+    MikanBangumiMeta, MikanBangumiPosterMeta, MikanEpisodeHomepageUrlMeta, MikanEpisodeMeta,
+    MikanSeasonFlowUrlMeta, MikanSeasonStr, build_mikan_bangumi_expand_subscribed_url,
+    build_mikan_bangumi_homepage_url, build_mikan_episode_homepage_url,
+    build_mikan_season_flow_url, extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
+    extract_mikan_episode_meta_from_episode_homepage_html,
+    scrape_mikan_bangumi_meta_from_bangumi_homepage_url,
+    scrape_mikan_bangumi_meta_list_from_season_flow_url,
+    scrape_mikan_episode_meta_from_episode_homepage_url, scrape_mikan_poster_data_from_image_url,
+    scrape_mikan_poster_meta_from_image_url,
+};
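The `mod.rs` rewrite narrows the submodules from `pub mod` to private `mod` and exposes a curated facade via `pub use`, so callers import `extract::mikan::MikanClient` rather than reaching into `extract::mikan::client`. A minimal sketch of the pattern with illustrative names:

```rust
// Facade pattern: submodules stay private, the parent re-exports the API.
mod mikan {
    mod client {
        pub struct MikanClient;

        impl MikanClient {
            pub fn new() -> Self {
                MikanClient
            }
        }
    }

    // Only re-exported names are public API; the internal file layout
    // (e.g. renaming `rss_extract` to `rss`) no longer breaks callers.
    pub use client::MikanClient;
}

fn main() {
    let _client = mikan::MikanClient::new();
    // mikan::client::MikanClient::new(); // error: module `client` is private
}
```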
@@ -10,10 +10,7 @@ use url::Url;

 use crate::{
     errors::app_error::{RecorderError, RecorderResult},
-    extract::mikan::{
-        MikanClient,
-        web_extract::{MikanEpisodeHomepage, extract_mikan_episode_id_from_homepage_url},
-    },
+    extract::mikan::{MikanClient, MikanEpisodeHomepageUrlMeta},
 };

 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -37,7 +34,7 @@ pub struct MikanBangumiRssChannel {
 }

 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct MikanBangumiAggregationRssChannel {
+pub struct MikanBangumiIndexRssChannel {
     pub name: String,
     pub url: Url,
     pub mikan_bangumi_id: String,
@@ -45,7 +42,7 @@ pub struct MikanBangumiAggregationRssChannel {
 }

 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct MikanSubscriberAggregationRssChannel {
+pub struct MikanSubscriberStreamRssChannel {
     pub mikan_aggregation_id: String,
     pub url: Url,
     pub items: Vec<MikanRssItem>,
@@ -54,46 +51,40 @@ pub struct MikanSubscriberAggregationRssChannel {
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub enum MikanRssChannel {
     Bangumi(MikanBangumiRssChannel),
-    BangumiAggregation(MikanBangumiAggregationRssChannel),
-    SubscriberAggregation(MikanSubscriberAggregationRssChannel),
+    BangumiIndex(MikanBangumiIndexRssChannel),
+    SubscriberStream(MikanSubscriberStreamRssChannel),
 }

 impl MikanRssChannel {
     pub fn items(&self) -> &[MikanRssItem] {
         match &self {
             Self::Bangumi(MikanBangumiRssChannel { items, .. })
-            | Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
-            | Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
-                items
-            }
+            | Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
+            | Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
         }
     }

     pub fn into_items(self) -> Vec<MikanRssItem> {
         match self {
             Self::Bangumi(MikanBangumiRssChannel { items, .. })
-            | Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
-            | Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
-                items
-            }
+            | Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
+            | Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
         }
     }

     pub fn name(&self) -> Option<&str> {
         match &self {
             Self::Bangumi(MikanBangumiRssChannel { name, .. })
-            | Self::BangumiAggregation(MikanBangumiAggregationRssChannel { name, .. }) => {
-                Some(name.as_str())
-            }
-            Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { .. }) => None,
+            | Self::BangumiIndex(MikanBangumiIndexRssChannel { name, .. }) => Some(name.as_str()),
+            Self::SubscriberStream(MikanSubscriberStreamRssChannel { .. }) => None,
         }
     }

     pub fn url(&self) -> &Url {
         match &self {
             Self::Bangumi(MikanBangumiRssChannel { url, .. })
-            | Self::BangumiAggregation(MikanBangumiAggregationRssChannel { url, .. })
-            | Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { url, .. }) => url,
+            | Self::BangumiIndex(MikanBangumiIndexRssChannel { url, .. })
+            | Self::SubscriberStream(MikanSubscriberStreamRssChannel { url, .. }) => url,
         }
     }
 }
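These accessors rely on or-patterns: several enum variants are matched in one arm, each destructuring a field with the same name and type, so a single body serves them all. A reduced sketch with hypothetical variants:

```rust
struct A {
    items: Vec<u32>,
}

struct B {
    items: Vec<u32>,
}

enum Channel {
    A(A),
    B(B),
}

impl Channel {
    // One arm covers both variants because each alternative binds
    // `items` with the same type (`&Vec<u32>` under match ergonomics).
    fn items(&self) -> &[u32] {
        match self {
            Channel::A(A { items, .. }) | Channel::B(B { items, .. }) => items,
        }
    }
}

fn main() {
    let c = Channel::B(B { items: vec![1, 2] });
    assert_eq!(c.items(), &[1, 2]);
}
```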
@@ -133,9 +124,9 @@ impl TryFrom<rss::Item> for MikanRssItem {
             RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("homepage:link"))
         })?;

-        let MikanEpisodeHomepage {
+        let MikanEpisodeHomepageUrlMeta {
             mikan_episode_id, ..
-        } = extract_mikan_episode_id_from_homepage_url(&homepage).ok_or_else(|| {
+        } = MikanEpisodeHomepageUrlMeta::parse_url(&homepage).ok_or_else(|| {
             RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("mikan_episode_id"))
         })?;

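Here a free function (`extract_mikan_episode_id_from_homepage_url`) becomes an associated constructor (`MikanEpisodeHomepageUrlMeta::parse_url`), keeping the parsing logic next to the type it produces. A sketch of the shape; the `/Home/Episode/<id>` path format is an assumption, not confirmed by this diff:

```rust
use url::Url;

#[derive(Debug, PartialEq)]
struct MikanEpisodeHomepageUrlMeta {
    mikan_episode_id: String,
}

impl MikanEpisodeHomepageUrlMeta {
    // Assumed URL shape: https://<host>/Home/Episode/<id>
    fn parse_url(url: &Url) -> Option<Self> {
        let mut segments = url.path_segments()?;
        if (segments.next()?, segments.next()?) != ("Home", "Episode") {
            return None;
        }
        let id = segments.next()?;
        (!id.is_empty()).then(|| Self {
            mikan_episode_id: id.to_string(),
        })
    }
}

fn main() {
    let url = Url::parse("https://mikanani.me/Home/Episode/abc123").unwrap();
    assert_eq!(
        MikanEpisodeHomepageUrlMeta::parse_url(&url),
        Some(MikanEpisodeHomepageUrlMeta {
            mikan_episode_id: "abc123".to_string()
        })
    );
}
```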
@@ -278,17 +269,15 @@ pub async fn extract_mikan_rss_channel_from_rss_link(
             channel_name,
             channel_link = channel_link.as_str(),
             mikan_bangumi_id,
-            "MikanBangumiAggregationRssChannel extracted"
+            "MikanBangumiIndexRssChannel extracted"
         );

-        Ok(MikanRssChannel::BangumiAggregation(
-            MikanBangumiAggregationRssChannel {
-                name: channel_name,
-                mikan_bangumi_id,
-                url: channel_link,
-                items,
-            },
-        ))
+        Ok(MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel {
+            name: channel_name,
+            mikan_bangumi_id,
+            url: channel_link,
+            items,
+        }))
     }
 } else if let Some(MikanSubscriberAggregationRssUrlMeta {
     mikan_aggregation_id,
@@ -317,8 +306,8 @@ pub async fn extract_mikan_rss_channel_from_rss_link(
             "MikanSubscriberAggregationRssChannel extracted"
         );

-        Ok(MikanRssChannel::SubscriberAggregation(
-            MikanSubscriberAggregationRssChannel {
+        Ok(MikanRssChannel::SubscriberStream(
+            MikanSubscriberStreamRssChannel {
                 mikan_aggregation_id,
                 items,
                 url: channel_link,
@@ -342,7 +331,7 @@ mod tests {
     use crate::{
         errors::RecorderResult,
         extract::mikan::{
-            MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanRssChannel,
+            MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanRssChannel,
             extract_mikan_rss_channel_from_rss_link,
         },
         test_utils::mikan::build_testing_mikan_client,
@@ -413,7 +402,7 @@ mod tests {

         assert_matches!(
             &channel,
-            MikanRssChannel::BangumiAggregation(MikanBangumiAggregationRssChannel { .. })
+            MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel { .. })
         );

         assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));
File diff suppressed because it is too large
@@ -101,19 +101,19 @@ fn title_body_pre_process(title_body: &str, fansub: Option<&str>) -> RecorderRes
             raw = sub.replace_all(&raw, "").to_string();
         }
     }
-    if let Some(m) = MAIN_TITLE_PRE_PROCESS_BACKETS_RE.find(&raw) {
-        if m.len() as f32 > (raw.len() as f32) * 0.5 {
-            let mut raw1 = MAIN_TITLE_PRE_PROCESS_BACKETS_RE_SUB1
-                .replace(&raw, "")
-                .chars()
-                .collect_vec();
-            while let Some(ch) = raw1.pop() {
-                if ch == ']' {
-                    break;
-                }
-            }
-            raw = raw1.into_iter().collect();
-        }
-    }
+    if let Some(m) = MAIN_TITLE_PRE_PROCESS_BACKETS_RE.find(&raw)
+        && m.len() as f32 > (raw.len() as f32) * 0.5
+    {
+        let mut raw1 = MAIN_TITLE_PRE_PROCESS_BACKETS_RE_SUB1
+            .replace(&raw, "")
+            .chars()
+            .collect_vec();
+        while let Some(ch) = raw1.pop() {
+            if ch == ']' {
+                break;
+            }
+        }
+        raw = raw1.into_iter().collect();
+    }
     Ok(raw.to_string())
 }
@@ -136,23 +136,21 @@ pub fn extract_season_from_title_body(title_body: &str) -> (String, Option<Strin

     for s in seasons {
         season_raw = Some(s);
-        if let Some(m) = SEASON_EXTRACT_SEASON_EN_PREFIX_RE.find(s) {
-            if let Ok(s) = SEASON_EXTRACT_SEASON_ALL_RE
+        if let Some(m) = SEASON_EXTRACT_SEASON_EN_PREFIX_RE.find(s)
+            && let Ok(s) = SEASON_EXTRACT_SEASON_ALL_RE
                 .replace_all(m.as_str(), "")
                 .parse::<i32>()
-            {
-                season = s;
-                break;
-            }
+        {
+            season = s;
+            break;
         }
-        if let Some(m) = SEASON_EXTRACT_SEASON_EN_NTH_RE.find(s) {
-            if let Some(s) = DIGIT_1PLUS_REG
+        if let Some(m) = SEASON_EXTRACT_SEASON_EN_NTH_RE.find(s)
+            && let Some(s) = DIGIT_1PLUS_REG
                 .find(m.as_str())
                 .and_then(|s| s.as_str().parse::<i32>().ok())
-            {
-                season = s;
-                break;
-            }
+        {
+            season = s;
+            break;
         }
-        if let Some(m) = SEASON_EXTRACT_SEASON_ZH_PREFIX_RE.find(s) {
-            if let Ok(s) = SEASON_EXTRACT_SEASON_ZH_PREFIX_SUB_RE
+        if let Some(m) = SEASON_EXTRACT_SEASON_ZH_PREFIX_RE.find(s)
+            && let Ok(s) = SEASON_EXTRACT_SEASON_ZH_PREFIX_SUB_RE
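The same let-chain rewrite applies here, chaining a regex `find` with a fallible parse so the success path indents once. A self-contained sketch using the `regex` crate with a simplified pattern; the crate's season regexes are precompiled statics, elided here, and the pattern below is illustrative only:

```rust
use regex::Regex;

// Pull a season number out of fragments like "S2" or "Season 3".
// The regex is compiled per call for brevity.
fn find_season(s: &str) -> Option<i32> {
    let re = Regex::new(r"(?i)s(?:eason\s*)?(\d+)").ok()?;
    if let Some(caps) = re.captures(s)
        && let Ok(season) = caps[1].parse::<i32>()
    {
        return Some(season);
    }
    None
}

fn main() {
    assert_eq!(find_season("Title Season 2"), Some(2));
    assert_eq!(find_season("S3 [1080p]"), Some(3));
    assert_eq!(find_season("Movie"), None);
}
```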