diff --git a/crates/recorder/examples/playground.rs b/crates/recorder/examples/playground.rs index 431b976..272c295 100644 --- a/crates/recorder/examples/playground.rs +++ b/crates/recorder/examples/playground.rs @@ -19,10 +19,10 @@ use recorder::{ use sea_orm_migration::MigratorTrait; async fn pull_mikan_bangumi_rss(ctx: &AppContext) -> eyre::Result<()> { - // let rss_link = "https://mikanani.me/RSS/Bangumi?bangumiId=3416&subgroupid=370"; + let rss_link = "https://mikanani.me/RSS/Bangumi?bangumiId=3416&subgroupid=370"; - let rss_link = - "https://mikanani.me/RSS/MyBangumi?token=FE9tccsML2nBPUUqpCuJW2uJZydAXCntHJ7RpD9LDP8%3d"; + // let rss_link = + // "https://mikanani.me/RSS/MyBangumi?token=FE9tccsML2nBPUUqpCuJW2uJZydAXCntHJ7RpD9LDP8%3d"; let subscription = if let Some(subscription) = subscriptions::Entity::find() .filter(subscriptions::Column::SourceUrl.eq(String::from(rss_link))) .one(&ctx.db) @@ -49,7 +49,7 @@ async fn pull_mikan_bangumi_rss(ctx: &AppContext) -> eyre::Result<()> { async fn init() -> eyre::Result { tracing_subscriber::fmt() - .with_max_level(tracing::Level::DEBUG) + .with_max_level(tracing::Level::INFO) .with_test_writer() .init(); let ctx = loco_rs::cli::playground::().await?; diff --git a/crates/recorder/src/extract/rawname/parser.rs b/crates/recorder/src/extract/rawname/parser.rs index f49c586..1fa8d26 100644 --- a/crates/recorder/src/extract/rawname/parser.rs +++ b/crates/recorder/src/extract/rawname/parser.rs @@ -11,8 +11,9 @@ const NAME_EXTRACT_REPLACE_ADHOC1_REPLACED: &str = "$1/$2"; lazy_static! { static ref TITLE_RE: Regex = Regex::new( - r#"(.*|\[.*])( -? \d+|\[\d+]|\[\d+.?[vV]\d]|第\d+[话話集]|\[第?\d+[话話集]]|\[\d+.?END]|[Ee][Pp]?\d+)(.*)"# + r#"(.*|\[.*])( -? \d+|\[\d+]|\[\d+.?[vV]\d]|第\d+[话話集]|\[第?\d+[话話集]]|\[\d+.?END]|[Ee][Pp]?\d+|\[\s*\d+\s*[\-\~]\s*\d+\s*\p{scx=Han}*[话話集]\s*])(.*)"# ).unwrap(); + static ref EP_COLLECTION_RE:Regex = Regex::new(r#"\[?\s*\d+\s*[\-\~]\s*\d+\s*\p{scx=Han}*合?[话話集]\s*]?"#).unwrap(); static ref MOVIE_TITLE_RE:Regex = Regex::new(r#"(.*|\[.*])(剧场版|[Mm]ovie|电影)(.*?)$"#).unwrap(); static ref RESOLUTION_RE: Regex = Regex::new(r"1080|720|2160|4K|2K").unwrap(); static ref SOURCE_L1_RE: Regex = Regex::new(r"B-Global|[Bb]aha|[Bb]ilibili|AT-X|W[Ee][Bb][Rr][Ii][Pp]|Sentai|B[Dd][Rr][Ii][Pp]|UHD[Rr][Ii][Pp]|NETFLIX").unwrap(); @@ -41,7 +42,7 @@ lazy_static! { static ref CLEAR_SUB_RE: Regex = Regex::new(r"_MP4|_MKV").unwrap(); } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] pub struct RawEpisodeMeta { pub name_en: Option, pub name_en_no_season: Option, @@ -252,14 +253,14 @@ fn extract_tags_from_title_extra( } pub fn check_is_movie(title: &str) -> bool { - MOVIE_SEASON_EXTRACT_RE.is_match(title) + MOVIE_TITLE_RE.is_match(title) } pub fn parse_episode_meta_from_raw_name(s: &str) -> eyre::Result { let raw_title = s.trim(); let raw_title_without_ch_brackets = replace_ch_bracket_to_en(raw_title); let fansub = extract_fansub(&raw_title_without_ch_brackets); - let is_movie = check_is_movie(&raw_title_without_ch_brackets); + let movie_capture = check_is_movie(&raw_title_without_ch_brackets); if let Some(title_re_match_obj) = MOVIE_TITLE_RE .captures(&raw_title_without_ch_brackets) .or(TITLE_RE.captures(&raw_title_without_ch_brackets)) @@ -278,9 +279,11 @@ pub fn parse_episode_meta_from_raw_name(s: &str) -> eyre::Result .map(|s| s.as_str().trim()) .unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups")); - if is_movie { + if movie_capture { title_body += title_episode; title_episode = ""; + } else if EP_COLLECTION_RE.is_match(&title_episode) { + title_episode = ""; } let title_body = title_body_pre_process(&title_body, fansub)?; @@ -306,16 +309,20 @@ pub fn parse_episode_meta_from_raw_name(s: &str) -> eyre::Result resolution, }) } else { - Err(eyre::eyre!("Can not parse episode meta from raw filename")) + Err(eyre::eyre!( + "Can not parse episode meta from raw filename {}", + raw_title + )) } } #[cfg(test)] mod tests { + use super::{parse_episode_meta_from_raw_name, RawEpisodeMeta}; fn test_raw_ep_parser_case(raw_name: &str, expected: &str) { - let expected: Option = serde_json::from_str(expected).unwrap(); + let expected: Option = serde_json::from_str(expected).unwrap_or_default(); let found = parse_episode_meta_from_raw_name(raw_name).ok(); if expected != found { @@ -727,6 +734,78 @@ mod tests { ) } + #[test] + fn test_ep_collections() { + test_raw_ep_parser_case( + r#"[奶²&LoliHouse] 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简日内封字幕]"#, + r#"{ + "name_en": "Kinokoinu: Mushroom Pup", + "name_en_no_season": "Kinokoinu: Mushroom Pup", + "name_zh": "蘑菇狗", + "name_zh_no_season": "蘑菇狗", + "season": 1, + "episode_index": 1, + "subtitle": "简日内封字幕", + "source": "WebRip", + "fansub": "奶²&LoliHouse", + "resolution": "1080p", + "name": " 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集]" + }"#, + ); + + test_raw_ep_parser_case( + r#"[LoliHouse] 叹气的亡灵想隐退 / Nageki no Bourei wa Intai shitai [01-13 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#, + r#"{ + "name_en": "Nageki no Bourei wa Intai shitai", + "name_en_no_season": "Nageki no Bourei wa Intai shitai", + "name_jp": null, + "name_jp_no_season": null, + "name_zh": "叹气的亡灵想隐退", + "name_zh_no_season": "叹气的亡灵想隐退", + "season": 1, + "season_raw": null, + "episode_index": 1, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "LoliHouse", + "resolution": "1080p" + }"#, + ); + + test_raw_ep_parser_case( + r#"[LoliHouse] 精灵幻想记 第二季 / Seirei Gensouki S2 [01-12 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#, + r#"{ + "name_en": "Seirei Gensouki S2", + "name_en_no_season": "Seirei Gensouki", + "name_zh": "精灵幻想记 第二季", + "name_zh_no_season": "精灵幻想记", + "season": 2, + "season_raw": "第二季", + "episode_index": 1, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "LoliHouse", + "resolution": "1080p" + }"#, + ); + + test_raw_ep_parser_case( + r#"[喵萌奶茶屋&LoliHouse] 超自然武装当哒当 / 胆大党 / Dandadan [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简繁日内封字幕][Fin]"#, + r#" { + "name_en": "Dandadan", + "name_en_no_season": "Dandadan", + "name_zh": "超自然武装当哒当", + "name_zh_no_season": "超自然武装当哒当", + "season": 1, + "episode_index": 1, + "subtitle": "简繁日内封字幕", + "source": "WebRip", + "fansub": "喵萌奶茶屋&LoliHouse", + "resolution": "1080p" + }"#, + ); + } + // TODO: FIXME #[test] fn test_bad_cases() { diff --git a/crates/recorder/src/models/episodes.rs b/crates/recorder/src/models/episodes.rs index 08ed798..cd94364 100644 --- a/crates/recorder/src/models/episodes.rs +++ b/crates/recorder/src/models/episodes.rs @@ -27,7 +27,13 @@ impl Model { let db = &ctx.db; let new_episode_active_modes = creations .into_iter() - .flat_map(|cr| ActiveModel::from_mikan_episode_meta(ctx, cr)); + .map(|cr| ActiveModel::from_mikan_episode_meta(ctx, cr)) + .inspect(|result| { + if let Err(e) = result { + tracing::warn!("Failed to create episode: {:?}", e); + } + }) + .flatten(); Entity::insert_many(new_episode_active_modes) .on_conflict( @@ -50,7 +56,12 @@ impl ActiveModel { ) -> eyre::Result { let item = creation.episode; let bgm = creation.bangumi; - let raw_meta = parse_episode_meta_from_raw_name(&item.episode_title)?; + let raw_meta = parse_episode_meta_from_raw_name(&item.episode_title) + .inspect_err(|e| { + tracing::warn!("Failed to parse episode meta: {:?}", e); + }) + .ok() + .unwrap_or_default(); let homepage = build_mikan_episode_homepage( ctx.get_mikan_client().base_url(), &item.mikan_episode_id,