diff --git a/Cargo.lock b/Cargo.lock index 9b4915c..664aa05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -933,6 +933,16 @@ dependencies = [ "walkdir", ] +[[package]] +name = "calendrical_calculations" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f6df87e869fb08be61c7e97ced8e69ab802df1d8bc612ed67dba78c07fbc12c" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cc" version = "1.2.26" @@ -1280,6 +1290,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core_maths" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" +dependencies = [ + "libm", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -2082,6 +2101,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fixed_decimal" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + [[package]] name = "flate2" version = "1.1.2" @@ -2901,6 +2931,103 @@ dependencies = [ "cc", ] +[[package]] +name = "icu" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab13fe39da5da564b88228e9f08815c9d0efbe9ec244e72b149d9994e10f1054" +dependencies = [ + "icu_calendar", + "icu_casemap", + "icu_collator", + "icu_collections", + "icu_datetime", + "icu_decimal", + "icu_experimental", + "icu_list", + "icu_locale", + "icu_normalizer", + "icu_pattern", + "icu_plurals", + "icu_properties", + "icu_provider", + "icu_segmenter", + "icu_time", +] + +[[package]] +name = "icu_calendar" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7a6ed1ea995a24dff839bc5ca4471ce2fa18ba14d8b09061c2527a46a1c6079" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "ixdtf", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_calendar_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219c8639ab936713a87b571eed2bc2615aa9137e8af6eb221446ee5644acc18" + +[[package]] +name = "icu_casemap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dc5e74b3c9d7b63e0d7c5fd54ee8c135705df2ea2aa558082dd555dc9747a97" +dependencies = [ + "displaydoc", + "icu_casemap_data", + "icu_collections", + "icu_locale_core", + "icu_properties", + "icu_provider", + "potential_utf", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_casemap_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7584067558ab4c60c95d1ac2abd1588689cb4bcd4e099507f62dae86ae8d2c0" + +[[package]] +name = "icu_collator" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ad4c6a556938dfd31f75a8c54141079e8821dc697ffb799cfe0f0fa11f2edc" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locale", + "icu_locale_core", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collator_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d880b8e680799eabd90c054e1b95526cd48db16c95269f3c89fb3117e1ac92c5" + [[package]] name = "icu_collections" version = "2.0.0" @@ -2914,6 +3041,139 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_datetime" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0790c15e3d6ae3303365fa2337b4f6469de257916141110d14dcaf73f1d31ac5" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locale", + "icu_locale_core", + "icu_pattern", + "icu_plurals", + "icu_provider", + "icu_time", + "potential_utf", + "smallvec", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_datetime_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83791ac10bb7b774f130bb81fa89c4059de710dcef53caa0b86e645212d6d54c" + +[[package]] +name = "icu_decimal" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "serde", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5" + +[[package]] +name = "icu_experimental" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe3d7e64892a434b08d5a58b53127e47a095ff780305f563c8c01798a1051b0" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_casemap", + "icu_collections", + "icu_decimal", + "icu_experimental_data", + "icu_list", + "icu_locale", + "icu_locale_core", + "icu_normalizer", + "icu_pattern", + "icu_plurals", + "icu_properties", + "icu_provider", + "litemap", + "num-bigint", + "num-rational 0.4.2", + "num-traits", + "potential_utf", + "smallvec", + "tinystr", + "writeable", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_experimental_data" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b60d32ba5610adfc2083f5a759f55d9a9082ebf72750f126cb1630844eea1acf" + +[[package]] +name = "icu_list" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e26f94ec776bb8b28cedc7dcf91033b822c5cb4c1783cf7a3f796fc168aa0c8b" +dependencies = [ + "displaydoc", + "icu_list_data", + "icu_locale", + "icu_provider", + "regex-automata 0.4.9", + "serde", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_list_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a456a2412458ca45e181d9d51c5090ef8cd90f5692e11d34bafab3b3be1c76b" + +[[package]] +name = "icu_locale" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ae5921528335e91da1b6c695dbf1ec37df5ac13faa3f91e5640be93aa2fbefd" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_locale_data", + "icu_provider", + "potential_utf", + "tinystr", + "zerovec", +] + [[package]] name = "icu_locale_core" version = "2.0.0" @@ -2927,6 +3187,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_locale_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fdef0c124749d06a743c69e938350816554eb63ac979166590e2b4ee4252765" + [[package]] name = "icu_normalizer" version = "2.0.0" @@ -2939,6 +3205,9 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", + "utf16_iter", + "utf8_iter", + "write16", "zerovec", ] @@ -2948,6 +3217,39 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +[[package]] +name = "icu_pattern" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "983825f401e6bc4a13c45d552ffd9ad6f3f6b6bc0ec03f31d6835a90a46deb1f" +dependencies = [ + "displaydoc", + "either", + "writeable", + "yoke", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd83a65f58b6f28e1f3da8c6ada6b415ee3ad5cb480b75bdb669f34d72dd179" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locale", + "icu_plurals_data", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_plurals_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ec552d761eaf4a1c39ad28936e0af77a41bf01ff756ea54be4f8bfc21c265d7" + [[package]] name = "icu_properties" version = "2.0.1" @@ -2987,6 +3289,56 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_segmenter" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e185fc13b6401c138cf40db12b863b35f5edf31b88192a545857b41aeaf7d3d3" +dependencies = [ + "core_maths", + "displaydoc", + "icu_collections", + "icu_locale", + "icu_locale_core", + "icu_provider", + "icu_segmenter_data", + "potential_utf", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_segmenter_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5360a2fbe97f617c4f8b944356dedb36d423f7da7f13c070995cf89e59f01220" + +[[package]] +name = "icu_time" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10d01a4a2dcbc5e5180ef113920e7461d0e9caaddb3567d81c4eca262efe55c0" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar", + "icu_locale_core", + "icu_provider", + "icu_time_data", + "ixdtf", + "serde", + "tinystr", + "writeable", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_time_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8472be4410d26a03d7208cae3a76c798dd6766e8226ab977cd8b2d349a6dbf08" + [[package]] name = "ident_case" version = "1.0.1" @@ -3219,6 +3571,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "ixdtf" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8289f7f711a1a51f80e2e368355d023042ca55d8d554fd5e953f01464c15842d" +dependencies = [ + "displaydoc", +] + [[package]] name = "jobserver" version = "0.1.33" @@ -4154,6 +4515,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "num" version = "0.2.1" @@ -4163,7 +4533,7 @@ dependencies = [ "num-complex", "num-integer", "num-iter", - "num-rational", + "num-rational 0.2.4", "num-traits", ] @@ -4241,6 +4611,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -4869,6 +5250,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" dependencies = [ + "serde", "zerovec", ] @@ -5252,6 +5634,8 @@ dependencies = [ "futures", "html-escape", "http", + "icu", + "icu_properties", "inquire", "insta", "ipnetwork", @@ -5265,6 +5649,8 @@ dependencies = [ "mockito", "moka", "nanoid", + "nom", + "num-traits", "once_cell", "opendal", "openidconnect", @@ -5296,6 +5682,7 @@ dependencies = [ "tracing", "tracing-appender", "tracing-subscriber", + "tracing-tree", "typed-builder 0.21.0", "url", "util", @@ -7456,7 +7843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", - "nu-ansi-term", + "nu-ansi-term 0.46.0", "once_cell", "regex", "serde", @@ -7470,6 +7857,18 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "tracing-tree" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f459ca79f1b0d5f71c54ddfde6debfc59c8b6eeb46808ae492077f739dc7b49c" +dependencies = [ + "nu-ansi-term 0.50.1", + "tracing-core", + "tracing-log", + "tracing-subscriber", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -7711,6 +8110,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8-width" version = "0.1.7" @@ -8296,11 +8701,20 @@ dependencies = [ "bitflags 2.9.1", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + [[package]] name = "writeable" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +dependencies = [ + "either", +] [[package]] name = "wyz" diff --git a/apps/recorder/Cargo.toml b/apps/recorder/Cargo.toml index 8e847b4..e1366d6 100644 --- a/apps/recorder/Cargo.toml +++ b/apps/recorder/Cargo.toml @@ -129,6 +129,11 @@ nanoid = "0.4.0" jwtk = "0.4.0" percent-encoding = "2.3.1" mime_guess = "2.0.5" +nom = "8.0.0" +icu_properties = "2.0.1" +icu = "2.0.0" +num-traits = "0.2.19" +tracing-tree = "0.4.0" [dev-dependencies] diff --git a/apps/recorder/src/extract/mikan/subscription.rs b/apps/recorder/src/extract/mikan/subscription.rs index 1351f3f..8884d72 100644 --- a/apps/recorder/src/extract/mikan/subscription.rs +++ b/apps/recorder/src/extract/mikan/subscription.rs @@ -5,6 +5,7 @@ use std::{ }; use async_graphql::{InputObject, SimpleObject}; +use async_stream::try_stream; use fetch::fetch_bytes; use futures::{Stream, TryStreamExt, pin_mut, try_join}; use maplit::hashmap; @@ -292,17 +293,19 @@ impl SubscriptionTrait for MikanSeasonSubscription { } async fn sync_feeds_incremental(&self, ctx: Arc) -> RecorderResult<()> { - let rss_item_list = self - .get_rss_item_list_from_subsribed_url_rss_link(ctx.as_ref()) - .await?; + let rss_item_stream = self.get_rss_item_stream_from_subsribed_url_rss_link(ctx.as_ref()); - sync_mikan_feeds_from_rss_item_list( - ctx.as_ref(), - rss_item_list, - self.get_subscriber_id(), - self.get_subscription_id(), - ) - .await?; + pin_mut!(rss_item_stream); + + while let Some(rss_item_chunk_list) = rss_item_stream.try_next().await? { + sync_mikan_feeds_from_rss_item_list( + ctx.as_ref(), + rss_item_chunk_list, + self.get_subscriber_id(), + self.get_subscription_id(), + ) + .await?; + } Ok(()) } @@ -393,48 +396,53 @@ impl MikanSeasonSubscription { ) } - #[tracing::instrument(err, skip(ctx))] - async fn get_rss_item_list_from_subsribed_url_rss_link( + fn get_rss_item_stream_from_subsribed_url_rss_link( &self, ctx: &dyn AppContextTrait, - ) -> RecorderResult> { - let db = ctx.db(); + ) -> impl Stream>> { + try_stream! { - let subscribed_bangumi_list = bangumi::Entity::find() - .filter( - Condition::all() - .add(subscription_bangumi::Column::SubscriptionId.eq(self.subscription_id)), - ) - .join_rev( - JoinType::InnerJoin, - subscription_bangumi::Relation::Bangumi.def(), - ) - .all(db) - .await?; + let db = ctx.db(); - let mut rss_item_list = vec![]; - for subscribed_bangumi in subscribed_bangumi_list { - let rss_url = subscribed_bangumi - .rss_link - .with_whatever_context::<_, String, RecorderError>(|| { - format!( - "rss_link is required, subscription_id = {}, bangumi_name = {}", - self.subscription_id, subscribed_bangumi.display_name - ) - })?; - let bytes = fetch_bytes(ctx.mikan(), rss_url).await?; + let subscribed_bangumi_list = bangumi::Entity::find() + .filter( + Condition::all() + .add(subscription_bangumi::Column::SubscriptionId.eq(self.subscription_id)), + ) + .join_rev( + JoinType::InnerJoin, + subscription_bangumi::Relation::Bangumi.def(), + ) + .all(db) + .await?; - let channel = rss::Channel::read_from(&bytes[..])?; - for (idx, item) in channel.items.into_iter().enumerate() { - let item = MikanRssEpisodeItem::try_from(item) - .with_whatever_context::<_, String, RecorderError>(|_| { - format!("failed to extract rss item at idx {idx}") + for subscribed_bangumi in subscribed_bangumi_list { + let rss_url = subscribed_bangumi + .rss_link + .with_whatever_context::<_, String, RecorderError>(|| { + format!( + "rss_link is required, subscription_id = {}, bangumi_name = {}", + self.subscription_id, subscribed_bangumi.display_name + ) })?; - rss_item_list.push(item); + let bytes = fetch_bytes(ctx.mikan(), rss_url).await?; + + let channel = rss::Channel::read_from(&bytes[..])?; + + let mut rss_item_list = vec![]; + + for (idx, item) in channel.items.into_iter().enumerate() { + let item = MikanRssEpisodeItem::try_from(item) + .with_whatever_context::<_, String, RecorderError>(|_| { + format!("failed to extract rss item at idx {idx}") + })?; + rss_item_list.push(item); + } + + yield rss_item_list; } } - Ok(rss_item_list) } } diff --git a/apps/recorder/src/extract/origin/mod.rs b/apps/recorder/src/extract/origin/mod.rs index 6a56d82..b0739dd 100644 --- a/apps/recorder/src/extract/origin/mod.rs +++ b/apps/recorder/src/extract/origin/mod.rs @@ -1,5 +1,1479 @@ -pub mod parser; +use std::borrow::Cow; -pub use parser::{ - RawEpisodeMeta, extract_episode_meta_from_origin_name, extract_season_from_title_body, +use itertools::Itertools; +use lazy_static::lazy_static; +use nom::{ + IResult, Parser, + branch::alt, + bytes::complete::{is_a, tag, tag_no_case, take_till}, + character::complete::{anychar, char as chartag, none_of, space0, space1}, + combinator::{map, opt, recognize, value, verify}, + multi::{many_m_n, many_till, many0, many1}, + number::complete::float, + sequence::{delimited, preceded, terminated}, }; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use tracing::{Level, instrument}; + +use crate::utils::nom::{ + ZhNum, delimited_by_brackets, is_han_scx, parse_int, parse_month_num, parse_uint, + with_recognized, +}; + +const BARKET_ALL: &str = "[【((]))】"; +lazy_static! { + static ref NAME_CLEAR_RE: Regex = + Regex::new(r"[\[\]【】][ ]?[\[\]【】]?|[ ][\[\]【】]?").unwrap(); +} + +pub trait OriginCompTrait<'a>: Sized { + fn parse_comp(input: &'a str) -> IResult<&'a str, Self>; + fn into_source_string(self) -> String; + fn as_source_str(&self) -> &str; +} + +pub type EpisodeNum = i32; + +pub struct EpisodeComp<'a> { + pub source: Cow<'a, str>, + pub num: EpisodeNum, + pub num2: Option, +} + +impl<'a> EpisodeComp<'a> { + fn parse_ep_round_num(input: &'a str) -> IResult<&'a str, i32> { + let (input, num) = float(input)?; + Ok((input, f32::round(num) as i32)) + } + + fn parse_ep_num(input: &'a str) -> IResult<&'a str, i32> { + alt((parse_int::, Self::parse_ep_round_num, ZhNum::parse_int)).parse(input) + } + + fn parse_ep_nums_core(input: &'a str) -> IResult<&'a str, (i32, Option)> { + delimited( + space0, + ( + delimited(space0, Self::parse_ep_num, space0), + opt(preceded( + is_a("-~"), + delimited(space0, Self::parse_ep_num, space0), + )), + ), + (opt((tag_no_case("v"), parse_uint::)), space0), + ) + .parse(input) + } + + fn parse_with_ep_prefix(input: &'a str) -> IResult<&'a str, (i32, Option)> { + preceded(tag_no_case("ep"), Self::parse_ep_nums_core).parse(input) + } + + fn parse_with_zh_suffix(input: &'a str) -> IResult<&'a str, (i32, Option)> { + delimited( + opt(tag("第")), + Self::parse_ep_nums_core, + alt((tag("话"), tag("集"), tag("話"))), + ) + .parse(input) + } + + fn parse_with_collection_suffix(input: &'a str) -> IResult<&'a str, (i32, Option)> { + let collection_zh = |input| -> IResult<&str, &str> { + recognize(many_till(is_han_scx, tag("合集"))).parse(input) + }; + + let collection_en = tag_no_case("end"); + + terminated( + Self::parse_ep_nums_core, + alt((collection_zh, collection_en)), + ) + .parse(input) + } + + fn parse_with_delimited_buckets(input: &'a str) -> IResult<&'a str, (i32, Option)> { + delimited( + is_a("[【"), + delimited( + space0, + alt(( + Self::parse_with_ep_prefix, + Self::parse_with_zh_suffix, + Self::parse_with_collection_suffix, + Self::parse_ep_nums_core, + )), + space0, + ), + is_a("】]"), + ) + .parse(input) + } + + fn parse_with_prefix_hyphen(input: &'a str) -> IResult<&'a str, (i32, Option)> { + preceded( + delimited(space0, is_a("-"), space1), + alt(( + Self::parse_with_ep_prefix, + Self::parse_with_zh_suffix, + Self::parse_with_collection_suffix, + Self::parse_ep_nums_core, + )), + ) + .parse(input) + } +} + +impl<'a> std::fmt::Debug for EpisodeComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for EpisodeComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "EpisodeComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, ((num, num2), source)) = with_recognized(alt(( + Self::parse_with_delimited_buckets, + Self::parse_with_prefix_hyphen, + Self::parse_with_ep_prefix, + Self::parse_with_zh_suffix, + Self::parse_with_collection_suffix, + ))) + .parse(input)?; + + Ok(( + input, + Self { + source: source.into(), + num, + num2, + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct MoiveComp<'a> { + pub source: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for MoiveComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for MoiveComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "MoiveComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, source) = + alt((tag("剧场版"), tag("电影"), tag_no_case("movie"))).parse(input)?; + Ok(( + input, + Self { + source: source.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct FansubComp<'a> { + pub source: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for FansubComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for FansubComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "FansubComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, source) = delimited(space0, delimited_by_brackets, space0).parse(input)?; + + Ok(( + input, + Self { + source: source.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub type SeasonNum = i32; + +pub struct SeasonComp<'a> { + pub source: Cow<'a, str>, + pub num: SeasonNum, +} + +impl<'a> SeasonComp<'a> { + fn parse_season_round_num(input: &'a str) -> IResult<&'a str, SeasonNum> { + let (input, num) = float(input)?; + + Ok((input, f32::round(num) as i32)) + } + + fn parse_season_num(input: &'a str) -> IResult<&'a str, SeasonNum> { + alt(( + parse_uint::, + Self::parse_season_round_num, + ZhNum::parse_int, + )) + .parse(input) + } + + fn parse_season_prefix(input: &'a str) -> IResult<&'a str, SeasonNum> { + preceded( + alt((tag("S"), tag_no_case("season"))), + Self::parse_season_num, + ) + .parse(input) + } + + fn parse_en123_ordinial(input: &'a str) -> IResult<&'a str, SeasonNum> { + alt(( + value(1, tag_no_case("1st")), + value(2, tag_no_case("2nd")), + value(3, tag_no_case("3rd")), + )) + .parse(input) + } + + fn parse_en4plus_ordinial(input: &'a str) -> IResult<&'a str, SeasonNum> { + terminated(Self::parse_season_num, tag_no_case("th")).parse(input) + } + + fn parse_zh_pattern(input: &'a str) -> IResult<&'a str, SeasonNum> { + delimited( + opt(tag("第")), + Self::parse_season_num, + alt((tag("季"), tag("期"), tag("部分"))), + ) + .parse(input) + } +} + +impl<'a> std::fmt::Debug for SeasonComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for SeasonComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "SeasonComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, (num, source)) = with_recognized(alt(( + Self::parse_season_prefix, + Self::parse_en123_ordinial, + Self::parse_en4plus_ordinial, + Self::parse_zh_pattern, + ))) + .parse(input)?; + + Ok(( + input, + Self { + source: source.into(), + num, + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct ResolutionComp<'a> { + pub source: Cow<'a, str>, + pub keyword: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for ResolutionComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for ResolutionComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "ResolutionComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, ((_, keyword), source)) = with_recognized(terminated( + many_till( + anychar, + alt(( + tag("720"), + tag("1080"), + tag("1440"), + tag("2160"), + tag("3840"), + tag_no_case("2K"), + tag_no_case("4K"), + tag_no_case("8K"), + )), + ), + many0(anychar), + )) + .parse(input)?; + Ok(( + input, + Self { + source: source.into(), + keyword: keyword.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct SubtitleComp<'a> { + pub source: Cow<'a, str>, + pub keyword: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for SubtitleComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for SubtitleComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "SubtitleComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, ((_, keyword), source)) = verify( + with_recognized(terminated( + many_till( + anychar, + alt(( + tag_no_case("ch"), + tag_no_case("big5"), + tag_no_case("gb"), + tag("简"), + tag("繁"), + tag("日"), + tag("字幕"), + tag("内封"), + tag("翻译"), + tag("中字"), + tag("英字"), + tag("生"), + )), + ), + many0(anychar), + )), + |(_, s)| !s.contains("招人") && !s.contains("招募"), + ) + .parse(input)?; + + Ok(( + input, + Self { + source: source.into(), + keyword: keyword.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct SourceL1Comp<'a> { + pub source: Cow<'a, str>, + pub keyword: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for SourceL1Comp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for SourceL1Comp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "SourceL1Comp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, ((_, keyword), source)) = with_recognized(terminated( + many_till( + anychar, + alt(( + tag_no_case("b-global"), + tag_no_case("baha"), + tag_no_case("bilibili"), + tag_no_case("at-x"), + tag_no_case("webrip"), + tag_no_case("sentai"), + tag_no_case("bdrip"), + tag_no_case("uhdrip"), + tag_no_case("netflix"), + )), + ), + many0(anychar), + )) + .parse(input)?; + Ok(( + input, + Self { + source: source.into(), + keyword: keyword.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct SourceL2Comp<'a> { + pub source: Cow<'a, str>, + pub keyword: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for SourceL2Comp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for SourceL2Comp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "SourceL2Comp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, ((_, keyword), source)) = with_recognized(terminated( + many_till( + anychar, + alt((tag("AMZ"), tag("CR"), tag_no_case("web"), tag_no_case("bd"))), + ), + many0(anychar), + )) + .parse(input)?; + Ok(( + input, + Self { + source: source.into(), + keyword: keyword.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct RegionLimitComp<'a> { + pub source: Cow<'a, str>, + pub keyword: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for RegionLimitComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for RegionLimitComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "RegionLimitComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, ((_, keyword), source)) = with_recognized(terminated( + many_till( + verify(none_of(BARKET_ALL), |c| !c.is_whitespace()), + recognize(( + tag("仅限"), + many_m_n(1, 3, is_a("港澳台")), + opt(tag("地区")), + )), + ), + take_till(|c: char| c.is_whitespace() || BARKET_ALL.contains(c)), + )) + .parse(input)?; + Ok(( + input, + Self { + source: source.into(), + keyword: keyword.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct SeasonDescComp<'a> { + pub source: Cow<'a, str>, + pub keyword: Cow<'a, str>, +} + +impl<'a> SeasonDescComp<'a> { + fn parse_core_month(input: &str) -> IResult<&str, &str> { + recognize((parse_month_num, chartag('月'))).parse(input) + } + + fn parse_core_keyword(input: &str) -> IResult<&str, &str> { + recognize((is_a("春夏秋冬"), opt(chartag('季')))).parse(input) + } + + fn parse_core(input: &str) -> IResult<&str, &str> { + alt((Self::parse_core_month, Self::parse_core_keyword)).parse(input) + } + + fn parse_with_suffix(input: &str) -> IResult<&str, &str> { + recognize((Self::parse_core, space0, opt(chartag('新')), chartag('番'))).parse(input) + } + + fn parse_with_backets(input: &str) -> IResult<&str, &str> { + delimited( + is_a("[【(("), + terminated( + map( + many_till( + none_of(BARKET_ALL), + alt((Self::parse_with_suffix, Self::parse_core)), + ), + |(_, v)| v, + ), + take_till(|c: char| BARKET_ALL.contains(c)), + ), + is_a("]))】"), + ) + .parse(input) + } + + #[instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "SeasonDescComp::parse_without_backets")] + fn parse_without_backets(input: &str) -> IResult<&str, &str> { + terminated( + map( + many_till( + verify(none_of(BARKET_ALL), |c| !c.is_whitespace()), + alt((Self::parse_with_suffix, Self::parse_core)), + ), + |(_, v)| v, + ), + take_till(|c: char| c.is_whitespace() || BARKET_ALL.contains(c)), + ) + .parse(input) + } +} + +impl<'a> std::fmt::Debug for SeasonDescComp<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for SeasonDescComp<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "SeasonDescComp::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (input, (keyword, source)) = + with_recognized(alt((Self::parse_with_backets, Self::parse_without_backets))) + .parse(input)?; + Ok(( + input, + Self { + source: source.into(), + keyword: keyword.into(), + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} +pub struct BangumiComps<'a> { + pub source: Cow<'a, str>, + pub season_desc: Option>, + pub region_limit: Option>, + pub season: Option>, + pub name: Cow<'a, str>, +} + +impl<'a> std::fmt::Debug for BangumiComps<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for BangumiComps<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "BangumiComps::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (main, (season_desc, region_limit)) = ( + opt(SeasonDescComp::parse_comp), + opt(RegionLimitComp::parse_comp), + ) + .parse(input)?; + + let season = many_till(anychar, SeasonComp::parse_comp) + .parse(main) + .ok() + .map(|(_, (_, season))| season); + + let name = NAME_CLEAR_RE.replace_all(main, " ").trim().to_string(); + + Ok(( + "", + Self { + source: input.into(), + season, + name: name.into(), + season_desc, + region_limit, + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct ExtraComps<'a> { + pub source: Cow<'a, str>, + pub resolution: Option>, + pub sub: Option>, + pub source_l1: Option>, + pub source_l2: Option>, + pub region_limit: Option>, +} + +impl<'a> std::fmt::Debug for ExtraComps<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for ExtraComps<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "ExtraComps::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let splitted = input + .split(['[', ']', '【', '】', '(', ')', '(', ')', '_', ' ']) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .collect_vec(); + + let mut sub: Option = None; + let mut resolution: Option = None; + let mut source_l1: Option = None; + let mut source_l2: Option = None; + let mut region_limit: Option = None; + + for elem in splitted.iter() { + if let Ok((_, s)) = SubtitleComp::parse_comp(elem) { + sub = Some(s); + } else if let Ok((_, s)) = ResolutionComp::parse_comp(elem) { + resolution = Some(s); + } else if let Ok((_, s)) = SourceL1Comp::parse_comp(elem) { + source_l1 = Some(s); + } else if let Ok((_, s)) = RegionLimitComp::parse_comp(elem) { + region_limit = Some(s); + } + } + if source_l1.is_none() { + for element in splitted.iter() { + if let Ok((_, s)) = SourceL2Comp::parse_comp(element) { + source_l2 = Some(s); + } + } + } + + Ok(( + input, + Self { + source: input.into(), + resolution, + sub, + source_l1, + source_l2, + region_limit, + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub struct OriginNameEpisode<'a> { + pub source: Cow<'a, str>, + pub fansub: Option>, + pub bangumi: BangumiComps<'a>, + pub episode: EpisodeComp<'a>, + pub extras: ExtraComps<'a>, +} + +impl<'a> std::fmt::Debug for OriginNameEpisode<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for OriginNameEpisode<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "OriginEpisode::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (fansub_remain, fansub) = opt(FansubComp::parse_comp).parse(input)?; + + let (extra_input, (bangumi_input, episode)) = + map(many_till(anychar, EpisodeComp::parse_comp), |(pre, ep)| { + ( + &fansub_remain[..pre.into_iter().collect::().len()], + ep, + ) + }) + .parse(fansub_remain)?; + + let (_, bangumi) = BangumiComps::parse_comp(bangumi_input)?; + let (_, extras) = ExtraComps::parse_comp(extra_input)?; + + Ok(( + input, + Self { + source: input.into(), + fansub, + bangumi, + episode, + extras, + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +impl<'a> From> for OriginNameMeta { + fn from(val: OriginNameEpisode<'a>) -> Self { + OriginNameMeta { + name: val.bangumi.name.into(), + season: val.bangumi.season.as_ref().map_or(1, |s| s.num), + season_raw: val.bangumi.season.map(|s| s.into_source_string()), + episode_index: val.episode.num, + subtitle: val.extras.sub.map(|s| s.into_source_string()), + source: val + .extras + .source_l1 + .map(|s| s.into_source_string()) + .or(val.extras.source_l2.map(|s| s.into_source_string())), + fansub: val.fansub.map(|s| s.into_source_string()), + resolution: val.extras.resolution.map(|s| s.into_source_string()), + } + } +} + +pub struct OriginNameMovie<'a> { + pub source: Cow<'a, str>, + pub fansub: Option>, + pub movie: MoiveComp<'a>, + pub bangumi: BangumiComps<'a>, + pub extras: ExtraComps<'a>, +} + +impl<'a> From> for OriginNameMeta { + fn from(val: OriginNameMovie<'a>) -> Self { + OriginNameMeta { + name: val.bangumi.name.into(), + season: val.bangumi.season.as_ref().map_or(1, |s| s.num), + season_raw: val.bangumi.season.map(|s| s.into_source_string()), + episode_index: 1, + subtitle: val.extras.sub.map(|s| s.into_source_string()), + source: val + .extras + .source_l1 + .map(|s| s.into_source_string()) + .or(val.extras.source_l2.map(|s| s.into_source_string())), + fansub: val.fansub.map(|s| s.into_source_string()), + resolution: val.extras.resolution.map(|s| s.into_source_string()), + } + } +} + +impl<'a> std::fmt::Debug for OriginNameMovie<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for OriginNameMovie<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "OriginMovie::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + let (fansub_remain, fansub) = opt(FansubComp::parse_comp).parse(input)?; + + let (extra_input, (mut movies, bangumi_input)) = with_recognized(many1(map( + many_till(anychar, MoiveComp::parse_comp), + |(_, movie)| movie, + ))) + .parse(fansub_remain)?; + + let (_, extras) = ExtraComps::parse_comp(extra_input)?; + + let (_, bangumi) = BangumiComps::parse_comp(bangumi_input)?; + + Ok(( + input, + Self { + source: input.into(), + fansub, + bangumi, + movie: movies.pop().unwrap(), + extras, + }, + )) + } + + fn into_source_string(self) -> String { + self.source.into() + } + + fn as_source_str(&self) -> &str { + self.source.as_ref() + } +} + +pub enum OriginNameRoot<'a> { + Movie(OriginNameMovie<'a>), + Episode(OriginNameEpisode<'a>), +} + +impl<'a> OriginNameRoot<'a> { + fn parse_movie(input: &'a str) -> IResult<&'a str, Self> { + let (input, movie) = OriginNameMovie::parse_comp(input)?; + Ok((input, Self::Movie(movie))) + } + + fn parse_episode(input: &'a str) -> IResult<&'a str, Self> { + let (input, episode) = OriginNameEpisode::parse_comp(input)?; + Ok((input, Self::Episode(episode))) + } + + pub fn into_meta(self) -> OriginNameMeta { + OriginNameMeta::from(self) + } +} + +impl<'a> std::fmt::Debug for OriginNameRoot<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_source_str()) + } +} + +impl<'a> OriginCompTrait<'a> for OriginNameRoot<'a> { + #[cfg_attr(debug_assertions, instrument(level = Level::TRACE, ret, err(level=Level::TRACE), "OriginData::parse_comp"))] + fn parse_comp(input: &'a str) -> IResult<&'a str, Self> { + alt((Self::parse_movie, Self::parse_episode)).parse(input) + } + + fn into_source_string(self) -> String { + match self { + Self::Movie(m) => m.into_source_string(), + Self::Episode(e) => e.into_source_string(), + } + } + + fn as_source_str(&self) -> &str { + match self { + Self::Movie(m) => m.as_source_str(), + Self::Episode(e) => e.as_source_str(), + } + } +} + +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct OriginNameMeta { + pub name: String, + pub season: i32, + pub season_raw: Option, + pub episode_index: i32, + pub subtitle: Option, + pub source: Option, + pub fansub: Option, + pub resolution: Option, +} + +impl<'a> From> for OriginNameMeta { + fn from(val: OriginNameRoot<'a>) -> Self { + match val { + OriginNameRoot::Movie(m) => m.into(), + OriginNameRoot::Episode(e) => e.into(), + } + } +} + +#[cfg(test)] +#[allow(unused_variables)] +mod tests { + use rstest::{fixture, rstest}; + + use crate::{ + errors::{RecorderError, RecorderResult}, + extract::origin::{OriginCompTrait, OriginNameMeta, OriginNameRoot}, + }; + + fn test_parse_origin_data(origin_name: &str, expected: &str) -> RecorderResult<()> { + let (_, data) = + OriginNameRoot::parse_comp(origin_name).map_err(|e| RecorderError::Whatever { + message: e.to_string(), + source: None.into(), + })?; + let found: OriginNameMeta = data.into(); + let expected: OriginNameMeta = serde_json::from_str(expected).inspect_err(|e| { + tracing::error!( + "Failed to parse expected: {}, but got found: {}", + e, + serde_json::to_string_pretty(&found).unwrap() + ); + })?; + + if expected != found { + println!( + "expected {} and found {} are not equal", + serde_json::to_string_pretty(&expected).unwrap(), + serde_json::to_string_pretty(&found).unwrap() + ) + } + assert_eq!(expected, found); + + Ok(()) + } + + #[fixture] + fn before_each() { + // use crate::test_utils::tracing::try_init_testing_tracing_only_leaf; + // try_init_testing_tracing_only_leaf(tracing::Level::TRACE); + } + + #[rstest] + #[test] + fn test_parse_ep_with_all_parts_wrapped(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[新Sub][1月新番][我心里危险的东西 第二季][05][HEVC][10Bit][1080P][简日双语][招募翻译]"#, + r#"{ + "name": "我心里危险的东西 第二季", + "season": 2, + "season_raw": "第二季", + "episode_index": 5, + "subtitle": "简日双语", + "source": null, + "fansub": "新Sub", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_title_wrapped_by_one_square_bracket_and_season_prefix( + before_each: (), + ) -> RecorderResult<()> { + test_parse_origin_data( + r#"【喵萌奶茶屋】★01月新番★[我内心的糟糕念头 / Boku no Kokoro no Yabai Yatsu][18][1080p][简日双语][招募翻译]"#, + r#"{ + "name": "我内心的糟糕念头 / Boku no Kokoro no Yabai Yatsu", + "season": 1, + "season_raw": null, + "episode_index": 18, + "subtitle": "简日双语", + "source": null, + "fansub": "喵萌奶茶屋", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_ep_and_version(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[LoliHouse] 因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生 2nd / Shin no Nakama 2nd - 08v2 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]"#, + r#"{ + "name": "因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生 2nd / Shin no Nakama 2nd", + "season": 2, + "season_raw": "2nd", + "episode_index": 8, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "LoliHouse", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_en_title_only(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r"[动漫国字幕组&LoliHouse] THE MARGINAL SERVICE - 08 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]", + r#"{ + "name": "THE MARGINAL SERVICE", + "season": 1, + "episode_index": 8, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "动漫国字幕组&LoliHouse", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_two_zh_title(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[LoliHouse] 事与愿违的不死冒险者 / 非自愿的不死冒险者 / Nozomanu Fushi no Boukensha - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]"#, + r#"{ + "name": "事与愿违的不死冒险者 / 非自愿的不死冒险者 / Nozomanu Fushi no Boukensha", + "season": 1, + "episode_index": 1, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "LoliHouse", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_en_zh_jp_titles(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[喵萌奶茶屋&LoliHouse] 碰之道 / ぽんのみち / Pon no Michi - 07 [WebRip 1080p HEVC-10bit AAC][简繁日内封字幕]"#, + r#"{ + "name": "碰之道 / ぽんのみち / Pon no Michi", + "season": 1, + "episode_index": 7, + "subtitle": "简繁日内封字幕", + "source": "WebRip", + "fansub": "喵萌奶茶屋&LoliHouse", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_nth_season(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[ANi] Yowai Character Tomozakikun / 弱角友崎同学 2nd STAGE - 09 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#, + r#"{ + "name": "Yowai Character Tomozakikun / 弱角友崎同学 2nd STAGE", + "season": 2, + "season_raw": "2nd", + "episode_index": 9, + "subtitle": "CHT", + "source": "Baha", + "fansub": "ANi", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_season_en_and_season_zh(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[豌豆字幕组&LoliHouse] 王者天下 第五季 / Kingdom S5 - 07 [WebRip 1080p HEVC-10bit AAC][简繁外挂字幕]"#, + r#"{ + "name": "王者天下 第五季 / Kingdom S5", + "season": 5, + "season_raw": "第五季", + "episode_index": 7, + "subtitle": "简繁外挂字幕", + "source": "WebRip", + "fansub": "豌豆字幕组&LoliHouse", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_airota_fansub_style_case1(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"【千夏字幕组】【爱丽丝与特蕾丝的虚幻工厂_Alice to Therese no Maboroshi Koujou】[剧场版][WebRip_1080p_HEVC][简繁内封][招募新人]"#, + r#"{ + "name": "爱丽丝与特蕾丝的虚幻工厂_Alice to Therese no Maboroshi Koujou 剧场版", + "season": 1, + "episode_index": 1, + "subtitle": "简繁内封", + "source": "WebRip", + "fansub": "千夏字幕组", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_airota_fansub_style_case2(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[千夏字幕组&喵萌奶茶屋][电影 轻旅轻营 (摇曳露营) _Yuru Camp Movie][剧场版][UHDRip_2160p_HEVC][繁体][千夏15周年]"#, + r#"{ + "name": "电影 轻旅轻营 (摇曳露营) _Yuru Camp Movie 剧场版", + "season": 1, + "episode_index": 1, + "subtitle": "繁体", + "source": "UHDRip", + "fansub": "千夏字幕组&喵萌奶茶屋", + "resolution": "2160p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_large_episode_style(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"#, + r#"{ + "name": "New Doraemon 哆啦A梦新番", + "season": 1, + "episode_index": 747, + "subtitle": "GB", + "fansub": "梦蓝字幕组", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_many_square_brackets_split_title(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"【MCE汉化组】[剧场版-摇曳露营][Yuru Camp][Movie][简日双语][1080P][x264 AAC]"#, + r#"{ + "name": "剧场版-摇曳露营 Yuru Camp Movie", + "season": 1, + "episode_index": 1, + "subtitle": "简日双语", + "fansub": "MCE汉化组", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_implicit_lang_title_sep(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"#, + r#"{ + "name": "尼尔:机械纪元 NieR Automata Ver1.1a", + "season": 1, + "episode_index": 2, + "subtitle": "简日双语", + "fansub": "织梦字幕组", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_square_brackets_wrapped_and_space_split( + before_each: (), + ) -> RecorderResult<()> { + test_parse_origin_data( + r#"[天月搬运组][迷宫饭 Delicious in Dungeon][03][日语中字][MKV][1080P][NETFLIX][高画质版]"#, + r#" + { + "name": "迷宫饭 Delicious in Dungeon", + "season": 1, + "episode_index": 3, + "subtitle": "日语中字", + "source": "NETFLIX", + "fansub": "天月搬运组", + "resolution": "1080P" + } + "#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_start_with_brackets_wrapped_season_info_prefix( + before_each: (), + ) -> RecorderResult<()> { + test_parse_origin_data( + r#"[爱恋字幕社][1月新番][迷宫饭][Dungeon Meshi][01][1080P][MP4][简日双语] "#, + r#"{ + "name": "迷宫饭 Dungeon Meshi", + "season": 1, + "episode_index": 1, + "subtitle": "简日双语", + "fansub": "爱恋字幕社", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_small_no_title_extra_brackets_case( + before_each: (), + ) -> RecorderResult<()> { + test_parse_origin_data( + r#"[ANi] Mahou Shoujo ni Akogarete / 梦想成为魔法少女 [年龄限制版] - 09 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#, + r#"{ + "name": "Mahou Shoujo ni Akogarete / 梦想成为魔法少女 年龄限制版", + "season": 1, + "episode_index": 9, + "subtitle": "CHT", + "source": "Baha", + "fansub": "ANi", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_title_leading_space_style(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[ANi] 16bit 的感动 ANOTHER LAYER - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#, + r#"{ + "name": "16bit 的感动 ANOTHER LAYER", + "season": 1, + "episode_index": 1, + "subtitle": "CHT", + "source": "Baha", + "fansub": "ANi", + "resolution": "1080P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_title_leading_month_and_wrapped_brackets_style( + before_each: (), + ) -> RecorderResult<()> { + test_parse_origin_data( + r#"【喵萌奶茶屋】★07月新番★[银砂糖师与黑妖精 ~ Sugar Apple Fairy Tale ~][13][1080p][简日双语][招募翻译]"#, + r#"{ + "name": "银砂糖师与黑妖精 ~ Sugar Apple Fairy Tale ~", + "season": 1, + "episode_index": 13, + "subtitle": "简日双语", + "fansub": "喵萌奶茶屋", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_title_leading_month_style(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"【极影字幕社】★4月新番 天国大魔境 Tengoku Daimakyou 第05话 GB 720P MP4(字幕社招人内详)"#, + r#"{ + "name": "天国大魔境 Tengoku Daimakyou", + "season": 1, + "episode_index": 5, + "subtitle": "GB", + "fansub": "极影字幕社", + "resolution": "720P" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_tokusatsu_style(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[MagicStar] 假面骑士Geats / 仮面ライダーギーツ EP33 [WEBDL] [1080p] [TTFC]【生】"#, + r#"{ + "name": "假面骑士Geats / 仮面ライダーギーツ", + "season": 1, + "episode_index": 33, + "source": "WEBDL", + "subtitle": "生", + "fansub": "MagicStar", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_parse_ep_with_multi_lang_zh_title(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]"#, + r#"{ + "name": "BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever!", + "season": 1, + "episode_index": 26, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "百冬练习组&LoliHouse", + "resolution": "1080p" + }"#, + ) + } + + #[rstest] + #[test] + fn test_ep_collections(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[奶²&LoliHouse] 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简日内封字幕]"#, + r#"{ + "name": "蘑菇狗 / Kinokoinu: Mushroom Pup", + "season": 1, + "episode_index": 1, + "subtitle": "简日内封字幕", + "source": "WebRip", + "fansub": "奶²&LoliHouse", + "resolution": "1080p" + }"#, + )?; + + test_parse_origin_data( + r#"[LoliHouse] 叹气的亡灵想隐退 / Nageki no Bourei wa Intai shitai [01-13 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#, + r#"{ + "name": "叹气的亡灵想隐退 / Nageki no Bourei wa Intai shitai", + "season": 1, + "episode_index": 1, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "LoliHouse", + "resolution": "1080p" + }"#, + )?; + + test_parse_origin_data( + r#"[LoliHouse] 精灵幻想记 第二季 / Seirei Gensouki S2 [01-12 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#, + r#"{ + "name": "精灵幻想记 第二季 / Seirei Gensouki S2", + "season": 2, + "season_raw": "第二季", + "episode_index": 1, + "subtitle": "简繁内封字幕", + "source": "WebRip", + "fansub": "LoliHouse", + "resolution": "1080p" + }"#, + )?; + + test_parse_origin_data( + r#"[喵萌奶茶屋&LoliHouse] 超自然武装当哒当 / 胆大党 / Dandadan [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简繁日内封字幕][Fin]"#, + r#" { + "name": "超自然武装当哒当 / 胆大党 / Dandadan", + "season": 1, + "episode_index": 1, + "subtitle": "简繁日内封字幕", + "source": "WebRip", + "fansub": "喵萌奶茶屋&LoliHouse", + "resolution": "1080p" + }"#, + )?; + + Ok(()) + } + + #[rstest] + #[test] + fn test_parse_ep_with_zh_bracketed_name(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"#, + r#"{ + "name": "古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02", + "season": 2, + "season_raw": "第二季", + "episode_index": 22, + "subtitle": "GB", + "fansub": "幻樱字幕组", + "resolution": "1920X1080" + }"#, + ) + } + + #[rstest] + #[test] + fn test_bad_cases(before_each: ()) -> RecorderResult<()> { + test_parse_origin_data( + r#"[7³ACG x 桜都字幕组] 摇曳露营△ 剧场版/映画 ゆるキャン△/Eiga Yuru Camp△ [简繁字幕] BDrip 1080p x265 FLAC 2.0"#, + r#"{ + "name": "摇曳露营△ 剧场版", + "season": 1, + "episode_index": 1, + "subtitle": "简繁字幕", + "source": "BDrip", + "fansub": "7³ACG x 桜都字幕组", + "resolution": "1080p" + }"#, + )?; + + Ok(()) + } +} diff --git a/apps/recorder/src/extract/origin/parser.rs b/apps/recorder/src/extract/origin/parser.rs deleted file mode 100644 index 1920c81..0000000 --- a/apps/recorder/src/extract/origin/parser.rs +++ /dev/null @@ -1,845 +0,0 @@ -/** - * @TODO: rewrite with nom - */ -use std::borrow::Cow; - -use itertools::Itertools; -use lazy_static::lazy_static; -use regex::Regex; -use serde::{Deserialize, Serialize}; -use snafu::whatever; - -use crate::{ - errors::RecorderResult, - extract::defs::{DIGIT_1PLUS_REG, ZH_NUM_MAP, ZH_NUM_RE}, -}; - -const NAME_EXTRACT_REPLACE_ADHOC1_REPLACED: &str = "$1/$2"; - -lazy_static! { - static ref TITLE_RE: Regex = Regex::new( - r#"(.*|\[.*])( -? \d+|\[\d+]|\[\d+.?[vV]\d]|第\d+[话話集]|\[第?\d+[话話集]]|\[\d+.?END]|[Ee][Pp]?\d+|\[\s*\d+\s*[\-\~]\s*\d+\s*\p{scx=Han}*[话話集]\s*])(.*)"# - ).unwrap(); - static ref EP_COLLECTION_RE:Regex = Regex::new(r#"\[?\s*\d+\s*[\-\~]\s*\d+\s*\p{scx=Han}*合?[话話集]\s*]?"#).unwrap(); - static ref MOVIE_TITLE_RE:Regex = Regex::new(r#"(.*|\[.*])(剧场版|[Mm]ovie|电影)(.*?)$"#).unwrap(); - static ref RESOLUTION_RE: Regex = Regex::new(r"1080|720|2160|4K|2K").unwrap(); - static ref SOURCE_L1_RE: Regex = Regex::new(r"B-Global|[Bb]aha|[Bb]ilibili|AT-X|W[Ee][Bb][Rr][Ii][Pp]|Sentai|B[Dd][Rr][Ii][Pp]|UHD[Rr][Ii][Pp]|NETFLIX").unwrap(); - static ref SOURCE_L2_RE: Regex = Regex::new(r"AMZ|CR|W[Ee][Bb]|B[Dd]").unwrap(); - static ref SUB_RE: Regex = Regex::new(r"[简繁日字幕]|CH|BIG5|GB").unwrap(); - static ref PREFIX_RE: Regex = - Regex::new(r"[^\w\s\p{Unified_Ideograph}\p{scx=Han}\p{scx=Hira}\p{scx=Kana}-]").unwrap(); - static ref EN_BRACKET_SPLIT_RE: Regex = Regex::new(r"[\[\]]").unwrap(); - static ref MOVIE_SEASON_EXTRACT_RE: Regex = Regex::new(r"剧场版|Movie|电影").unwrap(); - static ref MAIN_TITLE_PREFIX_PROCESS_RE1: Regex = Regex::new(r"新番|月?番").unwrap(); - static ref MAIN_TITLE_PREFIX_PROCESS_RE2: Regex = Regex::new(r"[港澳台]{1,3}地区").unwrap(); - static ref MAIN_TITLE_PRE_PROCESS_BACKETS_RE: Regex = Regex::new(r"\[.+\]").unwrap(); - static ref MAIN_TITLE_PRE_PROCESS_BACKETS_RE_SUB1: Regex = Regex::new(r"^.*?\[").unwrap(); - static ref SEASON_EXTRACT_SEASON_ALL_RE: Regex = Regex::new(r"S\d{1,2}|Season \d{1,2}|[第].[季期]|1st|2nd|3rd|\d{1,2}th").unwrap(); - static ref SEASON_EXTRACT_SEASON_EN_PREFIX_RE: Regex = Regex::new(r"Season|S").unwrap(); - static ref SEASON_EXTRACT_SEASON_EN_NTH_RE: Regex = Regex::new(r"1st|2nd|3rd|\d{1,2}th").unwrap(); - static ref SEASON_EXTRACT_SEASON_ZH_PREFIX_RE: Regex = Regex::new(r"[第 ].*[季期(部分)]|部分").unwrap(); - static ref SEASON_EXTRACT_SEASON_ZH_PREFIX_SUB_RE: Regex = Regex::new(r"[第季期 ]").unwrap(); - static ref NAME_EXTRACT_REMOVE_RE: Regex = Regex::new(r"[((]仅限[港澳台]{1,3}地区[))]").unwrap(); - static ref NAME_EXTRACT_SPLIT_RE: Regex = Regex::new(r"/|\s{2}|-\s{2}|\]\[").unwrap(); - static ref NAME_EXTRACT_REPLACE_ADHOC1_RE: Regex = Regex::new(r"([\p{scx=Han}\s\(\)]{5,})_([a-zA-Z]{2,})").unwrap(); - static ref NAME_JP_TEST: Regex = Regex::new(r"[\p{scx=Hira}\p{scx=Kana}]{2,}").unwrap(); - static ref NAME_ZH_TEST: Regex = Regex::new(r"[\p{scx=Han}]{2,}").unwrap(); - static ref NAME_EN_TEST: Regex = Regex::new(r"[a-zA-Z]{3,}").unwrap(); - static ref TAGS_EXTRACT_SPLIT_RE: Regex = Regex::new(r"[\[\]()()_]").unwrap(); - static ref CLEAR_SUB_RE: Regex = Regex::new(r"_MP4|_MKV").unwrap(); -} - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] -pub struct RawEpisodeMeta { - pub name_en: Option, - pub name_en_no_season: Option, - pub name_jp: Option, - pub name_jp_no_season: Option, - pub name_zh: Option, - pub name_zh_no_season: Option, - pub season: i32, - pub season_raw: Option, - pub episode_index: i32, - pub subtitle: Option, - pub source: Option, - pub fansub: Option, - pub resolution: Option, -} - -fn extract_fansub(raw_name: &str) -> Option<&str> { - let mut groups = EN_BRACKET_SPLIT_RE.splitn(raw_name, 3); - groups.nth(1) -} - -fn replace_ch_bracket_to_en(raw_name: &str) -> String { - raw_name.replace('【', "[").replace('】', "]") -} - -fn title_body_pre_process(title_body: &str, fansub: Option<&str>) -> RecorderResult { - let raw_without_fansub = if let Some(fansub) = fansub { - let fan_sub_re = Regex::new(&format!(".{fansub}."))?; - fan_sub_re.replace_all(title_body, "") - } else { - Cow::Borrowed(title_body) - }; - let raw_with_prefix_replaced = PREFIX_RE.replace_all(&raw_without_fansub, "/"); - let mut arg_group = raw_with_prefix_replaced - .split('/') - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .collect::>(); - - if arg_group.len() == 1 { - arg_group = arg_group.first_mut().unwrap().split(' ').collect(); - } - let mut raw = raw_without_fansub.to_string(); - for arg in arg_group.iter() { - if (arg_group.len() <= 5 && MAIN_TITLE_PREFIX_PROCESS_RE1.is_match(arg)) - || (MAIN_TITLE_PREFIX_PROCESS_RE2.is_match(arg)) - { - let sub = Regex::new(&format!(".{arg}."))?; - raw = sub.replace_all(&raw, "").to_string(); - } - } - if let Some(m) = MAIN_TITLE_PRE_PROCESS_BACKETS_RE.find(&raw) - && m.len() as f32 > (raw.len() as f32) * 0.5 - { - let mut raw1 = MAIN_TITLE_PRE_PROCESS_BACKETS_RE_SUB1 - .replace(&raw, "") - .chars() - .collect_vec(); - while let Some(ch) = raw1.pop() { - if ch == ']' { - break; - } - } - raw = raw1.into_iter().collect(); - } - Ok(raw.to_string()) -} - -pub fn extract_season_from_title_body(title_body: &str) -> (String, Option, i32) { - let name_and_season = EN_BRACKET_SPLIT_RE.replace_all(title_body, " "); - let seasons = SEASON_EXTRACT_SEASON_ALL_RE - .find(&name_and_season) - .into_iter() - .map(|s| s.as_str()) - .collect_vec(); - - if seasons.is_empty() { - return (title_body.to_string(), None, 1); - } - - let mut season = 1; - let mut season_raw = None; - let name = SEASON_EXTRACT_SEASON_ALL_RE.replace_all(&name_and_season, ""); - - for s in seasons { - season_raw = Some(s); - if let Some(m) = SEASON_EXTRACT_SEASON_EN_PREFIX_RE.find(s) - && let Ok(s) = SEASON_EXTRACT_SEASON_ALL_RE - .replace_all(m.as_str(), "") - .parse::() - { - season = s; - break; - } - if let Some(m) = SEASON_EXTRACT_SEASON_EN_NTH_RE.find(s) - && let Some(s) = DIGIT_1PLUS_REG - .find(m.as_str()) - .and_then(|s| s.as_str().parse::().ok()) - { - season = s; - break; - } - if let Some(m) = SEASON_EXTRACT_SEASON_ZH_PREFIX_RE.find(s) { - if let Ok(s) = SEASON_EXTRACT_SEASON_ZH_PREFIX_SUB_RE - .replace(m.as_str(), "") - .parse::() - { - season = s; - break; - } - if let Some(m) = ZH_NUM_RE.find(m.as_str()) { - season = ZH_NUM_MAP[m.as_str()]; - break; - } - } - } - - (name.to_string(), season_raw.map(|s| s.to_string()), season) -} - -fn extract_name_from_title_body_name_section( - title_body_name_section: &str, -) -> (Option, Option, Option) { - let mut name_en = None; - let mut name_zh = None; - let mut name_jp = None; - let replaced1 = NAME_EXTRACT_REMOVE_RE.replace_all(title_body_name_section, ""); - let replaced2 = NAME_EXTRACT_REPLACE_ADHOC1_RE - .replace_all(&replaced1, NAME_EXTRACT_REPLACE_ADHOC1_REPLACED); - let trimmed = replaced2.trim(); - let mut split = NAME_EXTRACT_SPLIT_RE - .split(trimmed) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .collect_vec(); - if split.len() == 1 { - let mut split_space = split[0].split(' ').collect_vec(); - let mut search_indices = vec![0]; - if split_space.len() > 1 { - search_indices.push(split_space.len() - 1); - } - for i in search_indices { - if NAME_ZH_TEST.is_match(split_space[i]) { - let chs = split_space[i]; - split_space.remove(i); - split = vec![chs.to_string(), split_space.join(" ")]; - break; - } - } - } - for item in split { - if NAME_JP_TEST.is_match(&item) && name_jp.is_none() { - name_jp = Some(item); - } else if NAME_ZH_TEST.is_match(&item) && name_zh.is_none() { - name_zh = Some(item); - } else if NAME_EN_TEST.is_match(&item) && name_en.is_none() { - name_en = Some(item); - } - } - (name_en, name_zh, name_jp) -} - -fn extract_episode_index_from_title_episode(title_episode: &str) -> Option { - DIGIT_1PLUS_REG - .find(title_episode)? - .as_str() - .parse::() - .ok() -} - -fn clear_sub(sub: Option) -> Option { - sub.map(|s| CLEAR_SUB_RE.replace_all(&s, "").to_string()) -} - -fn extract_tags_from_title_extra( - title_extra: &str, -) -> (Option, Option, Option) { - let replaced = TAGS_EXTRACT_SPLIT_RE.replace_all(title_extra, " "); - let elements = replaced - .split(' ') - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .collect_vec(); - - let mut sub = None; - let mut resolution = None; - let mut source = None; - for element in elements.iter() { - if SUB_RE.is_match(element) { - sub = Some(element.to_string()) - } else if RESOLUTION_RE.is_match(element) { - resolution = Some(element.to_string()) - } else if SOURCE_L1_RE.is_match(element) { - source = Some(element.to_string()) - } - } - if source.is_none() { - for element in elements { - if SOURCE_L2_RE.is_match(element) { - source = Some(element.to_string()) - } - } - } - (clear_sub(sub), resolution, source) -} - -pub fn check_is_movie(title: &str) -> bool { - MOVIE_TITLE_RE.is_match(title) -} - -pub fn extract_episode_meta_from_origin_name(s: &str) -> RecorderResult { - let raw_title = s.trim(); - let raw_title_without_ch_brackets = replace_ch_bracket_to_en(raw_title); - let fansub = extract_fansub(&raw_title_without_ch_brackets); - let movie_capture = check_is_movie(&raw_title_without_ch_brackets); - if let Some(title_re_match_obj) = MOVIE_TITLE_RE - .captures(&raw_title_without_ch_brackets) - .or(TITLE_RE.captures(&raw_title_without_ch_brackets)) - { - let mut title_body = title_re_match_obj - .get(1) - .map(|s| s.as_str().trim()) - .unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups")) - .to_string(); - let mut title_episode = title_re_match_obj - .get(2) - .map(|s| s.as_str().trim()) - .unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups")); - let title_extra = title_re_match_obj - .get(3) - .map(|s| s.as_str().trim()) - .unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups")); - - if movie_capture { - title_body += title_episode; - title_episode = ""; - } else if EP_COLLECTION_RE.is_match(title_episode) { - title_episode = ""; - } - - let title_body = title_body_pre_process(&title_body, fansub)?; - let (name_without_season, season_raw, season) = extract_season_from_title_body(&title_body); - let (name_en, name_zh, name_jp) = extract_name_from_title_body_name_section(&title_body); - let (name_en_no_season, name_zh_no_season, name_jp_no_season) = - extract_name_from_title_body_name_section(&name_without_season); - let episode_index = extract_episode_index_from_title_episode(title_episode).unwrap_or(1); - let (sub, resolution, source) = extract_tags_from_title_extra(title_extra); - Ok(RawEpisodeMeta { - name_en, - name_en_no_season, - name_jp, - name_jp_no_season, - name_zh, - name_zh_no_season, - season, - season_raw, - episode_index, - subtitle: sub, - source, - fansub: fansub.map(|s| s.to_string()), - resolution, - }) - } else { - whatever!("Can not parse episode meta from raw filename {}", raw_title) - } -} - -#[cfg(test)] -mod tests { - - use super::{RawEpisodeMeta, extract_episode_meta_from_origin_name}; - - fn test_raw_ep_parser_case(raw_name: &str, expected: &str) { - let expected: Option = serde_json::from_str(expected).unwrap_or_default(); - let found = extract_episode_meta_from_origin_name(raw_name).ok(); - - if expected != found { - println!( - "expected {} and found {} are not equal", - serde_json::to_string_pretty(&expected).unwrap(), - serde_json::to_string_pretty(&found).unwrap() - ) - } - assert_eq!(expected, found); - } - - #[test] - fn test_parse_ep_with_all_parts_wrapped() { - test_raw_ep_parser_case( - r#"[新Sub][1月新番][我心里危险的东西 第二季][05][HEVC][10Bit][1080P][简日双语][招募翻译]"#, - r#"{ - "name_zh": "我心里危险的东西", - "name_zh_no_season": "我心里危险的东西", - "season": 2, - "season_raw": "第二季", - "episode_index": 5, - "subtitle": "简日双语", - "source": null, - "fansub": "新Sub", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_title_wrapped_by_one_square_bracket_and_season_prefix() { - test_raw_ep_parser_case( - r#"【喵萌奶茶屋】★01月新番★[我内心的糟糕念头 / Boku no Kokoro no Yabai Yatsu][18][1080p][简日双语][招募翻译]"#, - r#"{ - "name_en": "Boku no Kokoro no Yabai Yatsu", - "name_en_no_season": "Boku no Kokoro no Yabai Yatsu", - "name_zh": "我内心的糟糕念头", - "name_zh_no_season": "我内心的糟糕念头", - "season": 1, - "season_raw": null, - "episode_index": 18, - "subtitle": "简日双语", - "source": null, - "fansub": "喵萌奶茶屋", - "resolution": "1080p" - }"#, - ); - } - - #[test] - fn test_parse_ep_with_ep_and_version() { - test_raw_ep_parser_case( - r#"[LoliHouse] 因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生 2nd / Shin no Nakama 2nd - 08v2 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]"#, - r#"{ - "name_en": "Shin no Nakama 2nd", - "name_en_no_season": "Shin no Nakama", - "name_zh": "因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生 2nd", - "name_zh_no_season": "因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生", - "season": 2, - "season_raw": "2nd", - "episode_index": 8, - "subtitle": "简繁内封字幕", - "source": "WebRip", - "fansub": "LoliHouse", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_en_title_only() { - test_raw_ep_parser_case( - r"[动漫国字幕组&LoliHouse] THE MARGINAL SERVICE - 08 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]", - r#"{ - "name_en": "THE MARGINAL SERVICE", - "name_en_no_season": "THE MARGINAL SERVICE", - "season": 1, - "episode_index": 8, - "subtitle": "简繁内封字幕", - "source": "WebRip", - "fansub": "动漫国字幕组&LoliHouse", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_two_zh_title() { - test_raw_ep_parser_case( - r#"[LoliHouse] 事与愿违的不死冒险者 / 非自愿的不死冒险者 / Nozomanu Fushi no Boukensha - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]"#, - r#"{ - "name_en": "Nozomanu Fushi no Boukensha", - "name_en_no_season": "Nozomanu Fushi no Boukensha", - "name_zh": "事与愿违的不死冒险者", - "name_zh_no_season": "事与愿违的不死冒险者", - "season": 1, - "season_raw": null, - "episode_index": 1, - "subtitle": "简繁内封字幕", - "source": "WebRip", - "fansub": "LoliHouse", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_en_zh_jp_titles() { - test_raw_ep_parser_case( - r#"[喵萌奶茶屋&LoliHouse] 碰之道 / ぽんのみち / Pon no Michi - 07 [WebRip 1080p HEVC-10bit AAC][简繁日内封字幕]"#, - r#"{ - "name_en": "Pon no Michi", - "name_jp": "ぽんのみち", - "name_zh": "碰之道", - "name_en_no_season": "Pon no Michi", - "name_jp_no_season": "ぽんのみち", - "name_zh_no_season": "碰之道", - "season": 1, - "season_raw": null, - "episode_index": 7, - "subtitle": "简繁日内封字幕", - "source": "WebRip", - "fansub": "喵萌奶茶屋&LoliHouse", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_nth_season() { - test_raw_ep_parser_case( - r#"[ANi] Yowai Character Tomozakikun / 弱角友崎同学 2nd STAGE - 09 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#, - r#"{ - "name_en": "Yowai Character Tomozakikun", - "name_en_no_season": "Yowai Character Tomozakikun", - "name_zh": "弱角友崎同学 2nd STAGE", - "name_zh_no_season": "弱角友崎同学", - "season": 2, - "season_raw": "2nd", - "episode_index": 9, - "subtitle": "CHT", - "source": "Baha", - "fansub": "ANi", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_season_en_and_season_zh() { - test_raw_ep_parser_case( - r#"[豌豆字幕组&LoliHouse] 王者天下 第五季 / Kingdom S5 - 07 [WebRip 1080p HEVC-10bit AAC][简繁外挂字幕]"#, - r#"{ - "name_en": "Kingdom S5", - "name_en_no_season": "Kingdom", - "name_zh": "王者天下 第五季", - "name_zh_no_season": "王者天下", - "season": 5, - "season_raw": "第五季", - "episode_index": 7, - "subtitle": "简繁外挂字幕", - "source": "WebRip", - "fansub": "豌豆字幕组&LoliHouse", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_airota_fansub_style_case1() { - test_raw_ep_parser_case( - r#"【千夏字幕组】【爱丽丝与特蕾丝的虚幻工厂_Alice to Therese no Maboroshi Koujou】[剧场版][WebRip_1080p_HEVC][简繁内封][招募新人]"#, - r#"{ - "name_en": "Alice to Therese no Maboroshi Koujou", - "name_en_no_season": "Alice to Therese no Maboroshi Koujou", - "name_zh": "爱丽丝与特蕾丝的虚幻工厂", - "name_zh_no_season": "爱丽丝与特蕾丝的虚幻工厂", - "season": 1, - "episode_index": 1, - "subtitle": "简繁内封", - "source": "WebRip", - "fansub": "千夏字幕组", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_airota_fansub_style_case2() { - test_raw_ep_parser_case( - r#"[千夏字幕组&喵萌奶茶屋][电影 轻旅轻营 (摇曳露营) _Yuru Camp Movie][剧场版][UHDRip_2160p_HEVC][繁体][千夏15周年]"#, - r#"{ - "name_en": "Yuru Camp Movie", - "name_en_no_season": "Yuru Camp Movie", - "name_zh": "电影 轻旅轻营 (摇曳露营)", - "name_zh_no_season": "电影 轻旅轻营 (摇曳露营)", - "season": 1, - "episode_index": 1, - "subtitle": "繁体", - "source": "UHDRip", - "fansub": "千夏字幕组&喵萌奶茶屋", - "resolution": "2160p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_large_episode_style() { - test_raw_ep_parser_case( - r#"[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"#, - r#"{ - "name_en": "New Doraemon", - "name_en_no_season": "New Doraemon", - "name_zh": "哆啦A梦新番", - "name_zh_no_season": "哆啦A梦新番", - "season": 1, - "episode_index": 747, - "subtitle": "GB", - "fansub": "梦蓝字幕组", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_many_square_brackets_split_title() { - test_raw_ep_parser_case( - r#"【MCE汉化组】[剧场版-摇曳露营][Yuru Camp][Movie][简日双语][1080P][x264 AAC]"#, - r#"{ - "name_en": "Yuru Camp", - "name_en_no_season": "Yuru Camp", - "name_zh": "剧场版-摇曳露营", - "name_zh_no_season": "剧场版-摇曳露营", - "season": 1, - "episode_index": 1, - "subtitle": "简日双语", - "fansub": "MCE汉化组", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_implicit_lang_title_sep() { - test_raw_ep_parser_case( - r#"[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"#, - r#"{ - "name_en": "NieR Automata Ver1.1a", - "name_en_no_season": "NieR Automata Ver1.1a", - "name_zh": "尼尔:机械纪元", - "name_zh_no_season": "尼尔:机械纪元", - "season": 1, - "episode_index": 2, - "subtitle": "简日双语", - "fansub": "织梦字幕组", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_square_brackets_wrapped_and_space_split() { - test_raw_ep_parser_case( - r#"[天月搬运组][迷宫饭 Delicious in Dungeon][03][日语中字][MKV][1080P][NETFLIX][高画质版]"#, - r#" - { - "name_en": "Delicious in Dungeon", - "name_en_no_season": "Delicious in Dungeon", - "name_zh": "迷宫饭", - "name_zh_no_season": "迷宫饭", - "season": 1, - "episode_index": 3, - "subtitle": "日语中字", - "source": "NETFLIX", - "fansub": "天月搬运组", - "resolution": "1080P" - } - "#, - ) - } - - #[test] - fn test_parse_ep_with_start_with_brackets_wrapped_season_info_prefix() { - test_raw_ep_parser_case( - r#"[爱恋字幕社][1月新番][迷宫饭][Dungeon Meshi][01][1080P][MP4][简日双语] "#, - r#"{ - "name_en": "Dungeon Meshi", - "name_en_no_season": "Dungeon Meshi", - "name_zh": "迷宫饭", - "name_zh_no_season": "迷宫饭", - "season": 1, - "episode_index": 1, - "subtitle": "简日双语", - "fansub": "爱恋字幕社", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_small_no_title_extra_brackets_case() { - test_raw_ep_parser_case( - r#"[ANi] Mahou Shoujo ni Akogarete / 梦想成为魔法少女 [年龄限制版] - 09 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#, - r#"{ - "name_en": "Mahou Shoujo ni Akogarete", - "name_en_no_season": "Mahou Shoujo ni Akogarete", - "name_zh": "梦想成为魔法少女 [年龄限制版]", - "name_zh_no_season": "梦想成为魔法少女 [年龄限制版]", - "season": 1, - "episode_index": 9, - "subtitle": "CHT", - "source": "Baha", - "fansub": "ANi", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_title_leading_space_style() { - test_raw_ep_parser_case( - r#"[ANi] 16bit 的感动 ANOTHER LAYER - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#, - r#"{ - "name_zh": "16bit 的感动 ANOTHER LAYER", - "name_zh_no_season": "16bit 的感动 ANOTHER LAYER", - "season": 1, - "season_raw": null, - "episode_index": 1, - "subtitle": "CHT", - "source": "Baha", - "fansub": "ANi", - "resolution": "1080P" - }"#, - ) - } - - #[test] - fn test_parse_ep_title_leading_month_and_wrapped_brackets_style() { - test_raw_ep_parser_case( - r#"【喵萌奶茶屋】★07月新番★[银砂糖师与黑妖精 ~ Sugar Apple Fairy Tale ~][13][1080p][简日双语][招募翻译]"#, - r#"{ - "name_en": "~ Sugar Apple Fairy Tale ~", - "name_en_no_season": "~ Sugar Apple Fairy Tale ~", - "name_zh": "银砂糖师与黑妖精", - "name_zh_no_season": "银砂糖师与黑妖精", - "season": 1, - "episode_index": 13, - "subtitle": "简日双语", - "fansub": "喵萌奶茶屋", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_title_leading_month_style() { - test_raw_ep_parser_case( - r#"【极影字幕社】★4月新番 天国大魔境 Tengoku Daimakyou 第05话 GB 720P MP4(字幕社招人内详)"#, - r#"{ - "name_en": "Tengoku Daimakyou", - "name_en_no_season": "Tengoku Daimakyou", - "name_zh": "天国大魔境", - "name_zh_no_season": "天国大魔境", - "season": 1, - "episode_index": 5, - "subtitle": "字幕社招人内详", - "source": null, - "fansub": "极影字幕社", - "resolution": "720P" - }"#, - ) - } - - #[test] - fn test_parse_ep_tokusatsu_style() { - test_raw_ep_parser_case( - r#"[MagicStar] 假面骑士Geats / 仮面ライダーギーツ EP33 [WEBDL] [1080p] [TTFC]【生】"#, - r#"{ - "name_jp": "仮面ライダーギーツ", - "name_jp_no_season": "仮面ライダーギーツ", - "name_zh": "假面骑士Geats", - "name_zh_no_season": "假面骑士Geats", - "season": 1, - "episode_index": 33, - "source": "WEBDL", - "fansub": "MagicStar", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_parse_ep_with_multi_lang_zh_title() { - test_raw_ep_parser_case( - r#"[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]"#, - r#"{ - "name_en": "Garupa Pico: Fever!", - "name_en_no_season": "Garupa Pico: Fever!", - "name_zh": "BanG Dream! 少女乐团派对!☆PICO FEVER!", - "name_zh_no_season": "BanG Dream! 少女乐团派对!☆PICO FEVER!", - "season": 1, - "episode_index": 26, - "subtitle": "简繁内封字幕", - "source": "WebRip", - "fansub": "百冬练习组&LoliHouse", - "resolution": "1080p" - }"#, - ) - } - - #[test] - fn test_ep_collections() { - test_raw_ep_parser_case( - r#"[奶²&LoliHouse] 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简日内封字幕]"#, - r#"{ - "name_en": "Kinokoinu: Mushroom Pup", - "name_en_no_season": "Kinokoinu: Mushroom Pup", - "name_zh": "蘑菇狗", - "name_zh_no_season": "蘑菇狗", - "season": 1, - "episode_index": 1, - "subtitle": "简日内封字幕", - "source": "WebRip", - "fansub": "奶²&LoliHouse", - "resolution": "1080p", - "name": " 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集]" - }"#, - ); - - test_raw_ep_parser_case( - r#"[LoliHouse] 叹气的亡灵想隐退 / Nageki no Bourei wa Intai shitai [01-13 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#, - r#"{ - "name_en": "Nageki no Bourei wa Intai shitai", - "name_en_no_season": "Nageki no Bourei wa Intai shitai", - "name_jp": null, - "name_jp_no_season": null, - "name_zh": "叹气的亡灵想隐退", - "name_zh_no_season": "叹气的亡灵想隐退", - "season": 1, - "season_raw": null, - "episode_index": 1, - "subtitle": "简繁内封字幕", - "source": "WebRip", - "fansub": "LoliHouse", - "resolution": "1080p" - }"#, - ); - - test_raw_ep_parser_case( - r#"[LoliHouse] 精灵幻想记 第二季 / Seirei Gensouki S2 [01-12 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#, - r#"{ - "name_en": "Seirei Gensouki S2", - "name_en_no_season": "Seirei Gensouki", - "name_zh": "精灵幻想记 第二季", - "name_zh_no_season": "精灵幻想记", - "season": 2, - "season_raw": "第二季", - "episode_index": 1, - "subtitle": "简繁内封字幕", - "source": "WebRip", - "fansub": "LoliHouse", - "resolution": "1080p" - }"#, - ); - - test_raw_ep_parser_case( - r#"[喵萌奶茶屋&LoliHouse] 超自然武装当哒当 / 胆大党 / Dandadan [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简繁日内封字幕][Fin]"#, - r#" { - "name_en": "Dandadan", - "name_en_no_season": "Dandadan", - "name_zh": "超自然武装当哒当", - "name_zh_no_season": "超自然武装当哒当", - "season": 1, - "episode_index": 1, - "subtitle": "简繁日内封字幕", - "source": "WebRip", - "fansub": "喵萌奶茶屋&LoliHouse", - "resolution": "1080p" - }"#, - ); - } - - // TODO: FIXME - #[test] - fn test_bad_cases() { - test_raw_ep_parser_case( - r#"[7³ACG x 桜都字幕组] 摇曳露营△ 剧场版/映画 ゆるキャン△/Eiga Yuru Camp△ [简繁字幕] BDrip 1080p x265 FLAC 2.0"#, - r#"{ - "name_zh": "摇曳露营△剧场版", - "name_zh_no_season": "摇曳露营△剧场版", - "season": 1, - "season_raw": null, - "episode_index": 1, - "subtitle": "简繁字幕", - "source": "BDrip", - "fansub": "7³ACG x 桜都字幕组", - "resolution": "1080p" - }"#, - ); - - test_raw_ep_parser_case( - r#"【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"#, - r#"{ - "name_en": "第二季 Komi-san wa, Komyushou Desu. S02", - "name_en_no_season": "Komi-san wa, Komyushou Desu.", - "name_zh": "古见同学有交流障碍症", - "name_zh_no_season": "古见同学有交流障碍症", - "season": 2, - "season_raw": "第二季", - "episode_index": 22, - "subtitle": "GB", - "fansub": "幻樱字幕组", - "resolution": "1920X1080" - }"#, - ); - } -} diff --git a/apps/recorder/src/models/bangumi.rs b/apps/recorder/src/models/bangumi.rs index 252f094..b96c6ea 100644 --- a/apps/recorder/src/models/bangumi.rs +++ b/apps/recorder/src/models/bangumi.rs @@ -17,7 +17,7 @@ use crate::{ MikanBangumiHash, MikanBangumiMeta, build_mikan_bangumi_subscription_rss_url, scrape_mikan_poster_meta_from_image_url, }, - origin::extract_season_from_title_body, + origin::{OriginCompTrait, SeasonComp}, }, }; @@ -123,7 +123,11 @@ impl ActiveModel { let mikan_client = ctx.mikan(); let storage_service = ctx.storage(); let mikan_base_url = mikan_client.base_url(); - let (_, season_raw, season_index) = extract_season_from_title_body(&meta.bangumi_title); + let season_comp = SeasonComp::parse_comp(&meta.bangumi_title) + .ok() + .map(|(_, s)| s); + let season_index = season_comp.as_ref().map(|s| s.num).unwrap_or(1); + let season_raw = season_comp.map(|s| s.source.into_owned()); let rss_url = build_mikan_bangumi_subscription_rss_url( mikan_base_url.clone(), diff --git a/apps/recorder/src/models/episodes.rs b/apps/recorder/src/models/episodes.rs index 5b5921a..6daa055 100644 --- a/apps/recorder/src/models/episodes.rs +++ b/apps/recorder/src/models/episodes.rs @@ -10,7 +10,7 @@ use crate::{ errors::RecorderResult, extract::{ mikan::{MikanEpisodeHash, MikanEpisodeMeta, build_mikan_episode_homepage_url}, - origin::extract_episode_meta_from_origin_name, + origin::{OriginCompTrait, OriginNameRoot}, }, }; @@ -124,7 +124,7 @@ impl ActiveModel { episode: MikanEpisodeMeta, ) -> RecorderResult { let mikan_base_url = ctx.mikan().base_url().clone(); - let episode_extention_meta = extract_episode_meta_from_origin_name(&episode.episode_title) + let episode_extention_meta = OriginNameRoot::parse_comp(&episode.episode_title) .inspect_err(|err| { tracing::error!( err = ?err, @@ -132,6 +132,7 @@ impl ActiveModel { "Failed to parse episode extension meta from episode title, skip" ); }) + .map(|(_, e)| e.into_meta()) .ok(); let homepage = build_mikan_episode_homepage_url(mikan_base_url, &episode.mikan_episode_id); diff --git a/apps/recorder/src/test_utils/tracing.rs b/apps/recorder/src/test_utils/tracing.rs index f342e75..ae33ec5 100644 --- a/apps/recorder/src/test_utils/tracing.rs +++ b/apps/recorder/src/test_utils/tracing.rs @@ -1,9 +1,10 @@ use tracing::Level; -use tracing_subscriber::EnvFilter; +use tracing_subscriber::{EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; +use tracing_tree::HierarchicalLayer; use crate::logger::MODULE_WHITELIST; -pub fn try_init_testing_tracing(level: Level) { +fn build_testing_tracing_filter(level: Level) -> EnvFilter { let crate_name = env!("CARGO_PKG_NAME"); let level = level.as_str().to_lowercase(); let mut filter = EnvFilter::new(format!("{crate_name}[]={level}")); @@ -14,5 +15,22 @@ pub fn try_init_testing_tracing(level: Level) { filter = filter.add_directive(format!("{module}[]={level}").parse().unwrap()); } - let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); + filter +} + +pub fn try_init_testing_tracing(level: Level) { + let _ = tracing_subscriber::fmt() + .with_env_filter(build_testing_tracing_filter(level)) + .try_init(); +} + +pub fn try_init_testing_tracing_only_leaf(level: Level) { + let _ = tracing_subscriber::registry() + .with(build_testing_tracing_filter(level)) + .with( + HierarchicalLayer::new(2) + .with_targets(true) + .with_bracketed_fields(true), + ) + .try_init(); } diff --git a/apps/recorder/src/utils/mod.rs b/apps/recorder/src/utils/mod.rs index 22eaf2f..bec2859 100644 --- a/apps/recorder/src/utils/mod.rs +++ b/apps/recorder/src/utils/mod.rs @@ -1,2 +1,3 @@ pub mod http; pub mod json; +pub mod nom; diff --git a/apps/recorder/src/utils/nom.rs b/apps/recorder/src/utils/nom.rs new file mode 100644 index 0000000..e847df0 --- /dev/null +++ b/apps/recorder/src/utils/nom.rs @@ -0,0 +1,261 @@ +use std::collections::HashMap; + +use icu::properties::{CodePointMapData, props::Script}; +use lazy_static::lazy_static; +use maplit::hashmap; +use nom::{ + IResult, Parser, + branch::alt, + bytes::complete::tag, + character::complete::{anychar, digit1, none_of, satisfy}, + combinator::{map, opt, recognize, value, verify}, + error::ParseError, + multi::many1, + sequence::{delimited, preceded}, +}; +use num_traits::{PrimInt, Signed}; + +lazy_static! { + pub static ref ZH_DIGIT_MAP: HashMap = { + hashmap! { + '〇' => 0, + '零' => 0, + '一' => 1, + '壹' => 1, + '二' => 2, + '贰' => 2, + '三' => 3, + '叁' => 3, + '四' => 4, + '肆' => 4, + '五' => 5, + '伍' => 5, + '六' => 6, + '陆' => 6, + '七' => 7, + '柒' => 7, + '八' => 8, + '捌' => 8, + '九' => 9, + '玖' => 9, + '十' => 10, + '拾' => 10, + '廿' => 20, + '念' => 20, + '百' => 100, + '佰' => 100, + '千' => 1000, + '仟' => 1000, + '万' => 10000, + '萬' => 10000, + '亿' => 100000000, + '億' => 100000000, + } + }; +} + +pub fn with_recognized<'a, F, O, E>( + mut parser: F, +) -> impl FnMut(&'a str) -> IResult<&'a str, (O, &'a str), E> +where + F: Parser<&'a str, Output = O, Error = E>, + E: ParseError<&'a str>, +{ + move |input: &'a str| { + let i = input; + let (rest, output) = parser.parse(i)?; + let consumed_len = i.len() - rest.len(); + Ok((rest, (output, &i[..consumed_len]))) + } +} + +pub fn is_some_unicode_scx(input: &str, script: Script) -> IResult<&str, char> { + let script_data = CodePointMapData::