refactor: rewrite origin name extractor from regex to nom combinators
This commit is contained in:
parent
c12b9b360a
commit
324427513c
418
Cargo.lock
generated
418
Cargo.lock
generated
@ -933,6 +933,16 @@ dependencies = [
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "calendrical_calculations"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f6df87e869fb08be61c7e97ced8e69ab802df1d8bc612ed67dba78c07fbc12c"
|
||||
dependencies = [
|
||||
"core_maths",
|
||||
"displaydoc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.26"
|
||||
@ -1280,6 +1290,15 @@ version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "core_maths"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30"
|
||||
dependencies = [
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.17"
|
||||
@ -2082,6 +2101,17 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixed_decimal"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"smallvec",
|
||||
"writeable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.2"
|
||||
@ -2901,6 +2931,103 @@ dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab13fe39da5da564b88228e9f08815c9d0efbe9ec244e72b149d9994e10f1054"
|
||||
dependencies = [
|
||||
"icu_calendar",
|
||||
"icu_casemap",
|
||||
"icu_collator",
|
||||
"icu_collections",
|
||||
"icu_datetime",
|
||||
"icu_decimal",
|
||||
"icu_experimental",
|
||||
"icu_list",
|
||||
"icu_locale",
|
||||
"icu_normalizer",
|
||||
"icu_pattern",
|
||||
"icu_plurals",
|
||||
"icu_properties",
|
||||
"icu_provider",
|
||||
"icu_segmenter",
|
||||
"icu_time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_calendar"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7a6ed1ea995a24dff839bc5ca4471ce2fa18ba14d8b09061c2527a46a1c6079"
|
||||
dependencies = [
|
||||
"calendrical_calculations",
|
||||
"displaydoc",
|
||||
"icu_calendar_data",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_provider",
|
||||
"ixdtf",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_calendar_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7219c8639ab936713a87b571eed2bc2615aa9137e8af6eb221446ee5644acc18"
|
||||
|
||||
[[package]]
|
||||
name = "icu_casemap"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dc5e74b3c9d7b63e0d7c5fd54ee8c135705df2ea2aa558082dd555dc9747a97"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"icu_casemap_data",
|
||||
"icu_collections",
|
||||
"icu_locale_core",
|
||||
"icu_properties",
|
||||
"icu_provider",
|
||||
"potential_utf",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_casemap_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7584067558ab4c60c95d1ac2abd1588689cb4bcd4e099507f62dae86ae8d2c0"
|
||||
|
||||
[[package]]
|
||||
name = "icu_collator"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42ad4c6a556938dfd31f75a8c54141079e8821dc697ffb799cfe0f0fa11f2edc"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"icu_collator_data",
|
||||
"icu_collections",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_normalizer",
|
||||
"icu_properties",
|
||||
"icu_provider",
|
||||
"smallvec",
|
||||
"utf16_iter",
|
||||
"utf8_iter",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_collator_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d880b8e680799eabd90c054e1b95526cd48db16c95269f3c89fb3117e1ac92c5"
|
||||
|
||||
[[package]]
|
||||
name = "icu_collections"
|
||||
version = "2.0.0"
|
||||
@ -2914,6 +3041,139 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_datetime"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0790c15e3d6ae3303365fa2337b4f6469de257916141110d14dcaf73f1d31ac5"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"either",
|
||||
"fixed_decimal",
|
||||
"icu_calendar",
|
||||
"icu_datetime_data",
|
||||
"icu_decimal",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_pattern",
|
||||
"icu_plurals",
|
||||
"icu_provider",
|
||||
"icu_time",
|
||||
"potential_utf",
|
||||
"smallvec",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_datetime_data"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83791ac10bb7b774f130bb81fa89c4059de710dcef53caa0b86e645212d6d54c"
|
||||
|
||||
[[package]]
|
||||
name = "icu_decimal"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"fixed_decimal",
|
||||
"icu_decimal_data",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_provider",
|
||||
"serde",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_decimal_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5"
|
||||
|
||||
[[package]]
|
||||
name = "icu_experimental"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebe3d7e64892a434b08d5a58b53127e47a095ff780305f563c8c01798a1051b0"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"either",
|
||||
"fixed_decimal",
|
||||
"icu_casemap",
|
||||
"icu_collections",
|
||||
"icu_decimal",
|
||||
"icu_experimental_data",
|
||||
"icu_list",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_normalizer",
|
||||
"icu_pattern",
|
||||
"icu_plurals",
|
||||
"icu_properties",
|
||||
"icu_provider",
|
||||
"litemap",
|
||||
"num-bigint",
|
||||
"num-rational 0.4.2",
|
||||
"num-traits",
|
||||
"potential_utf",
|
||||
"smallvec",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerofrom",
|
||||
"zerotrie",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_experimental_data"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b60d32ba5610adfc2083f5a759f55d9a9082ebf72750f126cb1630844eea1acf"
|
||||
|
||||
[[package]]
|
||||
name = "icu_list"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e26f94ec776bb8b28cedc7dcf91033b822c5cb4c1783cf7a3f796fc168aa0c8b"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"icu_list_data",
|
||||
"icu_locale",
|
||||
"icu_provider",
|
||||
"regex-automata 0.4.9",
|
||||
"serde",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_list_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a456a2412458ca45e181d9d51c5090ef8cd90f5692e11d34bafab3b3be1c76b"
|
||||
|
||||
[[package]]
|
||||
name = "icu_locale"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ae5921528335e91da1b6c695dbf1ec37df5ac13faa3f91e5640be93aa2fbefd"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"icu_collections",
|
||||
"icu_locale_core",
|
||||
"icu_locale_data",
|
||||
"icu_provider",
|
||||
"potential_utf",
|
||||
"tinystr",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_locale_core"
|
||||
version = "2.0.0"
|
||||
@ -2927,6 +3187,12 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_locale_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fdef0c124749d06a743c69e938350816554eb63ac979166590e2b4ee4252765"
|
||||
|
||||
[[package]]
|
||||
name = "icu_normalizer"
|
||||
version = "2.0.0"
|
||||
@ -2939,6 +3205,9 @@ dependencies = [
|
||||
"icu_properties",
|
||||
"icu_provider",
|
||||
"smallvec",
|
||||
"utf16_iter",
|
||||
"utf8_iter",
|
||||
"write16",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
@ -2948,6 +3217,39 @@ version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
|
||||
|
||||
[[package]]
|
||||
name = "icu_pattern"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "983825f401e6bc4a13c45d552ffd9ad6f3f6b6bc0ec03f31d6835a90a46deb1f"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"either",
|
||||
"writeable",
|
||||
"yoke",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_plurals"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fd83a65f58b6f28e1f3da8c6ada6b415ee3ad5cb480b75bdb669f34d72dd179"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"fixed_decimal",
|
||||
"icu_locale",
|
||||
"icu_plurals_data",
|
||||
"icu_provider",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_plurals_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ec552d761eaf4a1c39ad28936e0af77a41bf01ff756ea54be4f8bfc21c265d7"
|
||||
|
||||
[[package]]
|
||||
name = "icu_properties"
|
||||
version = "2.0.1"
|
||||
@ -2987,6 +3289,56 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_segmenter"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e185fc13b6401c138cf40db12b863b35f5edf31b88192a545857b41aeaf7d3d3"
|
||||
dependencies = [
|
||||
"core_maths",
|
||||
"displaydoc",
|
||||
"icu_collections",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_provider",
|
||||
"icu_segmenter_data",
|
||||
"potential_utf",
|
||||
"utf8_iter",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_segmenter_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5360a2fbe97f617c4f8b944356dedb36d423f7da7f13c070995cf89e59f01220"
|
||||
|
||||
[[package]]
|
||||
name = "icu_time"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10d01a4a2dcbc5e5180ef113920e7461d0e9caaddb3567d81c4eca262efe55c0"
|
||||
dependencies = [
|
||||
"calendrical_calculations",
|
||||
"displaydoc",
|
||||
"icu_calendar",
|
||||
"icu_locale_core",
|
||||
"icu_provider",
|
||||
"icu_time_data",
|
||||
"ixdtf",
|
||||
"serde",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerotrie",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_time_data"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8472be4410d26a03d7208cae3a76c798dd6766e8226ab977cd8b2d349a6dbf08"
|
||||
|
||||
[[package]]
|
||||
name = "ident_case"
|
||||
version = "1.0.1"
|
||||
@ -3219,6 +3571,15 @@ version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
|
||||
|
||||
[[package]]
|
||||
name = "ixdtf"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8289f7f711a1a51f80e2e368355d023042ca55d8d554fd5e953f01464c15842d"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.33"
|
||||
@ -4154,6 +4515,15 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.50.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.2.1"
|
||||
@ -4163,7 +4533,7 @@ dependencies = [
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-iter",
|
||||
"num-rational",
|
||||
"num-rational 0.2.4",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
@ -4241,6 +4611,17 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-rational"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
@ -4869,6 +5250,7 @@ version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
@ -5252,6 +5634,8 @@ dependencies = [
|
||||
"futures",
|
||||
"html-escape",
|
||||
"http",
|
||||
"icu",
|
||||
"icu_properties",
|
||||
"inquire",
|
||||
"insta",
|
||||
"ipnetwork",
|
||||
@ -5265,6 +5649,8 @@ dependencies = [
|
||||
"mockito",
|
||||
"moka",
|
||||
"nanoid",
|
||||
"nom",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"opendal",
|
||||
"openidconnect",
|
||||
@ -5296,6 +5682,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
"tracing-tree",
|
||||
"typed-builder 0.21.0",
|
||||
"url",
|
||||
"util",
|
||||
@ -7456,7 +7843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"nu-ansi-term 0.46.0",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
@ -7470,6 +7857,18 @@ dependencies = [
|
||||
"tracing-serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-tree"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f459ca79f1b0d5f71c54ddfde6debfc59c8b6eeb46808ae492077f739dc7b49c"
|
||||
dependencies = [
|
||||
"nu-ansi-term 0.50.1",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "try-lock"
|
||||
version = "0.2.5"
|
||||
@ -7711,6 +8110,12 @@ version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "utf16_iter"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-width"
|
||||
version = "0.1.7"
|
||||
@ -8296,11 +8701,20 @@ dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "write16"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
|
||||
|
||||
[[package]]
|
||||
name = "writeable"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wyz"
|
||||
|
@ -129,6 +129,11 @@ nanoid = "0.4.0"
|
||||
jwtk = "0.4.0"
|
||||
percent-encoding = "2.3.1"
|
||||
mime_guess = "2.0.5"
|
||||
nom = "8.0.0"
|
||||
icu_properties = "2.0.1"
|
||||
icu = "2.0.0"
|
||||
num-traits = "0.2.19"
|
||||
tracing-tree = "0.4.0"
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -5,6 +5,7 @@ use std::{
|
||||
};
|
||||
|
||||
use async_graphql::{InputObject, SimpleObject};
|
||||
use async_stream::try_stream;
|
||||
use fetch::fetch_bytes;
|
||||
use futures::{Stream, TryStreamExt, pin_mut, try_join};
|
||||
use maplit::hashmap;
|
||||
@ -292,17 +293,19 @@ impl SubscriptionTrait for MikanSeasonSubscription {
|
||||
}
|
||||
|
||||
async fn sync_feeds_incremental(&self, ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
|
||||
let rss_item_list = self
|
||||
.get_rss_item_list_from_subsribed_url_rss_link(ctx.as_ref())
|
||||
.await?;
|
||||
let rss_item_stream = self.get_rss_item_stream_from_subsribed_url_rss_link(ctx.as_ref());
|
||||
|
||||
sync_mikan_feeds_from_rss_item_list(
|
||||
ctx.as_ref(),
|
||||
rss_item_list,
|
||||
self.get_subscriber_id(),
|
||||
self.get_subscription_id(),
|
||||
)
|
||||
.await?;
|
||||
pin_mut!(rss_item_stream);
|
||||
|
||||
while let Some(rss_item_chunk_list) = rss_item_stream.try_next().await? {
|
||||
sync_mikan_feeds_from_rss_item_list(
|
||||
ctx.as_ref(),
|
||||
rss_item_chunk_list,
|
||||
self.get_subscriber_id(),
|
||||
self.get_subscription_id(),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -393,48 +396,53 @@ impl MikanSeasonSubscription {
|
||||
)
|
||||
}
|
||||
|
||||
#[tracing::instrument(err, skip(ctx))]
|
||||
async fn get_rss_item_list_from_subsribed_url_rss_link(
|
||||
fn get_rss_item_stream_from_subsribed_url_rss_link(
|
||||
&self,
|
||||
ctx: &dyn AppContextTrait,
|
||||
) -> RecorderResult<Vec<MikanRssEpisodeItem>> {
|
||||
let db = ctx.db();
|
||||
) -> impl Stream<Item = RecorderResult<Vec<MikanRssEpisodeItem>>> {
|
||||
try_stream! {
|
||||
|
||||
let subscribed_bangumi_list = bangumi::Entity::find()
|
||||
.filter(
|
||||
Condition::all()
|
||||
.add(subscription_bangumi::Column::SubscriptionId.eq(self.subscription_id)),
|
||||
)
|
||||
.join_rev(
|
||||
JoinType::InnerJoin,
|
||||
subscription_bangumi::Relation::Bangumi.def(),
|
||||
)
|
||||
.all(db)
|
||||
.await?;
|
||||
let db = ctx.db();
|
||||
|
||||
let mut rss_item_list = vec![];
|
||||
for subscribed_bangumi in subscribed_bangumi_list {
|
||||
let rss_url = subscribed_bangumi
|
||||
.rss_link
|
||||
.with_whatever_context::<_, String, RecorderError>(|| {
|
||||
format!(
|
||||
"rss_link is required, subscription_id = {}, bangumi_name = {}",
|
||||
self.subscription_id, subscribed_bangumi.display_name
|
||||
)
|
||||
})?;
|
||||
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
|
||||
let subscribed_bangumi_list = bangumi::Entity::find()
|
||||
.filter(
|
||||
Condition::all()
|
||||
.add(subscription_bangumi::Column::SubscriptionId.eq(self.subscription_id)),
|
||||
)
|
||||
.join_rev(
|
||||
JoinType::InnerJoin,
|
||||
subscription_bangumi::Relation::Bangumi.def(),
|
||||
)
|
||||
.all(db)
|
||||
.await?;
|
||||
|
||||
let channel = rss::Channel::read_from(&bytes[..])?;
|
||||
|
||||
for (idx, item) in channel.items.into_iter().enumerate() {
|
||||
let item = MikanRssEpisodeItem::try_from(item)
|
||||
.with_whatever_context::<_, String, RecorderError>(|_| {
|
||||
format!("failed to extract rss item at idx {idx}")
|
||||
for subscribed_bangumi in subscribed_bangumi_list {
|
||||
let rss_url = subscribed_bangumi
|
||||
.rss_link
|
||||
.with_whatever_context::<_, String, RecorderError>(|| {
|
||||
format!(
|
||||
"rss_link is required, subscription_id = {}, bangumi_name = {}",
|
||||
self.subscription_id, subscribed_bangumi.display_name
|
||||
)
|
||||
})?;
|
||||
rss_item_list.push(item);
|
||||
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
|
||||
|
||||
let channel = rss::Channel::read_from(&bytes[..])?;
|
||||
|
||||
let mut rss_item_list = vec![];
|
||||
|
||||
for (idx, item) in channel.items.into_iter().enumerate() {
|
||||
let item = MikanRssEpisodeItem::try_from(item)
|
||||
.with_whatever_context::<_, String, RecorderError>(|_| {
|
||||
format!("failed to extract rss item at idx {idx}")
|
||||
})?;
|
||||
rss_item_list.push(item);
|
||||
}
|
||||
|
||||
yield rss_item_list;
|
||||
}
|
||||
}
|
||||
Ok(rss_item_list)
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,845 +0,0 @@
|
||||
/**
|
||||
* @TODO: rewrite with nom
|
||||
*/
|
||||
use std::borrow::Cow;
|
||||
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::whatever;
|
||||
|
||||
use crate::{
|
||||
errors::RecorderResult,
|
||||
extract::defs::{DIGIT_1PLUS_REG, ZH_NUM_MAP, ZH_NUM_RE},
|
||||
};
|
||||
|
||||
const NAME_EXTRACT_REPLACE_ADHOC1_REPLACED: &str = "$1/$2";
|
||||
|
||||
lazy_static! {
|
||||
static ref TITLE_RE: Regex = Regex::new(
|
||||
r#"(.*|\[.*])( -? \d+|\[\d+]|\[\d+.?[vV]\d]|第\d+[话話集]|\[第?\d+[话話集]]|\[\d+.?END]|[Ee][Pp]?\d+|\[\s*\d+\s*[\-\~]\s*\d+\s*\p{scx=Han}*[话話集]\s*])(.*)"#
|
||||
).unwrap();
|
||||
static ref EP_COLLECTION_RE:Regex = Regex::new(r#"\[?\s*\d+\s*[\-\~]\s*\d+\s*\p{scx=Han}*合?[话話集]\s*]?"#).unwrap();
|
||||
static ref MOVIE_TITLE_RE:Regex = Regex::new(r#"(.*|\[.*])(剧场版|[Mm]ovie|电影)(.*?)$"#).unwrap();
|
||||
static ref RESOLUTION_RE: Regex = Regex::new(r"1080|720|2160|4K|2K").unwrap();
|
||||
static ref SOURCE_L1_RE: Regex = Regex::new(r"B-Global|[Bb]aha|[Bb]ilibili|AT-X|W[Ee][Bb][Rr][Ii][Pp]|Sentai|B[Dd][Rr][Ii][Pp]|UHD[Rr][Ii][Pp]|NETFLIX").unwrap();
|
||||
static ref SOURCE_L2_RE: Regex = Regex::new(r"AMZ|CR|W[Ee][Bb]|B[Dd]").unwrap();
|
||||
static ref SUB_RE: Regex = Regex::new(r"[简繁日字幕]|CH|BIG5|GB").unwrap();
|
||||
static ref PREFIX_RE: Regex =
|
||||
Regex::new(r"[^\w\s\p{Unified_Ideograph}\p{scx=Han}\p{scx=Hira}\p{scx=Kana}-]").unwrap();
|
||||
static ref EN_BRACKET_SPLIT_RE: Regex = Regex::new(r"[\[\]]").unwrap();
|
||||
static ref MOVIE_SEASON_EXTRACT_RE: Regex = Regex::new(r"剧场版|Movie|电影").unwrap();
|
||||
static ref MAIN_TITLE_PREFIX_PROCESS_RE1: Regex = Regex::new(r"新番|月?番").unwrap();
|
||||
static ref MAIN_TITLE_PREFIX_PROCESS_RE2: Regex = Regex::new(r"[港澳台]{1,3}地区").unwrap();
|
||||
static ref MAIN_TITLE_PRE_PROCESS_BACKETS_RE: Regex = Regex::new(r"\[.+\]").unwrap();
|
||||
static ref MAIN_TITLE_PRE_PROCESS_BACKETS_RE_SUB1: Regex = Regex::new(r"^.*?\[").unwrap();
|
||||
static ref SEASON_EXTRACT_SEASON_ALL_RE: Regex = Regex::new(r"S\d{1,2}|Season \d{1,2}|[第].[季期]|1st|2nd|3rd|\d{1,2}th").unwrap();
|
||||
static ref SEASON_EXTRACT_SEASON_EN_PREFIX_RE: Regex = Regex::new(r"Season|S").unwrap();
|
||||
static ref SEASON_EXTRACT_SEASON_EN_NTH_RE: Regex = Regex::new(r"1st|2nd|3rd|\d{1,2}th").unwrap();
|
||||
static ref SEASON_EXTRACT_SEASON_ZH_PREFIX_RE: Regex = Regex::new(r"[第 ].*[季期(部分)]|部分").unwrap();
|
||||
static ref SEASON_EXTRACT_SEASON_ZH_PREFIX_SUB_RE: Regex = Regex::new(r"[第季期 ]").unwrap();
|
||||
static ref NAME_EXTRACT_REMOVE_RE: Regex = Regex::new(r"[((]仅限[港澳台]{1,3}地区[))]").unwrap();
|
||||
static ref NAME_EXTRACT_SPLIT_RE: Regex = Regex::new(r"/|\s{2}|-\s{2}|\]\[").unwrap();
|
||||
static ref NAME_EXTRACT_REPLACE_ADHOC1_RE: Regex = Regex::new(r"([\p{scx=Han}\s\(\)]{5,})_([a-zA-Z]{2,})").unwrap();
|
||||
static ref NAME_JP_TEST: Regex = Regex::new(r"[\p{scx=Hira}\p{scx=Kana}]{2,}").unwrap();
|
||||
static ref NAME_ZH_TEST: Regex = Regex::new(r"[\p{scx=Han}]{2,}").unwrap();
|
||||
static ref NAME_EN_TEST: Regex = Regex::new(r"[a-zA-Z]{3,}").unwrap();
|
||||
static ref TAGS_EXTRACT_SPLIT_RE: Regex = Regex::new(r"[\[\]()()_]").unwrap();
|
||||
static ref CLEAR_SUB_RE: Regex = Regex::new(r"_MP4|_MKV").unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||
pub struct RawEpisodeMeta {
|
||||
pub name_en: Option<String>,
|
||||
pub name_en_no_season: Option<String>,
|
||||
pub name_jp: Option<String>,
|
||||
pub name_jp_no_season: Option<String>,
|
||||
pub name_zh: Option<String>,
|
||||
pub name_zh_no_season: Option<String>,
|
||||
pub season: i32,
|
||||
pub season_raw: Option<String>,
|
||||
pub episode_index: i32,
|
||||
pub subtitle: Option<String>,
|
||||
pub source: Option<String>,
|
||||
pub fansub: Option<String>,
|
||||
pub resolution: Option<String>,
|
||||
}
|
||||
|
||||
fn extract_fansub(raw_name: &str) -> Option<&str> {
|
||||
let mut groups = EN_BRACKET_SPLIT_RE.splitn(raw_name, 3);
|
||||
groups.nth(1)
|
||||
}
|
||||
|
||||
fn replace_ch_bracket_to_en(raw_name: &str) -> String {
|
||||
raw_name.replace('【', "[").replace('】', "]")
|
||||
}
|
||||
|
||||
fn title_body_pre_process(title_body: &str, fansub: Option<&str>) -> RecorderResult<String> {
|
||||
let raw_without_fansub = if let Some(fansub) = fansub {
|
||||
let fan_sub_re = Regex::new(&format!(".{fansub}."))?;
|
||||
fan_sub_re.replace_all(title_body, "")
|
||||
} else {
|
||||
Cow::Borrowed(title_body)
|
||||
};
|
||||
let raw_with_prefix_replaced = PREFIX_RE.replace_all(&raw_without_fansub, "/");
|
||||
let mut arg_group = raw_with_prefix_replaced
|
||||
.split('/')
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if arg_group.len() == 1 {
|
||||
arg_group = arg_group.first_mut().unwrap().split(' ').collect();
|
||||
}
|
||||
let mut raw = raw_without_fansub.to_string();
|
||||
for arg in arg_group.iter() {
|
||||
if (arg_group.len() <= 5 && MAIN_TITLE_PREFIX_PROCESS_RE1.is_match(arg))
|
||||
|| (MAIN_TITLE_PREFIX_PROCESS_RE2.is_match(arg))
|
||||
{
|
||||
let sub = Regex::new(&format!(".{arg}."))?;
|
||||
raw = sub.replace_all(&raw, "").to_string();
|
||||
}
|
||||
}
|
||||
if let Some(m) = MAIN_TITLE_PRE_PROCESS_BACKETS_RE.find(&raw)
|
||||
&& m.len() as f32 > (raw.len() as f32) * 0.5
|
||||
{
|
||||
let mut raw1 = MAIN_TITLE_PRE_PROCESS_BACKETS_RE_SUB1
|
||||
.replace(&raw, "")
|
||||
.chars()
|
||||
.collect_vec();
|
||||
while let Some(ch) = raw1.pop() {
|
||||
if ch == ']' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw = raw1.into_iter().collect();
|
||||
}
|
||||
Ok(raw.to_string())
|
||||
}
|
||||
|
||||
pub fn extract_season_from_title_body(title_body: &str) -> (String, Option<String>, i32) {
|
||||
let name_and_season = EN_BRACKET_SPLIT_RE.replace_all(title_body, " ");
|
||||
let seasons = SEASON_EXTRACT_SEASON_ALL_RE
|
||||
.find(&name_and_season)
|
||||
.into_iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect_vec();
|
||||
|
||||
if seasons.is_empty() {
|
||||
return (title_body.to_string(), None, 1);
|
||||
}
|
||||
|
||||
let mut season = 1;
|
||||
let mut season_raw = None;
|
||||
let name = SEASON_EXTRACT_SEASON_ALL_RE.replace_all(&name_and_season, "");
|
||||
|
||||
for s in seasons {
|
||||
season_raw = Some(s);
|
||||
if let Some(m) = SEASON_EXTRACT_SEASON_EN_PREFIX_RE.find(s)
|
||||
&& let Ok(s) = SEASON_EXTRACT_SEASON_ALL_RE
|
||||
.replace_all(m.as_str(), "")
|
||||
.parse::<i32>()
|
||||
{
|
||||
season = s;
|
||||
break;
|
||||
}
|
||||
if let Some(m) = SEASON_EXTRACT_SEASON_EN_NTH_RE.find(s)
|
||||
&& let Some(s) = DIGIT_1PLUS_REG
|
||||
.find(m.as_str())
|
||||
.and_then(|s| s.as_str().parse::<i32>().ok())
|
||||
{
|
||||
season = s;
|
||||
break;
|
||||
}
|
||||
if let Some(m) = SEASON_EXTRACT_SEASON_ZH_PREFIX_RE.find(s) {
|
||||
if let Ok(s) = SEASON_EXTRACT_SEASON_ZH_PREFIX_SUB_RE
|
||||
.replace(m.as_str(), "")
|
||||
.parse::<i32>()
|
||||
{
|
||||
season = s;
|
||||
break;
|
||||
}
|
||||
if let Some(m) = ZH_NUM_RE.find(m.as_str()) {
|
||||
season = ZH_NUM_MAP[m.as_str()];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(name.to_string(), season_raw.map(|s| s.to_string()), season)
|
||||
}
|
||||
|
||||
fn extract_name_from_title_body_name_section(
|
||||
title_body_name_section: &str,
|
||||
) -> (Option<String>, Option<String>, Option<String>) {
|
||||
let mut name_en = None;
|
||||
let mut name_zh = None;
|
||||
let mut name_jp = None;
|
||||
let replaced1 = NAME_EXTRACT_REMOVE_RE.replace_all(title_body_name_section, "");
|
||||
let replaced2 = NAME_EXTRACT_REPLACE_ADHOC1_RE
|
||||
.replace_all(&replaced1, NAME_EXTRACT_REPLACE_ADHOC1_REPLACED);
|
||||
let trimmed = replaced2.trim();
|
||||
let mut split = NAME_EXTRACT_SPLIT_RE
|
||||
.split(trimmed)
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string())
|
||||
.collect_vec();
|
||||
if split.len() == 1 {
|
||||
let mut split_space = split[0].split(' ').collect_vec();
|
||||
let mut search_indices = vec![0];
|
||||
if split_space.len() > 1 {
|
||||
search_indices.push(split_space.len() - 1);
|
||||
}
|
||||
for i in search_indices {
|
||||
if NAME_ZH_TEST.is_match(split_space[i]) {
|
||||
let chs = split_space[i];
|
||||
split_space.remove(i);
|
||||
split = vec![chs.to_string(), split_space.join(" ")];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for item in split {
|
||||
if NAME_JP_TEST.is_match(&item) && name_jp.is_none() {
|
||||
name_jp = Some(item);
|
||||
} else if NAME_ZH_TEST.is_match(&item) && name_zh.is_none() {
|
||||
name_zh = Some(item);
|
||||
} else if NAME_EN_TEST.is_match(&item) && name_en.is_none() {
|
||||
name_en = Some(item);
|
||||
}
|
||||
}
|
||||
(name_en, name_zh, name_jp)
|
||||
}
|
||||
|
||||
fn extract_episode_index_from_title_episode(title_episode: &str) -> Option<i32> {
|
||||
DIGIT_1PLUS_REG
|
||||
.find(title_episode)?
|
||||
.as_str()
|
||||
.parse::<i32>()
|
||||
.ok()
|
||||
}
|
||||
|
||||
fn clear_sub(sub: Option<String>) -> Option<String> {
|
||||
sub.map(|s| CLEAR_SUB_RE.replace_all(&s, "").to_string())
|
||||
}
|
||||
|
||||
fn extract_tags_from_title_extra(
|
||||
title_extra: &str,
|
||||
) -> (Option<String>, Option<String>, Option<String>) {
|
||||
let replaced = TAGS_EXTRACT_SPLIT_RE.replace_all(title_extra, " ");
|
||||
let elements = replaced
|
||||
.split(' ')
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect_vec();
|
||||
|
||||
let mut sub = None;
|
||||
let mut resolution = None;
|
||||
let mut source = None;
|
||||
for element in elements.iter() {
|
||||
if SUB_RE.is_match(element) {
|
||||
sub = Some(element.to_string())
|
||||
} else if RESOLUTION_RE.is_match(element) {
|
||||
resolution = Some(element.to_string())
|
||||
} else if SOURCE_L1_RE.is_match(element) {
|
||||
source = Some(element.to_string())
|
||||
}
|
||||
}
|
||||
if source.is_none() {
|
||||
for element in elements {
|
||||
if SOURCE_L2_RE.is_match(element) {
|
||||
source = Some(element.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
(clear_sub(sub), resolution, source)
|
||||
}
|
||||
|
||||
pub fn check_is_movie(title: &str) -> bool {
|
||||
MOVIE_TITLE_RE.is_match(title)
|
||||
}
|
||||
|
||||
pub fn extract_episode_meta_from_origin_name(s: &str) -> RecorderResult<RawEpisodeMeta> {
|
||||
let raw_title = s.trim();
|
||||
let raw_title_without_ch_brackets = replace_ch_bracket_to_en(raw_title);
|
||||
let fansub = extract_fansub(&raw_title_without_ch_brackets);
|
||||
let movie_capture = check_is_movie(&raw_title_without_ch_brackets);
|
||||
if let Some(title_re_match_obj) = MOVIE_TITLE_RE
|
||||
.captures(&raw_title_without_ch_brackets)
|
||||
.or(TITLE_RE.captures(&raw_title_without_ch_brackets))
|
||||
{
|
||||
let mut title_body = title_re_match_obj
|
||||
.get(1)
|
||||
.map(|s| s.as_str().trim())
|
||||
.unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups"))
|
||||
.to_string();
|
||||
let mut title_episode = title_re_match_obj
|
||||
.get(2)
|
||||
.map(|s| s.as_str().trim())
|
||||
.unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups"));
|
||||
let title_extra = title_re_match_obj
|
||||
.get(3)
|
||||
.map(|s| s.as_str().trim())
|
||||
.unwrap_or_else(|| unreachable!("TITLE_RE has at least 3 capture groups"));
|
||||
|
||||
if movie_capture {
|
||||
title_body += title_episode;
|
||||
title_episode = "";
|
||||
} else if EP_COLLECTION_RE.is_match(title_episode) {
|
||||
title_episode = "";
|
||||
}
|
||||
|
||||
let title_body = title_body_pre_process(&title_body, fansub)?;
|
||||
let (name_without_season, season_raw, season) = extract_season_from_title_body(&title_body);
|
||||
let (name_en, name_zh, name_jp) = extract_name_from_title_body_name_section(&title_body);
|
||||
let (name_en_no_season, name_zh_no_season, name_jp_no_season) =
|
||||
extract_name_from_title_body_name_section(&name_without_season);
|
||||
let episode_index = extract_episode_index_from_title_episode(title_episode).unwrap_or(1);
|
||||
let (sub, resolution, source) = extract_tags_from_title_extra(title_extra);
|
||||
Ok(RawEpisodeMeta {
|
||||
name_en,
|
||||
name_en_no_season,
|
||||
name_jp,
|
||||
name_jp_no_season,
|
||||
name_zh,
|
||||
name_zh_no_season,
|
||||
season,
|
||||
season_raw,
|
||||
episode_index,
|
||||
subtitle: sub,
|
||||
source,
|
||||
fansub: fansub.map(|s| s.to_string()),
|
||||
resolution,
|
||||
})
|
||||
} else {
|
||||
whatever!("Can not parse episode meta from raw filename {}", raw_title)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::{RawEpisodeMeta, extract_episode_meta_from_origin_name};
|
||||
|
||||
fn test_raw_ep_parser_case(raw_name: &str, expected: &str) {
|
||||
let expected: Option<RawEpisodeMeta> = serde_json::from_str(expected).unwrap_or_default();
|
||||
let found = extract_episode_meta_from_origin_name(raw_name).ok();
|
||||
|
||||
if expected != found {
|
||||
println!(
|
||||
"expected {} and found {} are not equal",
|
||||
serde_json::to_string_pretty(&expected).unwrap(),
|
||||
serde_json::to_string_pretty(&found).unwrap()
|
||||
)
|
||||
}
|
||||
assert_eq!(expected, found);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_all_parts_wrapped() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[新Sub][1月新番][我心里危险的东西 第二季][05][HEVC][10Bit][1080P][简日双语][招募翻译]"#,
|
||||
r#"{
|
||||
"name_zh": "我心里危险的东西",
|
||||
"name_zh_no_season": "我心里危险的东西",
|
||||
"season": 2,
|
||||
"season_raw": "第二季",
|
||||
"episode_index": 5,
|
||||
"subtitle": "简日双语",
|
||||
"source": null,
|
||||
"fansub": "新Sub",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_title_wrapped_by_one_square_bracket_and_season_prefix() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"【喵萌奶茶屋】★01月新番★[我内心的糟糕念头 / Boku no Kokoro no Yabai Yatsu][18][1080p][简日双语][招募翻译]"#,
|
||||
r#"{
|
||||
"name_en": "Boku no Kokoro no Yabai Yatsu",
|
||||
"name_en_no_season": "Boku no Kokoro no Yabai Yatsu",
|
||||
"name_zh": "我内心的糟糕念头",
|
||||
"name_zh_no_season": "我内心的糟糕念头",
|
||||
"season": 1,
|
||||
"season_raw": null,
|
||||
"episode_index": 18,
|
||||
"subtitle": "简日双语",
|
||||
"source": null,
|
||||
"fansub": "喵萌奶茶屋",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_ep_and_version() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[LoliHouse] 因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生 2nd / Shin no Nakama 2nd - 08v2 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]"#,
|
||||
r#"{
|
||||
"name_en": "Shin no Nakama 2nd",
|
||||
"name_en_no_season": "Shin no Nakama",
|
||||
"name_zh": "因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生 2nd",
|
||||
"name_zh_no_season": "因为不是真正的伙伴而被逐出勇者队伍,流落到边境展开慢活人生",
|
||||
"season": 2,
|
||||
"season_raw": "2nd",
|
||||
"episode_index": 8,
|
||||
"subtitle": "简繁内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_en_title_only() {
|
||||
test_raw_ep_parser_case(
|
||||
r"[动漫国字幕组&LoliHouse] THE MARGINAL SERVICE - 08 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]",
|
||||
r#"{
|
||||
"name_en": "THE MARGINAL SERVICE",
|
||||
"name_en_no_season": "THE MARGINAL SERVICE",
|
||||
"season": 1,
|
||||
"episode_index": 8,
|
||||
"subtitle": "简繁内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "动漫国字幕组&LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_two_zh_title() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[LoliHouse] 事与愿违的不死冒险者 / 非自愿的不死冒险者 / Nozomanu Fushi no Boukensha - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕]"#,
|
||||
r#"{
|
||||
"name_en": "Nozomanu Fushi no Boukensha",
|
||||
"name_en_no_season": "Nozomanu Fushi no Boukensha",
|
||||
"name_zh": "事与愿违的不死冒险者",
|
||||
"name_zh_no_season": "事与愿违的不死冒险者",
|
||||
"season": 1,
|
||||
"season_raw": null,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简繁内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_en_zh_jp_titles() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[喵萌奶茶屋&LoliHouse] 碰之道 / ぽんのみち / Pon no Michi - 07 [WebRip 1080p HEVC-10bit AAC][简繁日内封字幕]"#,
|
||||
r#"{
|
||||
"name_en": "Pon no Michi",
|
||||
"name_jp": "ぽんのみち",
|
||||
"name_zh": "碰之道",
|
||||
"name_en_no_season": "Pon no Michi",
|
||||
"name_jp_no_season": "ぽんのみち",
|
||||
"name_zh_no_season": "碰之道",
|
||||
"season": 1,
|
||||
"season_raw": null,
|
||||
"episode_index": 7,
|
||||
"subtitle": "简繁日内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "喵萌奶茶屋&LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_nth_season() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[ANi] Yowai Character Tomozakikun / 弱角友崎同学 2nd STAGE - 09 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#,
|
||||
r#"{
|
||||
"name_en": "Yowai Character Tomozakikun",
|
||||
"name_en_no_season": "Yowai Character Tomozakikun",
|
||||
"name_zh": "弱角友崎同学 2nd STAGE",
|
||||
"name_zh_no_season": "弱角友崎同学",
|
||||
"season": 2,
|
||||
"season_raw": "2nd",
|
||||
"episode_index": 9,
|
||||
"subtitle": "CHT",
|
||||
"source": "Baha",
|
||||
"fansub": "ANi",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_season_en_and_season_zh() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[豌豆字幕组&LoliHouse] 王者天下 第五季 / Kingdom S5 - 07 [WebRip 1080p HEVC-10bit AAC][简繁外挂字幕]"#,
|
||||
r#"{
|
||||
"name_en": "Kingdom S5",
|
||||
"name_en_no_season": "Kingdom",
|
||||
"name_zh": "王者天下 第五季",
|
||||
"name_zh_no_season": "王者天下",
|
||||
"season": 5,
|
||||
"season_raw": "第五季",
|
||||
"episode_index": 7,
|
||||
"subtitle": "简繁外挂字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "豌豆字幕组&LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_airota_fansub_style_case1() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"【千夏字幕组】【爱丽丝与特蕾丝的虚幻工厂_Alice to Therese no Maboroshi Koujou】[剧场版][WebRip_1080p_HEVC][简繁内封][招募新人]"#,
|
||||
r#"{
|
||||
"name_en": "Alice to Therese no Maboroshi Koujou",
|
||||
"name_en_no_season": "Alice to Therese no Maboroshi Koujou",
|
||||
"name_zh": "爱丽丝与特蕾丝的虚幻工厂",
|
||||
"name_zh_no_season": "爱丽丝与特蕾丝的虚幻工厂",
|
||||
"season": 1,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简繁内封",
|
||||
"source": "WebRip",
|
||||
"fansub": "千夏字幕组",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_airota_fansub_style_case2() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[千夏字幕组&喵萌奶茶屋][电影 轻旅轻营 (摇曳露营) _Yuru Camp Movie][剧场版][UHDRip_2160p_HEVC][繁体][千夏15周年]"#,
|
||||
r#"{
|
||||
"name_en": "Yuru Camp Movie",
|
||||
"name_en_no_season": "Yuru Camp Movie",
|
||||
"name_zh": "电影 轻旅轻营 (摇曳露营)",
|
||||
"name_zh_no_season": "电影 轻旅轻营 (摇曳露营)",
|
||||
"season": 1,
|
||||
"episode_index": 1,
|
||||
"subtitle": "繁体",
|
||||
"source": "UHDRip",
|
||||
"fansub": "千夏字幕组&喵萌奶茶屋",
|
||||
"resolution": "2160p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_large_episode_style() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"#,
|
||||
r#"{
|
||||
"name_en": "New Doraemon",
|
||||
"name_en_no_season": "New Doraemon",
|
||||
"name_zh": "哆啦A梦新番",
|
||||
"name_zh_no_season": "哆啦A梦新番",
|
||||
"season": 1,
|
||||
"episode_index": 747,
|
||||
"subtitle": "GB",
|
||||
"fansub": "梦蓝字幕组",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_many_square_brackets_split_title() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"【MCE汉化组】[剧场版-摇曳露营][Yuru Camp][Movie][简日双语][1080P][x264 AAC]"#,
|
||||
r#"{
|
||||
"name_en": "Yuru Camp",
|
||||
"name_en_no_season": "Yuru Camp",
|
||||
"name_zh": "剧场版-摇曳露营",
|
||||
"name_zh_no_season": "剧场版-摇曳露营",
|
||||
"season": 1,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简日双语",
|
||||
"fansub": "MCE汉化组",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_implicit_lang_title_sep() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"#,
|
||||
r#"{
|
||||
"name_en": "NieR Automata Ver1.1a",
|
||||
"name_en_no_season": "NieR Automata Ver1.1a",
|
||||
"name_zh": "尼尔:机械纪元",
|
||||
"name_zh_no_season": "尼尔:机械纪元",
|
||||
"season": 1,
|
||||
"episode_index": 2,
|
||||
"subtitle": "简日双语",
|
||||
"fansub": "织梦字幕组",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_square_brackets_wrapped_and_space_split() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[天月搬运组][迷宫饭 Delicious in Dungeon][03][日语中字][MKV][1080P][NETFLIX][高画质版]"#,
|
||||
r#"
|
||||
{
|
||||
"name_en": "Delicious in Dungeon",
|
||||
"name_en_no_season": "Delicious in Dungeon",
|
||||
"name_zh": "迷宫饭",
|
||||
"name_zh_no_season": "迷宫饭",
|
||||
"season": 1,
|
||||
"episode_index": 3,
|
||||
"subtitle": "日语中字",
|
||||
"source": "NETFLIX",
|
||||
"fansub": "天月搬运组",
|
||||
"resolution": "1080P"
|
||||
}
|
||||
"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_start_with_brackets_wrapped_season_info_prefix() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[爱恋字幕社][1月新番][迷宫饭][Dungeon Meshi][01][1080P][MP4][简日双语] "#,
|
||||
r#"{
|
||||
"name_en": "Dungeon Meshi",
|
||||
"name_en_no_season": "Dungeon Meshi",
|
||||
"name_zh": "迷宫饭",
|
||||
"name_zh_no_season": "迷宫饭",
|
||||
"season": 1,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简日双语",
|
||||
"fansub": "爱恋字幕社",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_small_no_title_extra_brackets_case() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[ANi] Mahou Shoujo ni Akogarete / 梦想成为魔法少女 [年龄限制版] - 09 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#,
|
||||
r#"{
|
||||
"name_en": "Mahou Shoujo ni Akogarete",
|
||||
"name_en_no_season": "Mahou Shoujo ni Akogarete",
|
||||
"name_zh": "梦想成为魔法少女 [年龄限制版]",
|
||||
"name_zh_no_season": "梦想成为魔法少女 [年龄限制版]",
|
||||
"season": 1,
|
||||
"episode_index": 9,
|
||||
"subtitle": "CHT",
|
||||
"source": "Baha",
|
||||
"fansub": "ANi",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_title_leading_space_style() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[ANi] 16bit 的感动 ANOTHER LAYER - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#,
|
||||
r#"{
|
||||
"name_zh": "16bit 的感动 ANOTHER LAYER",
|
||||
"name_zh_no_season": "16bit 的感动 ANOTHER LAYER",
|
||||
"season": 1,
|
||||
"season_raw": null,
|
||||
"episode_index": 1,
|
||||
"subtitle": "CHT",
|
||||
"source": "Baha",
|
||||
"fansub": "ANi",
|
||||
"resolution": "1080P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_title_leading_month_and_wrapped_brackets_style() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"【喵萌奶茶屋】★07月新番★[银砂糖师与黑妖精 ~ Sugar Apple Fairy Tale ~][13][1080p][简日双语][招募翻译]"#,
|
||||
r#"{
|
||||
"name_en": "~ Sugar Apple Fairy Tale ~",
|
||||
"name_en_no_season": "~ Sugar Apple Fairy Tale ~",
|
||||
"name_zh": "银砂糖师与黑妖精",
|
||||
"name_zh_no_season": "银砂糖师与黑妖精",
|
||||
"season": 1,
|
||||
"episode_index": 13,
|
||||
"subtitle": "简日双语",
|
||||
"fansub": "喵萌奶茶屋",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_title_leading_month_style() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"【极影字幕社】★4月新番 天国大魔境 Tengoku Daimakyou 第05话 GB 720P MP4(字幕社招人内详)"#,
|
||||
r#"{
|
||||
"name_en": "Tengoku Daimakyou",
|
||||
"name_en_no_season": "Tengoku Daimakyou",
|
||||
"name_zh": "天国大魔境",
|
||||
"name_zh_no_season": "天国大魔境",
|
||||
"season": 1,
|
||||
"episode_index": 5,
|
||||
"subtitle": "字幕社招人内详",
|
||||
"source": null,
|
||||
"fansub": "极影字幕社",
|
||||
"resolution": "720P"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_tokusatsu_style() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[MagicStar] 假面骑士Geats / 仮面ライダーギーツ EP33 [WEBDL] [1080p] [TTFC]【生】"#,
|
||||
r#"{
|
||||
"name_jp": "仮面ライダーギーツ",
|
||||
"name_jp_no_season": "仮面ライダーギーツ",
|
||||
"name_zh": "假面骑士Geats",
|
||||
"name_zh_no_season": "假面骑士Geats",
|
||||
"season": 1,
|
||||
"episode_index": 33,
|
||||
"source": "WEBDL",
|
||||
"fansub": "MagicStar",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ep_with_multi_lang_zh_title() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]"#,
|
||||
r#"{
|
||||
"name_en": "Garupa Pico: Fever!",
|
||||
"name_en_no_season": "Garupa Pico: Fever!",
|
||||
"name_zh": "BanG Dream! 少女乐团派对!☆PICO FEVER!",
|
||||
"name_zh_no_season": "BanG Dream! 少女乐团派对!☆PICO FEVER!",
|
||||
"season": 1,
|
||||
"episode_index": 26,
|
||||
"subtitle": "简繁内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "百冬练习组&LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ep_collections() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[奶²&LoliHouse] 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简日内封字幕]"#,
|
||||
r#"{
|
||||
"name_en": "Kinokoinu: Mushroom Pup",
|
||||
"name_en_no_season": "Kinokoinu: Mushroom Pup",
|
||||
"name_zh": "蘑菇狗",
|
||||
"name_zh_no_season": "蘑菇狗",
|
||||
"season": 1,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简日内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "奶²&LoliHouse",
|
||||
"resolution": "1080p",
|
||||
"name": " 蘑菇狗 / Kinokoinu: Mushroom Pup [01-12 精校合集]"
|
||||
}"#,
|
||||
);
|
||||
|
||||
test_raw_ep_parser_case(
|
||||
r#"[LoliHouse] 叹气的亡灵想隐退 / Nageki no Bourei wa Intai shitai [01-13 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#,
|
||||
r#"{
|
||||
"name_en": "Nageki no Bourei wa Intai shitai",
|
||||
"name_en_no_season": "Nageki no Bourei wa Intai shitai",
|
||||
"name_jp": null,
|
||||
"name_jp_no_season": null,
|
||||
"name_zh": "叹气的亡灵想隐退",
|
||||
"name_zh_no_season": "叹气的亡灵想隐退",
|
||||
"season": 1,
|
||||
"season_raw": null,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简繁内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
);
|
||||
|
||||
test_raw_ep_parser_case(
|
||||
r#"[LoliHouse] 精灵幻想记 第二季 / Seirei Gensouki S2 [01-12 合集][WebRip 1080p HEVC-10bit AAC][简繁内封字幕][Fin]"#,
|
||||
r#"{
|
||||
"name_en": "Seirei Gensouki S2",
|
||||
"name_en_no_season": "Seirei Gensouki",
|
||||
"name_zh": "精灵幻想记 第二季",
|
||||
"name_zh_no_season": "精灵幻想记",
|
||||
"season": 2,
|
||||
"season_raw": "第二季",
|
||||
"episode_index": 1,
|
||||
"subtitle": "简繁内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
);
|
||||
|
||||
test_raw_ep_parser_case(
|
||||
r#"[喵萌奶茶屋&LoliHouse] 超自然武装当哒当 / 胆大党 / Dandadan [01-12 精校合集][WebRip 1080p HEVC-10bit AAC][简繁日内封字幕][Fin]"#,
|
||||
r#" {
|
||||
"name_en": "Dandadan",
|
||||
"name_en_no_season": "Dandadan",
|
||||
"name_zh": "超自然武装当哒当",
|
||||
"name_zh_no_season": "超自然武装当哒当",
|
||||
"season": 1,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简繁日内封字幕",
|
||||
"source": "WebRip",
|
||||
"fansub": "喵萌奶茶屋&LoliHouse",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: FIXME
|
||||
#[test]
|
||||
fn test_bad_cases() {
|
||||
test_raw_ep_parser_case(
|
||||
r#"[7³ACG x 桜都字幕组] 摇曳露营△ 剧场版/映画 ゆるキャン△/Eiga Yuru Camp△ [简繁字幕] BDrip 1080p x265 FLAC 2.0"#,
|
||||
r#"{
|
||||
"name_zh": "摇曳露营△剧场版",
|
||||
"name_zh_no_season": "摇曳露营△剧场版",
|
||||
"season": 1,
|
||||
"season_raw": null,
|
||||
"episode_index": 1,
|
||||
"subtitle": "简繁字幕",
|
||||
"source": "BDrip",
|
||||
"fansub": "7³ACG x 桜都字幕组",
|
||||
"resolution": "1080p"
|
||||
}"#,
|
||||
);
|
||||
|
||||
test_raw_ep_parser_case(
|
||||
r#"【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"#,
|
||||
r#"{
|
||||
"name_en": "第二季 Komi-san wa, Komyushou Desu. S02",
|
||||
"name_en_no_season": "Komi-san wa, Komyushou Desu.",
|
||||
"name_zh": "古见同学有交流障碍症",
|
||||
"name_zh_no_season": "古见同学有交流障碍症",
|
||||
"season": 2,
|
||||
"season_raw": "第二季",
|
||||
"episode_index": 22,
|
||||
"subtitle": "GB",
|
||||
"fansub": "幻樱字幕组",
|
||||
"resolution": "1920X1080"
|
||||
}"#,
|
||||
);
|
||||
}
|
||||
}
|
@ -17,7 +17,7 @@ use crate::{
|
||||
MikanBangumiHash, MikanBangumiMeta, build_mikan_bangumi_subscription_rss_url,
|
||||
scrape_mikan_poster_meta_from_image_url,
|
||||
},
|
||||
origin::extract_season_from_title_body,
|
||||
origin::{OriginCompTrait, SeasonComp},
|
||||
},
|
||||
};
|
||||
|
||||
@ -123,7 +123,11 @@ impl ActiveModel {
|
||||
let mikan_client = ctx.mikan();
|
||||
let storage_service = ctx.storage();
|
||||
let mikan_base_url = mikan_client.base_url();
|
||||
let (_, season_raw, season_index) = extract_season_from_title_body(&meta.bangumi_title);
|
||||
let season_comp = SeasonComp::parse_comp(&meta.bangumi_title)
|
||||
.ok()
|
||||
.map(|(_, s)| s);
|
||||
let season_index = season_comp.as_ref().map(|s| s.num).unwrap_or(1);
|
||||
let season_raw = season_comp.map(|s| s.source.into_owned());
|
||||
|
||||
let rss_url = build_mikan_bangumi_subscription_rss_url(
|
||||
mikan_base_url.clone(),
|
||||
|
@ -10,7 +10,7 @@ use crate::{
|
||||
errors::RecorderResult,
|
||||
extract::{
|
||||
mikan::{MikanEpisodeHash, MikanEpisodeMeta, build_mikan_episode_homepage_url},
|
||||
origin::extract_episode_meta_from_origin_name,
|
||||
origin::{OriginCompTrait, OriginNameRoot},
|
||||
},
|
||||
};
|
||||
|
||||
@ -124,7 +124,7 @@ impl ActiveModel {
|
||||
episode: MikanEpisodeMeta,
|
||||
) -> RecorderResult<Self> {
|
||||
let mikan_base_url = ctx.mikan().base_url().clone();
|
||||
let episode_extention_meta = extract_episode_meta_from_origin_name(&episode.episode_title)
|
||||
let episode_extention_meta = OriginNameRoot::parse_comp(&episode.episode_title)
|
||||
.inspect_err(|err| {
|
||||
tracing::error!(
|
||||
err = ?err,
|
||||
@ -132,6 +132,7 @@ impl ActiveModel {
|
||||
"Failed to parse episode extension meta from episode title, skip"
|
||||
);
|
||||
})
|
||||
.map(|(_, e)| e.into_meta())
|
||||
.ok();
|
||||
let homepage = build_mikan_episode_homepage_url(mikan_base_url, &episode.mikan_episode_id);
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
use tracing::Level;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use tracing_subscriber::{EnvFilter, layer::SubscriberExt, util::SubscriberInitExt};
|
||||
use tracing_tree::HierarchicalLayer;
|
||||
|
||||
use crate::logger::MODULE_WHITELIST;
|
||||
|
||||
pub fn try_init_testing_tracing(level: Level) {
|
||||
fn build_testing_tracing_filter(level: Level) -> EnvFilter {
|
||||
let crate_name = env!("CARGO_PKG_NAME");
|
||||
let level = level.as_str().to_lowercase();
|
||||
let mut filter = EnvFilter::new(format!("{crate_name}[]={level}"));
|
||||
@ -14,5 +15,22 @@ pub fn try_init_testing_tracing(level: Level) {
|
||||
filter = filter.add_directive(format!("{module}[]={level}").parse().unwrap());
|
||||
}
|
||||
|
||||
let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
|
||||
filter
|
||||
}
|
||||
|
||||
pub fn try_init_testing_tracing(level: Level) {
|
||||
let _ = tracing_subscriber::fmt()
|
||||
.with_env_filter(build_testing_tracing_filter(level))
|
||||
.try_init();
|
||||
}
|
||||
|
||||
pub fn try_init_testing_tracing_only_leaf(level: Level) {
|
||||
let _ = tracing_subscriber::registry()
|
||||
.with(build_testing_tracing_filter(level))
|
||||
.with(
|
||||
HierarchicalLayer::new(2)
|
||||
.with_targets(true)
|
||||
.with_bracketed_fields(true),
|
||||
)
|
||||
.try_init();
|
||||
}
|
||||
|
@ -1,2 +1,3 @@
|
||||
pub mod http;
|
||||
pub mod json;
|
||||
pub mod nom;
|
||||
|
261
apps/recorder/src/utils/nom.rs
Normal file
261
apps/recorder/src/utils/nom.rs
Normal file
@ -0,0 +1,261 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use icu::properties::{CodePointMapData, props::Script};
|
||||
use lazy_static::lazy_static;
|
||||
use maplit::hashmap;
|
||||
use nom::{
|
||||
IResult, Parser,
|
||||
branch::alt,
|
||||
bytes::complete::tag,
|
||||
character::complete::{anychar, digit1, none_of, satisfy},
|
||||
combinator::{map, opt, recognize, value, verify},
|
||||
error::ParseError,
|
||||
multi::many1,
|
||||
sequence::{delimited, preceded},
|
||||
};
|
||||
use num_traits::{PrimInt, Signed};
|
||||
|
||||
lazy_static! {
|
||||
pub static ref ZH_DIGIT_MAP: HashMap<char, u32> = {
|
||||
hashmap! {
|
||||
'〇' => 0,
|
||||
'零' => 0,
|
||||
'一' => 1,
|
||||
'壹' => 1,
|
||||
'二' => 2,
|
||||
'贰' => 2,
|
||||
'三' => 3,
|
||||
'叁' => 3,
|
||||
'四' => 4,
|
||||
'肆' => 4,
|
||||
'五' => 5,
|
||||
'伍' => 5,
|
||||
'六' => 6,
|
||||
'陆' => 6,
|
||||
'七' => 7,
|
||||
'柒' => 7,
|
||||
'八' => 8,
|
||||
'捌' => 8,
|
||||
'九' => 9,
|
||||
'玖' => 9,
|
||||
'十' => 10,
|
||||
'拾' => 10,
|
||||
'廿' => 20,
|
||||
'念' => 20,
|
||||
'百' => 100,
|
||||
'佰' => 100,
|
||||
'千' => 1000,
|
||||
'仟' => 1000,
|
||||
'万' => 10000,
|
||||
'萬' => 10000,
|
||||
'亿' => 100000000,
|
||||
'億' => 100000000,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn with_recognized<'a, F, O, E>(
|
||||
mut parser: F,
|
||||
) -> impl FnMut(&'a str) -> IResult<&'a str, (O, &'a str), E>
|
||||
where
|
||||
F: Parser<&'a str, Output = O, Error = E>,
|
||||
E: ParseError<&'a str>,
|
||||
{
|
||||
move |input: &'a str| {
|
||||
let i = input;
|
||||
let (rest, output) = parser.parse(i)?;
|
||||
let consumed_len = i.len() - rest.len();
|
||||
Ok((rest, (output, &i[..consumed_len])))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_some_unicode_scx(input: &str, script: Script) -> IResult<&str, char> {
|
||||
let script_data = CodePointMapData::<Script>::new();
|
||||
|
||||
verify(anychar, |&c| script_data.get(c) == script).parse(input)
|
||||
}
|
||||
|
||||
pub fn is_han_scx(input: &str) -> IResult<&str, char> {
|
||||
is_some_unicode_scx(input, Script::Han)
|
||||
}
|
||||
|
||||
pub fn is_hira_scx(input: &str) -> IResult<&str, char> {
|
||||
is_some_unicode_scx(input, Script::Hiragana)
|
||||
}
|
||||
|
||||
pub fn is_kana_scx(input: &str) -> IResult<&str, char> {
|
||||
is_some_unicode_scx(input, Script::Katakana)
|
||||
}
|
||||
|
||||
pub fn delimited_by_brackets(input: &str) -> IResult<&str, &str> {
|
||||
alt((
|
||||
delimited(tag("["), recognize(many1(none_of("[]"))), tag("]")),
|
||||
delimited(tag("【"), recognize(many1(none_of("【】"))), tag("】")),
|
||||
))
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
pub struct ZhNum {
|
||||
pub int: i32,
|
||||
}
|
||||
|
||||
impl ZhNum {
|
||||
fn parse_digit<'a>(
|
||||
max_value: u32,
|
||||
) -> impl Parser<&'a str, Output = u32, Error = nom::error::Error<&'a str>> {
|
||||
map(
|
||||
satisfy(move |c| ZH_DIGIT_MAP.get(&c).is_some_and(|v| *v <= max_value)),
|
||||
|c| *ZH_DIGIT_MAP.get(&c).unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_个(input: &str) -> IResult<&str, u32> {
|
||||
Self::parse_digit(9).parse(input)
|
||||
}
|
||||
|
||||
fn parse_十(input: &str) -> IResult<&str, u32> {
|
||||
let (input, (p, o, s)) = (
|
||||
opt(Self::parse_个),
|
||||
map(
|
||||
satisfy(|c| ZH_DIGIT_MAP.get(&c).is_some_and(|v| *v == 10 || *v == 20)),
|
||||
|c| *ZH_DIGIT_MAP.get(&c).unwrap(),
|
||||
),
|
||||
opt(Self::parse_个),
|
||||
)
|
||||
.parse(input)?;
|
||||
|
||||
let value = p.unwrap_or(1) * o + s.unwrap_or(0);
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_百(input: &str) -> IResult<&str, u32> {
|
||||
let (input, (p, o, s)) = (
|
||||
opt(Self::parse_个),
|
||||
map(
|
||||
satisfy(|c| ZH_DIGIT_MAP.get(&c).is_some_and(|v| *v == 100 || *v == 200)),
|
||||
|c| *ZH_DIGIT_MAP.get(&c).unwrap(),
|
||||
),
|
||||
opt(Self::parse_十),
|
||||
)
|
||||
.parse(input)?;
|
||||
|
||||
let value = p.unwrap_or(1) * o + s.unwrap_or(0);
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_千(input: &str) -> IResult<&str, u32> {
|
||||
let (input, (p, o, s)) = (
|
||||
opt(Self::parse_个),
|
||||
value(
|
||||
1000u32,
|
||||
satisfy(|c| ZH_DIGIT_MAP.get(&c).is_some_and(|v| *v == 1000)),
|
||||
),
|
||||
opt(Self::parse_百),
|
||||
)
|
||||
.parse(input)?;
|
||||
|
||||
let value = p.unwrap_or(1) * o + s.unwrap_or(0);
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_万(input: &str) -> IResult<&str, u32> {
|
||||
let (input, (p, o, s)) = (
|
||||
opt(Self::parse_千),
|
||||
value(
|
||||
10000u32,
|
||||
satisfy(|c| ZH_DIGIT_MAP.get(&c).is_some_and(|v| *v == 10000)),
|
||||
),
|
||||
opt(Self::parse_千),
|
||||
)
|
||||
.parse(input)?;
|
||||
|
||||
let value = p.unwrap_or(1) * o + s.unwrap_or(0);
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_亿(input: &str) -> IResult<&str, u32> {
|
||||
let (input, (p, o, s)) = (
|
||||
opt(Self::parse_万),
|
||||
value(
|
||||
100000000u32,
|
||||
satisfy(|c| ZH_DIGIT_MAP.get(&c).is_some_and(|v| *v == 100000000)),
|
||||
),
|
||||
opt(Self::parse_万),
|
||||
)
|
||||
.parse(input)?;
|
||||
|
||||
let value = p.unwrap_or(1) * o + s.unwrap_or(0);
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_uint(input: &str) -> IResult<&str, u32> {
|
||||
preceded(
|
||||
opt(tag("正")),
|
||||
alt((
|
||||
Self::parse_个,
|
||||
Self::parse_十,
|
||||
Self::parse_百,
|
||||
Self::parse_千,
|
||||
Self::parse_万,
|
||||
Self::parse_亿,
|
||||
)),
|
||||
)
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
pub fn parse_int(input: &str) -> IResult<&str, i32> {
|
||||
let (input, (sign, value)) = (
|
||||
opt(alt((value(1, tag("正")), value(-1, tag("负"))))),
|
||||
alt((
|
||||
Self::parse_个,
|
||||
Self::parse_十,
|
||||
Self::parse_百,
|
||||
Self::parse_千,
|
||||
Self::parse_万,
|
||||
Self::parse_亿,
|
||||
)),
|
||||
)
|
||||
.parse(input)?;
|
||||
|
||||
Ok((input, sign.unwrap_or(1) * value as i32))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_uint<T: PrimInt>(input: &str) -> IResult<&str, T> {
|
||||
let (input, value) = preceded(opt(tag("+")), digit1).parse(input)?;
|
||||
|
||||
let value = T::from_str_radix(value, 10).map_err(|_| {
|
||||
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
|
||||
})?;
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_int<T: PrimInt + Signed>(input: &str) -> IResult<&str, T> {
|
||||
let (input, value) = recognize((
|
||||
opt(alt((
|
||||
value(T::one(), tag("+")),
|
||||
value(T::one().neg(), tag("-")),
|
||||
))),
|
||||
digit1,
|
||||
))
|
||||
.parse(input)?;
|
||||
|
||||
let value = T::from_str_radix(value, 10).map_err(|_| {
|
||||
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
|
||||
})?;
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
pub fn parse_month_num(input: &str) -> IResult<&str, u32> {
|
||||
verify(alt((ZhNum::parse_uint, parse_uint::<u32>)), |v| {
|
||||
*v <= 12 && *v > 0
|
||||
})
|
||||
.parse(input)
|
||||
}
|
Loading…
Reference in New Issue
Block a user