From dbded94324ed85aef4051ad2419de2f077710650 Mon Sep 17 00:00:00 2001 From: lonelyhentxi Date: Fri, 2 May 2025 02:23:23 +0800 Subject: [PATCH] feature: rewrite season subscription extractor --- Cargo.lock | 412 ++- Cargo.toml | 2 - apps/proxy/.whistle/rules/properties | 2 +- apps/recorder/Cargo.toml | 9 +- apps/recorder/src/app/builder.rs | 7 +- .../src/app/config/default_mixin.toml | 4 + apps/recorder/src/app/config/mod.rs | 8 +- apps/recorder/src/app/context.rs | 46 +- apps/recorder/src/crypto/config.rs | 4 + apps/recorder/src/crypto/error.rs | 11 + apps/recorder/src/crypto/mod.rs | 9 + apps/recorder/src/crypto/service.rs | 65 + apps/recorder/src/crypto/userpass.rs | 19 + apps/recorder/src/database/service.rs | 33 +- apps/recorder/src/errors/app_error.rs | 23 +- apps/recorder/src/extract/media/mod.rs | 7 +- apps/recorder/src/extract/mikan/client.rs | 188 +- apps/recorder/src/extract/mikan/constants.rs | 3 + apps/recorder/src/extract/mikan/mod.rs | 12 +- .../recorder/src/extract/mikan/rss_extract.rs | 26 +- .../recorder/src/extract/mikan/web_extract.rs | 637 ++-- apps/recorder/src/lib.rs | 1 + apps/recorder/src/migrations/defs.rs | 13 + .../m20250501_021523_credential_3rd.rs | 107 + apps/recorder/src/migrations/mod.rs | 2 + apps/recorder/src/models/credential_3rd.rs | 143 + apps/recorder/src/models/episodes.rs | 8 +- apps/recorder/src/models/mod.rs | 1 + apps/recorder/src/models/subscriptions.rs | 23 +- apps/recorder/src/tasks/config.rs | 4 + apps/recorder/src/tasks/core.rs | 279 -- ...act_mikan_bangumis_meta_from_my_bangumi.rs | 37 - .../mikan/extract_season_subscription.rs | 172 + apps/recorder/src/tasks/mikan/mod.rs | 6 +- apps/recorder/src/tasks/mod.rs | 6 +- apps/recorder/src/tasks/registry.rs | 1 - apps/recorder/src/tasks/service.rs | 43 +- apps/recorder/src/test_utils/app.rs | 18 + .../BangumiCoverFlow-2025-spring-noauth.html | 28 + .../mikan/BangumiCoverFlow-2025-spring.html | 840 +++++ .../mikan/ExpandBangumi-3599-noauth.html | 2790 +++++++++++++++ .../resources/mikan/ExpandBangumi-3599.html | 2790 +++++++++++++++ .../tests/resources/mikan/ExpandBangumi.htm | 1466 -------- .../resources/mikan/MyBangumi-noauth.htm | 641 ---- .../tests/resources/mikan/MyBangumi.htm | 3180 ----------------- packages/fetch/Cargo.toml | 3 +- packages/fetch/src/client/core.rs | 30 +- packages/fetch/src/client/mod.rs | 3 - packages/fetch/src/client/secrecy.rs | 47 - packages/fetch/src/errors.rs | 2 - packages/fetch/src/lib.rs | 5 +- 51 files changed, 8181 insertions(+), 6035 deletions(-) create mode 100644 apps/recorder/src/crypto/config.rs create mode 100644 apps/recorder/src/crypto/error.rs create mode 100644 apps/recorder/src/crypto/mod.rs create mode 100644 apps/recorder/src/crypto/service.rs create mode 100644 apps/recorder/src/crypto/userpass.rs create mode 100644 apps/recorder/src/migrations/m20250501_021523_credential_3rd.rs create mode 100644 apps/recorder/src/models/credential_3rd.rs create mode 100644 apps/recorder/src/tasks/config.rs delete mode 100644 apps/recorder/src/tasks/core.rs delete mode 100644 apps/recorder/src/tasks/mikan/extract_mikan_bangumis_meta_from_my_bangumi.rs create mode 100644 apps/recorder/src/tasks/mikan/extract_season_subscription.rs delete mode 100644 apps/recorder/src/tasks/registry.rs create mode 100644 apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring-noauth.html create mode 100644 apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring.html create mode 100644 apps/recorder/tests/resources/mikan/ExpandBangumi-3599-noauth.html create mode 100644 apps/recorder/tests/resources/mikan/ExpandBangumi-3599.html delete mode 100644 apps/recorder/tests/resources/mikan/ExpandBangumi.htm delete mode 100644 apps/recorder/tests/resources/mikan/MyBangumi-noauth.htm delete mode 100644 apps/recorder/tests/resources/mikan/MyBangumi.htm delete mode 100644 packages/fetch/src/client/secrecy.rs diff --git a/Cargo.lock b/Cargo.lock index 0288bbc..c4be0e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,41 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array 0.14.7", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.7.8" @@ -158,6 +193,56 @@ version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" +[[package]] +name = "apalis" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf157b59923258974a886572a18fe47b401daeca43b44c719b73736d8788840" +dependencies = [ + "apalis-core", + "futures", + "pin-project-lite", + "serde", + "thiserror 2.0.12", + "tower", + "tracing", + "tracing-futures", +] + +[[package]] +name = "apalis-core" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbbc8dc67f007145277cb5977c730c4fa7fb07244e83d69d1c5a43cb4d124fa" +dependencies = [ + "futures", + "futures-timer", + "pin-project-lite", + "serde", + "serde_json", + "thiserror 2.0.12", + "tower", + "ulid", +] + +[[package]] +name = "apalis-sql" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34757d9408f39656451c524ca10fe6331d59aaf25cda60bd70d677a4213efead" +dependencies = [ + "apalis-core", + "async-stream", + "chrono", + "futures", + "futures-lite", + "log", + "serde", + "serde_json", + "sqlx", + "thiserror 2.0.12", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -648,6 +733,15 @@ dependencies = [ "wyz", ] +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array 0.14.7", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -817,7 +911,7 @@ version = "13.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c5063741c7b2e260bbede781cf4679632dd90e2718e99f7715e46824b65670b" dependencies = [ - "digest", + "digest 0.10.7", "either", "futures", "hex 0.4.3", @@ -829,7 +923,7 @@ dependencies = [ "serde_derive", "serde_json", "sha1", - "sha2", + "sha2 0.10.8", "ssri", "tempfile", "thiserror 1.0.69", @@ -871,6 +965,30 @@ dependencies = [ "version_check", ] +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.40" @@ -908,6 +1026,17 @@ dependencies = [ "phf_codegen", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.5.35" @@ -948,6 +1077,22 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "cocoon" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24bf1b609cc3fcf6785a2305e450e3dae64cc0f28854ae0b83a6487a8eeaa64f" +dependencies = [ + "aes-gcm", + "chacha20poly1305", + "hmac 0.11.0", + "pbkdf2", + "rand 0.8.5", + "sha2 0.9.9", + "thiserror 1.0.69", + "zeroize", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -1208,6 +1353,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-mac" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25fab6889090c8133f3deb8f73ba3c65a7f456f66436fc012a1b1e272b1e103e" +dependencies = [ + "generic-array 0.14.7", + "subtle", +] + [[package]] name = "cssparser" version = "0.33.0" @@ -1269,6 +1424,15 @@ version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f211af61d8efdd104f96e57adf5e426ba1bc3ed7a4ead616e15e5881fd79c4d" +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -1278,7 +1442,7 @@ dependencies = [ "cfg-if", "cpufeatures", "curve25519-dalek-derive", - "digest", + "digest 0.10.7", "fiat-crypto", "rustc_version", "subtle", @@ -1443,13 +1607,22 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc55fe0d1f6c107595572ec8b107c0999bb1a2e0b75e37429a4fb0d6474a0e7d" +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array 0.14.7", +] + [[package]] name = "digest" version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", + "block-buffer 0.10.4", "const-oid", "crypto-common", "subtle", @@ -1604,7 +1777,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" dependencies = [ "der", - "digest", + "digest 0.10.7", "elliptic-curve", "rfc6979", "signature", @@ -1630,7 +1803,7 @@ dependencies = [ "curve25519-dalek", "ed25519", "serde", - "sha2", + "sha2 0.10.8", "subtle", "zeroize", ] @@ -1658,7 +1831,7 @@ checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" dependencies = [ "base16ct", "crypto-bigint", - "digest", + "digest 0.10.7", "ff", "generic-array 0.14.7", "group", @@ -1768,7 +1941,6 @@ dependencies = [ "axum", "axum-extra", "bytes", - "cookie", "fastrand", "http-cache", "http-cache-reqwest", @@ -1780,6 +1952,7 @@ dependencies = [ "reqwest-middleware", "reqwest-retry", "reqwest-tracing", + "reqwest_cookie_store", "serde", "serde_json", "serde_with", @@ -1972,6 +2145,19 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5edaec856126859abb19ed65f39e90fea3a9574b9707f13539acf4abf7eb532" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.31" @@ -2110,6 +2296,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "gimli" version = "0.31.1" @@ -2175,7 +2371,7 @@ dependencies = [ "parking_lot 0.12.3", "portable-atomic", "quanta", - "rand 0.9.0", + "rand 0.9.1", "smallvec", "spinning_top", "web-time", @@ -2314,7 +2510,17 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" dependencies = [ - "hmac", + "hmac 0.12.1", +] + +[[package]] +name = "hmac" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b" +dependencies = [ + "crypto-mac", + "digest 0.9.0", ] [[package]] @@ -2323,7 +2529,7 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] @@ -2832,6 +3038,15 @@ dependencies = [ "libc", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array 0.14.7", +] + [[package]] name = "insta" version = "1.42.2" @@ -3396,9 +3611,9 @@ dependencies = [ [[package]] name = "lightningcss" -version = "1.0.0-alpha.65" +version = "1.0.0-alpha.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c84f971730745f4aaac013b6cf4328baf1548efc973c0d95cfd843a3c1ca07af" +checksum = "9a73ffa17de66534e4b527232f44aa0a89fad22c4f4e0735f9be35494f058e54" dependencies = [ "ahash 0.8.11", "bitflags 2.9.0", @@ -3573,7 +3788,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ "cfg-if", - "digest", + "digest 0.10.7", ] [[package]] @@ -3686,7 +3901,7 @@ dependencies = [ "hyper", "hyper-util", "log", - "rand 0.9.0", + "rand 0.9.1", "regex", "serde_json", "serde_urlencoded", @@ -3957,7 +4172,7 @@ dependencies = [ "serde", "serde_json", "serde_path_to_error", - "sha2", + "sha2 0.10.8", "thiserror 1.0.69", "url", ] @@ -3977,6 +4192,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "opendal" version = "0.53.0" @@ -4013,7 +4234,7 @@ dependencies = [ "chrono", "dyn-clone", "ed25519-dalek", - "hmac", + "hmac 0.12.1", "http", "itertools 0.10.5", "log", @@ -4028,7 +4249,7 @@ dependencies = [ "serde_path_to_error", "serde_plain", "serde_with", - "sha2", + "sha2 0.10.8", "subtle", "thiserror 1.0.69", "url", @@ -4147,7 +4368,7 @@ dependencies = [ "ecdsa", "elliptic-curve", "primeorder", - "sha2", + "sha2 0.10.8", ] [[package]] @@ -4159,14 +4380,14 @@ dependencies = [ "ecdsa", "elliptic-curve", "primeorder", - "sha2", + "sha2 0.10.8", ] [[package]] name = "parcel_selectors" -version = "0.28.1" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dccbc6fb560df303a44e511618256029410efbc87779018f751ef12c488271fe" +checksum = "54fd03f1ad26cb6b3ec1b7414fa78a3bd639e7dbb421b1a60513c96ce886a196" dependencies = [ "bitflags 2.9.0", "cssparser 0.33.0", @@ -4292,6 +4513,17 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pbkdf2" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05894bce6a1ba4be299d0c5f29563e08af2bc18bb7d48313113bed71e904739" +dependencies = [ + "crypto-mac", + "hmac 0.11.0", + "sha2 0.9.9", +] + [[package]] name = "pear" version = "0.2.9" @@ -4382,7 +4614,7 @@ checksum = "7f9f832470494906d1fca5329f8ab5791cc60beb230c74815dff541cbd2b5ca0" dependencies = [ "once_cell", "pest", - "sha2", + "sha2 0.10.8", ] [[package]] @@ -4505,6 +4737,29 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.11.0" @@ -4714,7 +4969,7 @@ checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc" dependencies = [ "bytes", "getrandom 0.3.2", - "rand 0.9.0", + "rand 0.9.1", "ring", "rustc-hash 2.1.1", "rustls", @@ -4787,13 +5042,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.24", ] [[package]] @@ -4867,6 +5121,8 @@ dependencies = [ name = "recorder" version = "0.1.0" dependencies = [ + "apalis", + "apalis-sql", "async-graphql", "async-graphql-axum", "async-stream", @@ -4877,6 +5133,7 @@ dependencies = [ "bytes", "chrono", "clap", + "cocoon", "ctor", "dotenv", "downloader", @@ -4900,13 +5157,16 @@ dependencies = [ "opendal", "openidconnect", "quirks_path", + "rand 0.9.1", "regex", + "reqwest_cookie_store", "rss", "rstest", "scraper", "sea-orm", "sea-orm-migration", "seaography", + "secrecy", "serde", "serde_json", "serde_variant", @@ -5149,6 +5409,20 @@ dependencies = [ "tracing", ] +[[package]] +name = "reqwest_cookie_store" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0b36498c7452f11b1833900f31fbb01fc46be20992a50269c88cf59d79f54e9" +dependencies = [ + "bytes", + "cookie_store", + "reqwest", + "serde", + "serde_derive", + "url", +] + [[package]] name = "retry-policies" version = "0.4.0" @@ -5164,7 +5438,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" dependencies = [ - "hmac", + "hmac 0.12.1", "subtle", ] @@ -5227,7 +5501,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" dependencies = [ "const-oid", - "digest", + "digest 0.10.7", "num-bigint-dig", "num-integer", "num-traits", @@ -5660,6 +5934,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "secrecy" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a" +dependencies = [ + "zeroize", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -5908,7 +6191,7 @@ checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.7", ] [[package]] @@ -5919,7 +6202,20 @@ checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if", + "cpufeatures", + "digest 0.9.0", + "opaque-debug", ] [[package]] @@ -5930,7 +6226,7 @@ checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.7", ] [[package]] @@ -5963,7 +6259,7 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ - "digest", + "digest 0.10.7", "rand_core 0.6.4", ] @@ -6153,7 +6449,7 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "sha2", + "sha2 0.10.8", "smallvec", "thiserror 2.0.12", "time", @@ -6193,7 +6489,7 @@ dependencies = [ "quote", "serde", "serde_json", - "sha2", + "sha2 0.10.8", "sqlx-core", "sqlx-mysql", "sqlx-postgres", @@ -6218,7 +6514,7 @@ dependencies = [ "bytes", "chrono", "crc", - "digest", + "digest 0.10.7", "dotenvy", "either", "futures-channel", @@ -6228,7 +6524,7 @@ dependencies = [ "generic-array 0.14.7", "hex 0.4.3", "hkdf", - "hmac", + "hmac 0.12.1", "itoa", "log", "md-5", @@ -6240,7 +6536,7 @@ dependencies = [ "rust_decimal", "serde", "sha1", - "sha2", + "sha2 0.10.8", "smallvec", "sqlx-core", "stringprep", @@ -6271,7 +6567,7 @@ dependencies = [ "futures-util", "hex 0.4.3", "hkdf", - "hmac", + "hmac 0.12.1", "home", "itoa", "log", @@ -6283,7 +6579,7 @@ dependencies = [ "rust_decimal", "serde", "serde_json", - "sha2", + "sha2 0.10.8", "smallvec", "sqlx-core", "stringprep", @@ -6327,12 +6623,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da7a2b3c2bc9693bcb40870c4e9b5bf0d79f9cb46273321bf855ec513e919082" dependencies = [ "base64 0.21.7", - "digest", + "digest 0.10.7", "hex 0.4.3", "miette", "serde", "sha-1", - "sha2", + "sha2 0.10.8", "thiserror 1.0.69", "xxhash-rust", ] @@ -6906,6 +7202,7 @@ dependencies = [ "pin-project-lite", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -6996,6 +7293,15 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "tracing", +] + [[package]] name = "tracing-log" version = "0.2.0" @@ -7055,7 +7361,7 @@ dependencies = [ "http", "httparse", "log", - "rand 0.9.0", + "rand 0.9.1", "sha1", "thiserror 2.0.12", "utf-8", @@ -7113,6 +7419,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "ulid" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "470dbf6591da1b39d43c14523b2b469c86879a53e8b758c8e090a470fe7b1fbe" +dependencies = [ + "rand 0.9.1", + "web-time", +] + [[package]] name = "uncased" version = "0.9.10" @@ -7217,6 +7533,16 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsafe-libyaml" version = "0.2.11" diff --git a/Cargo.toml b/Cargo.toml index 494a289..9d5739a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,6 @@ resolver = "2" [workspace.dependencies] moka = "0.12" futures = "0.3" -futures-util = "0.3" quirks_path = "0.1" snafu = { version = "0.8", features = ["futures"] } testcontainers = { version = "0.23.3" } @@ -46,7 +45,6 @@ testing-torrents = { path = "./packages/testing-torrents" } util = { path = "./packages/util" } fetch = { path = "./packages/fetch" } downloader = { path = "./packages/downloader" } -recorder = { path = "./apps/recorder" } [patch.crates-io] jwt-authorizer = { git = "https://github.com/blablacio/jwt-authorizer.git", rev = "e956774" } diff --git a/apps/proxy/.whistle/rules/properties b/apps/proxy/.whistle/rules/properties index 6cf4660..6c5bb97 100644 --- a/apps/proxy/.whistle/rules/properties +++ b/apps/proxy/.whistle/rules/properties @@ -1 +1 @@ -{"filesOrder":["konobangu"],"selectedList":["konobangu"],"disabledDefalutRules":true} +{"filesOrder":["konobangu"],"selectedList":["konobangu"],"disabledDefalutRules":true,"defalutRules":""} diff --git a/apps/recorder/Cargo.toml b/apps/recorder/Cargo.toml index df59072..e31fa77 100644 --- a/apps/recorder/Cargo.toml +++ b/apps/recorder/Cargo.toml @@ -61,7 +61,7 @@ sea-orm-migration = { version = "1.1", features = ["runtime-tokio-rustls"] } rss = "2" fancy-regex = "0.14" maplit = "1.0.2" -lightningcss = "1.0.0-alpha.65" +lightningcss = "1.0.0-alpha.66" html-escape = "0.2.13" opendal = { version = "0.53", features = ["default", "services-fs"] } zune-image = "0.4.15" @@ -104,11 +104,16 @@ clap = "4.5.31" ipnetwork = "0.21.1" typed-builder = "0.21.0" serde_yaml = "0.9.34" - +apalis = { version = "0.7", features = ["limit", "tracing", "catch-panic"] } +apalis-sql = { version = "0.7", features = ["postgres"] } +cocoon = { version = "0.4.3", features = ["getrandom", "thiserror"] } +rand = "0.9.1" downloader = { workspace = true } util = { workspace = true } fetch = { workspace = true } string-interner = "0.19.0" +secrecy = "0.10.3" +reqwest_cookie_store = "0.8.0" [dev-dependencies] serial_test = "3" diff --git a/apps/recorder/src/app/builder.rs b/apps/recorder/src/app/builder.rs index 76ee0d3..72a679f 100644 --- a/apps/recorder/src/app/builder.rs +++ b/apps/recorder/src/app/builder.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use clap::{Parser, command}; use super::{AppContext, core::App, env::Environment}; @@ -83,9 +81,8 @@ impl AppBuilder { ) .await?; - let app_context = Arc::new( - AppContext::new(self.environment.clone(), config, self.working_dir.clone()).await?, - ); + let app_context = + AppContext::new(self.environment.clone(), config, self.working_dir.clone()).await?; Ok(App { context: app_context, diff --git a/apps/recorder/src/app/config/default_mixin.toml b/apps/recorder/src/app/config/default_mixin.toml index f20456b..7faedc0 100644 --- a/apps/recorder/src/app/config/default_mixin.toml +++ b/apps/recorder/src/app/config/default_mixin.toml @@ -16,3 +16,7 @@ depth_limit = inf complexity_limit = inf [cache] + +[crypto] + +[task] diff --git a/apps/recorder/src/app/config/mod.rs b/apps/recorder/src/app/config/mod.rs index 5c8e549..47dd5a7 100644 --- a/apps/recorder/src/app/config/mod.rs +++ b/apps/recorder/src/app/config/mod.rs @@ -9,9 +9,9 @@ use serde::{Deserialize, Serialize}; use super::env::Environment; use crate::{ - auth::AuthConfig, cache::CacheConfig, database::DatabaseConfig, errors::RecorderResult, - extract::mikan::MikanConfig, graphql::GraphQLConfig, logger::LoggerConfig, - storage::StorageConfig, web::WebServerConfig, + auth::AuthConfig, cache::CacheConfig, crypto::CryptoConfig, database::DatabaseConfig, + errors::RecorderResult, extract::mikan::MikanConfig, graphql::GraphQLConfig, + logger::LoggerConfig, storage::StorageConfig, tasks::TaskConfig, web::WebServerConfig, }; const DEFAULT_CONFIG_MIXIN: &str = include_str!("./default_mixin.toml"); @@ -24,9 +24,11 @@ pub struct AppConfig { pub auth: AuthConfig, pub storage: StorageConfig, pub mikan: MikanConfig, + pub crypto: CryptoConfig, pub graphql: GraphQLConfig, pub logger: LoggerConfig, pub database: DatabaseConfig, + pub tasks: TaskConfig, } impl AppConfig { diff --git a/apps/recorder/src/app/context.rs b/apps/recorder/src/app/context.rs index 242ca5e..d616d16 100644 --- a/apps/recorder/src/app/context.rs +++ b/apps/recorder/src/app/context.rs @@ -1,11 +1,15 @@ +use std::{fmt::Debug, sync::Arc}; + +use tokio::sync::OnceCell; + use super::{Environment, config::AppConfig}; use crate::{ - auth::AuthService, cache::CacheService, database::DatabaseService, errors::RecorderResult, - extract::mikan::MikanClient, graphql::GraphQLService, logger::LoggerService, - storage::StorageService, + auth::AuthService, cache::CacheService, crypto::CryptoService, database::DatabaseService, + errors::RecorderResult, extract::mikan::MikanClient, graphql::GraphQLService, + logger::LoggerService, storage::StorageService, tasks::TaskService, }; -pub trait AppContextTrait: Send + Sync { +pub trait AppContextTrait: Send + Sync + Debug { fn logger(&self) -> &LoggerService; fn db(&self) -> &DatabaseService; fn config(&self) -> &AppConfig; @@ -16,6 +20,8 @@ pub trait AppContextTrait: Send + Sync { fn storage(&self) -> &StorageService; fn working_dir(&self) -> &String; fn environment(&self) -> &Environment; + fn crypto(&self) -> &CryptoService; + fn task(&self) -> &TaskService; } pub struct AppContext { @@ -27,8 +33,10 @@ pub struct AppContext { auth: AuthService, graphql: GraphQLService, storage: StorageService, + crypto: CryptoService, working_dir: String, environment: Environment, + task: OnceCell, } impl AppContext { @@ -36,7 +44,7 @@ impl AppContext { environment: Environment, config: AppConfig, working_dir: impl ToString, - ) -> RecorderResult { + ) -> RecorderResult> { let config_cloned = config.clone(); let logger = LoggerService::from_config(config.logger).await?; @@ -45,9 +53,10 @@ impl AppContext { let storage = StorageService::from_config(config.storage).await?; let auth = AuthService::from_conf(config.auth).await?; let mikan = MikanClient::from_config(config.mikan).await?; + let crypto = CryptoService::from_config(config.crypto).await?; let graphql = GraphQLService::from_config_and_database(config.graphql, db.clone()).await?; - Ok(AppContext { + let ctx = Arc::new(AppContext { config: config_cloned, environment, logger, @@ -58,9 +67,26 @@ impl AppContext { mikan, working_dir: working_dir.to_string(), graphql, - }) + crypto, + task: OnceCell::new(), + }); + + ctx.task + .get_or_try_init(async || { + TaskService::from_config_and_ctx(config.tasks, ctx.clone()).await + }) + .await?; + + Ok(ctx) } } + +impl Debug for AppContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "AppContext") + } +} + impl AppContextTrait for AppContext { fn logger(&self) -> &LoggerService { &self.logger @@ -92,4 +118,10 @@ impl AppContextTrait for AppContext { fn environment(&self) -> &Environment { &self.environment } + fn crypto(&self) -> &CryptoService { + &self.crypto + } + fn task(&self) -> &TaskService { + self.task.get().expect("task should be set") + } } diff --git a/apps/recorder/src/crypto/config.rs b/apps/recorder/src/crypto/config.rs new file mode 100644 index 0000000..11ce4b7 --- /dev/null +++ b/apps/recorder/src/crypto/config.rs @@ -0,0 +1,4 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CryptoConfig {} diff --git a/apps/recorder/src/crypto/error.rs b/apps/recorder/src/crypto/error.rs new file mode 100644 index 0000000..0215989 --- /dev/null +++ b/apps/recorder/src/crypto/error.rs @@ -0,0 +1,11 @@ +#[derive(Debug, snafu::Snafu)] +pub enum CryptoError { + #[snafu(transparent)] + Base64DecodeError { source: base64::DecodeError }, + #[snafu(display("CocoonError: {source:?}"), context(false))] + CocoonError { source: cocoon::Error }, + #[snafu(transparent)] + FromUtf8Error { source: std::string::FromUtf8Error }, + #[snafu(transparent)] + SerdeJsonError { source: serde_json::Error }, +} diff --git a/apps/recorder/src/crypto/mod.rs b/apps/recorder/src/crypto/mod.rs new file mode 100644 index 0000000..82bfffd --- /dev/null +++ b/apps/recorder/src/crypto/mod.rs @@ -0,0 +1,9 @@ +pub mod config; +pub mod error; +pub mod service; +pub mod userpass; + +pub use config::CryptoConfig; +pub use error::CryptoError; +pub use service::CryptoService; +pub use userpass::UserPassCredential; diff --git a/apps/recorder/src/crypto/service.rs b/apps/recorder/src/crypto/service.rs new file mode 100644 index 0000000..466bd3f --- /dev/null +++ b/apps/recorder/src/crypto/service.rs @@ -0,0 +1,65 @@ +use base64::prelude::{BASE64_URL_SAFE, *}; +use cocoon::Cocoon; +use rand::Rng; +use serde::{Deserialize, Serialize}; + +use super::CryptoConfig; +use crate::crypto::error::CryptoError; + +pub struct CryptoService { + #[allow(dead_code)] + config: CryptoConfig, +} + +impl CryptoService { + pub async fn from_config(config: CryptoConfig) -> Result { + Ok(Self { config }) + } + + pub fn encrypt_data(&self, data: String) -> Result { + let key = rand::rng().random::<[u8; 32]>(); + let mut cocoon = Cocoon::new(&key); + + let mut data = data.into_bytes(); + + let detached_prefix = cocoon.encrypt(&mut data)?; + + let mut combined = Vec::with_capacity(key.len() + detached_prefix.len() + data.len()); + combined.extend_from_slice(&key); + combined.extend_from_slice(&detached_prefix); + combined.extend_from_slice(&data); + + Ok(BASE64_URL_SAFE.encode(combined)) + } + + pub fn decrypt_data(&self, data: &str) -> Result { + let decoded = BASE64_URL_SAFE.decode(data)?; + + let (key, remain) = decoded.split_at(32); + let (detached_prefix, data) = remain.split_at(60); + let mut data = data.to_vec(); + let cocoon = Cocoon::new(key); + + cocoon.decrypt(&mut data, detached_prefix)?; + + String::from_utf8(data).map_err(CryptoError::from) + } + + pub fn encrypt_credentials( + &self, + credentials: &T, + ) -> Result { + let json = serde_json::to_string(credentials)?; + + self.encrypt_data(json) + } + + pub fn decrypt_credentials Deserialize<'de>>( + &self, + encrypted: &str, + ) -> Result { + let data = self.decrypt_data(encrypted)?; + + serde_json::from_str(&data).map_err(CryptoError::from) + } +} diff --git a/apps/recorder/src/crypto/userpass.rs b/apps/recorder/src/crypto/userpass.rs new file mode 100644 index 0000000..d5be7b9 --- /dev/null +++ b/apps/recorder/src/crypto/userpass.rs @@ -0,0 +1,19 @@ +use std::fmt::Debug; + +pub struct UserPassCredential { + pub username: String, + pub password: String, + pub user_agent: Option, + pub cookies: Option, +} + +impl Debug for UserPassCredential { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UserPassCredential") + .field("username", &"[Secret]") + .field("password", &"[Secret]") + .field("cookies", &"[Secret]") + .field("user_agent", &self.user_agent) + .finish() + } +} diff --git a/apps/recorder/src/database/service.rs b/apps/recorder/src/database/service.rs index 940d150..4961b33 100644 --- a/apps/recorder/src/database/service.rs +++ b/apps/recorder/src/database/service.rs @@ -1,8 +1,8 @@ use std::{ops::Deref, time::Duration}; use sea_orm::{ - ConnectOptions, ConnectionTrait, Database, DatabaseBackend, DatabaseConnection, DbBackend, - DbErr, ExecResult, QueryResult, Statement, + ConnectOptions, ConnectionTrait, Database, DatabaseConnection, DbBackend, DbErr, ExecResult, + QueryResult, Statement, }; use sea_orm_migration::MigratorTrait; @@ -28,20 +28,21 @@ impl DatabaseService { let db = Database::connect(opt).await?; - if db.get_database_backend() == DatabaseBackend::Sqlite { - db.execute(Statement::from_string( - DatabaseBackend::Sqlite, - " - PRAGMA foreign_keys = ON; - PRAGMA journal_mode = WAL; - PRAGMA synchronous = NORMAL; - PRAGMA mmap_size = 134217728; - PRAGMA journal_size_limit = 67108864; - PRAGMA cache_size = 2000; - ", - )) - .await?; - } + // only support postgres for now + // if db.get_database_backend() == DatabaseBackend::Sqlite { + // db.execute(Statement::from_string( + // DatabaseBackend::Sqlite, + // " + // PRAGMA foreign_keys = ON; + // PRAGMA journal_mode = WAL; + // PRAGMA synchronous = NORMAL; + // PRAGMA mmap_size = 134217728; + // PRAGMA journal_size_limit = 67108864; + // PRAGMA cache_size = 2000; + // ", + // )) + // .await?; + // } if config.auto_migrate { Migrator::up(&db, None).await?; diff --git a/apps/recorder/src/errors/app_error.rs b/apps/recorder/src/errors/app_error.rs index 03a7faf..1f06b25 100644 --- a/apps/recorder/src/errors/app_error.rs +++ b/apps/recorder/src/errors/app_error.rs @@ -4,13 +4,14 @@ use axum::{ Json, response::{IntoResponse, Response}, }; -use fetch::{FetchError, HttpClientError}; +use fetch::{FetchError, HttpClientError, reqwest, reqwest_middleware}; use http::StatusCode; use serde::{Deserialize, Deserializer, Serialize}; use snafu::Snafu; use crate::{ auth::AuthError, + crypto::CryptoError, downloader::DownloaderError, errors::{OptDynErr, response::StandardErrorResponse}, }; @@ -102,6 +103,14 @@ pub enum RecorderError { ModelEntityNotFound { entity: Cow<'static, str> }, #[snafu(transparent)] FetchError { source: FetchError }, + #[snafu(display("Credential3rdError: {source}"))] + Credential3rdError { + message: String, + #[snafu(source(from(Box, OptDynErr::some)))] + source: OptDynErr, + }, + #[snafu(transparent)] + CryptoError { source: CryptoError }, #[snafu(display("{message}"))] Whatever { message: String, @@ -195,4 +204,16 @@ impl<'de> Deserialize<'de> for RecorderError { } } +impl From for RecorderError { + fn from(error: reqwest::Error) -> Self { + FetchError::from(error).into() + } +} + +impl From for RecorderError { + fn from(error: reqwest_middleware::Error) -> Self { + FetchError::from(error).into() + } +} + pub type RecorderResult = Result; diff --git a/apps/recorder/src/extract/media/mod.rs b/apps/recorder/src/extract/media/mod.rs index 15b2816..1812c18 100644 --- a/apps/recorder/src/extract/media/mod.rs +++ b/apps/recorder/src/extract/media/mod.rs @@ -2,7 +2,10 @@ use url::Url; pub fn extract_image_src_from_str(image_src: &str, base_url: &Url) -> Option { let mut image_url = base_url.join(image_src).ok()?; - image_url.set_query(None); - image_url.set_fragment(None); + if let Some((_, value)) = image_url.query_pairs().find(|(key, _)| key == "webp") { + image_url.set_query(Some(&format!("webp={}", value))); + } else { + image_url.set_query(None); + } Some(image_url) } diff --git a/apps/recorder/src/extract/mikan/client.rs b/apps/recorder/src/extract/mikan/client.rs index c431e0a..eee6216 100644 --- a/apps/recorder/src/extract/mikan/client.rs +++ b/apps/recorder/src/extract/mikan/client.rs @@ -1,60 +1,204 @@ -use std::{fmt::Debug, ops::Deref}; +use std::{fmt::Debug, ops::Deref, sync::Arc}; -use fetch::{HttpClient, HttpClientTrait, client::HttpClientCookiesAuth}; +use fetch::{HttpClient, HttpClientTrait}; +use maplit::hashmap; +use sea_orm::DbErr; +use secrecy::SecretBox; use serde::{Deserialize, Serialize}; use url::Url; +use util::OptDynErr; -use super::MikanConfig; -use crate::errors::RecorderError; +use super::{MikanConfig, constants::MIKAN_ACCOUNT_MANAGE_PAGE_PATH}; +use crate::{ + app::AppContextTrait, + crypto::UserPassCredential, + errors::{RecorderError, RecorderResult}, + extract::mikan::constants::{MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH}, + models::credential_3rd::{self, Credential3rdType}, +}; #[derive(Default, Clone, Deserialize, Serialize)] -pub struct MikanAuthSecrecy { - pub cookie: String, - pub user_agent: Option, +pub struct MikanCredentialForm { + pub password: String, + pub username: String, + pub user_agent: String, } -impl Debug for MikanAuthSecrecy { +pub type MikanAuthSecrecy = SecretBox; + +impl Debug for MikanCredentialForm { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MikanAuthSecrecy") - .field("cookie", &String::from("[secrecy]")) + f.debug_struct("MikanCredentialForm") + .field("username", &String::from("[secrecy]")) + .field("password", &String::from("[secrecy]")) .field("user_agent", &String::from("[secrecy]")) .finish() } } -impl MikanAuthSecrecy { - pub fn into_cookie_auth(self, url: &Url) -> Result { - HttpClientCookiesAuth::from_cookies(&self.cookie, url, self.user_agent) - .map_err(RecorderError::from) - } -} - #[derive(Debug)] pub struct MikanClient { http_client: HttpClient, base_url: Url, + origin_url: Url, + userpass_credential: Option, } impl MikanClient { pub async fn from_config(config: MikanConfig) -> Result { let http_client = HttpClient::from_config(config.http_client)?; let base_url = config.base_url; + let origin_url = Url::parse(&base_url.origin().unicode_serialization())?; Ok(Self { http_client, base_url, + origin_url, + userpass_credential: None, }) } - pub fn fork_with_auth(&self, secrecy: Option) -> Result { - let mut fork = self.http_client.fork(); + pub async fn has_login(&self) -> RecorderResult { + let account_manage_page_url = self.base_url.join(MIKAN_ACCOUNT_MANAGE_PAGE_PATH)?; + let res = self.http_client.get(account_manage_page_url).send().await?; + let status = res.status(); + if status.is_success() { + Ok(true) + } else if status.is_redirection() + && res.headers().get("location").is_some_and(|location| { + location + .to_str() + .is_ok_and(|location_str| location_str.contains(MIKAN_LOGIN_PAGE_PATH)) + }) + { + Ok(false) + } else { + Err(RecorderError::Credential3rdError { + message: format!("mikan account check has login failed, status = {}", status), + source: None.into(), + }) + } + } - if let Some(secrecy) = secrecy { - let cookie_auth = secrecy.into_cookie_auth(&self.base_url)?; - fork = fork.attach_secrecy(cookie_auth); + pub async fn login(&self) -> RecorderResult<()> { + let userpass_credential = + self.userpass_credential + .as_ref() + .ok_or_else(|| RecorderError::Credential3rdError { + message: "mikan login failed, credential required".to_string(), + source: None.into(), + })?; + let login_page_url = { + let mut u = self.base_url.join(MIKAN_LOGIN_PAGE_PATH)?; + u.set_query(Some(MIKAN_LOGIN_PAGE_SEARCH)); + u + }; + + // access login page to get antiforgery cookie + self.http_client + .get(login_page_url.clone()) + .send() + .await + .map_err(|error| RecorderError::Credential3rdError { + message: "failed to get mikan login page".to_string(), + source: OptDynErr::some_boxed(error), + })?; + + let antiforgery_cookie = { + let cookie_store_lock = self.http_client.cookie_store.clone().ok_or_else(|| { + RecorderError::Credential3rdError { + message: "failed to get cookie store".to_string(), + source: None.into(), + } + })?; + let cookie_store = + cookie_store_lock + .read() + .map_err(|_| RecorderError::Credential3rdError { + message: "failed to read cookie store".to_string(), + source: None.into(), + })?; + + cookie_store + .matches(&login_page_url) + .iter() + .find(|cookie| cookie.name().starts_with(".AspNetCore.Antiforgery.")) + .map(|cookie| cookie.value().to_string()) + } + .ok_or_else(|| RecorderError::Credential3rdError { + message: "mikan login failed, failed to get antiforgery cookie".to_string(), + source: None.into(), + })?; + + let login_post_form = hashmap! { + "__RequestVerificationToken".to_string() => antiforgery_cookie, + "UserName".to_string() => userpass_credential.username.clone(), + "Password".to_string() => userpass_credential.password.clone(), + "RememberMe".to_string() => "true".to_string(), + }; + let login_post_res = self + .http_client + .post(login_page_url.clone()) + .form(&login_post_form) + .send() + .await + .map_err(|err| RecorderError::Credential3rdError { + message: "mikan login failed".to_string(), + source: OptDynErr::some_boxed(err), + })?; + + if login_post_res.status().is_redirection() + && login_post_res.headers().contains_key("location") + { + Ok(()) + } else { + Err(RecorderError::Credential3rdError { + message: "mikan login failed, no redirecting".to_string(), + source: None.into(), + }) + } + } + + pub async fn fork_with_credential( + &self, + ctx: Arc, + credential_id: Option, + ) -> RecorderResult { + let mut fork = self.http_client.fork(); + let mut userpass_credential_opt = None; + + if let Some(credential_id) = credential_id { + let credential = credential_3rd::Model::find_by_id(ctx.clone(), credential_id).await?; + if let Some(credential) = credential { + if credential.credential_type != Credential3rdType::Mikan { + return Err(RecorderError::Credential3rdError { + message: "credential is not a mikan credential".to_string(), + source: None.into(), + }); + } + + let userpass_credential: UserPassCredential = + credential.try_into_userpass_credential(ctx)?; + + if let Some(cookies) = userpass_credential.cookies.as_ref() { + fork = fork.attach_cookies(cookies)?; + } + + if let Some(user_agent) = userpass_credential.user_agent.as_ref() { + fork = fork.attach_user_agent(user_agent); + } + + userpass_credential_opt = Some(userpass_credential); + } else { + return Err(RecorderError::from_db_record_not_found( + DbErr::RecordNotFound(format!("credential={} not found", credential_id)), + )); + } } Ok(Self { http_client: HttpClient::from_fork(fork)?, base_url: self.base_url.clone(), + origin_url: self.origin_url.clone(), + userpass_credential: userpass_credential_opt, }) } diff --git a/apps/recorder/src/extract/mikan/constants.rs b/apps/recorder/src/extract/mikan/constants.rs index 35ceb3d..193af75 100644 --- a/apps/recorder/src/extract/mikan/constants.rs +++ b/apps/recorder/src/extract/mikan/constants.rs @@ -1,3 +1,6 @@ pub const MIKAN_BUCKET_KEY: &str = "mikan"; pub const MIKAN_UNKNOWN_FANSUB_NAME: &str = "生肉/不明字幕"; pub const MIKAN_UNKNOWN_FANSUB_ID: &str = "202"; +pub const MIKAN_LOGIN_PAGE_PATH: &str = "/Account/Login"; +pub const MIKAN_LOGIN_PAGE_SEARCH: &str = "?ReturnUrl=%2F"; +pub const MIKAN_ACCOUNT_MANAGE_PAGE_PATH: &str = "/Account/Manage"; diff --git a/apps/recorder/src/extract/mikan/mod.rs b/apps/recorder/src/extract/mikan/mod.rs index f8806fd..fc35a2f 100644 --- a/apps/recorder/src/extract/mikan/mod.rs +++ b/apps/recorder/src/extract/mikan/mod.rs @@ -4,18 +4,20 @@ pub mod constants; pub mod rss_extract; pub mod web_extract; -pub use client::{MikanAuthSecrecy, MikanClient}; +pub use client::{MikanClient, MikanCredentialForm}; pub use config::MikanConfig; pub use constants::MIKAN_BUCKET_KEY; pub use rss_extract::{ - MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssLink, + MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel, MikanRssItem, MikanSubscriberAggregationRssChannel, - MikanSubscriberAggregationRssLink, build_mikan_bangumi_rss_link, - build_mikan_subscriber_aggregation_rss_link, extract_mikan_bangumi_id_from_rss_link, + MikanSubscriberAggregationRssUrlMeta, build_mikan_bangumi_rss_url, + build_mikan_subscriber_aggregation_rss_url, extract_mikan_bangumi_id_from_rss_url, extract_mikan_rss_channel_from_rss_link, extract_mikan_subscriber_aggregation_id_from_rss_link, }; pub use web_extract::{ - MikanBangumiMeta, MikanEpisodeMeta, build_mikan_bangumi_homepage, build_mikan_episode_homepage, + MikanBangumiMeta, MikanEpisodeMeta, MikanSeasonStr, build_mikan_bangumi_homepage_url, + build_mikan_episode_homepage_url, build_mikan_season_flow_url, + extract_mikan_bangumi_indices_meta_from_season_flow_fragment, extract_mikan_bangumi_meta_from_bangumi_homepage, extract_mikan_episode_meta_from_episode_homepage, }; diff --git a/apps/recorder/src/extract/mikan/rss_extract.rs b/apps/recorder/src/extract/mikan/rss_extract.rs index 5b2a5f4..fcfc2bc 100644 --- a/apps/recorder/src/extract/mikan/rss_extract.rs +++ b/apps/recorder/src/extract/mikan/rss_extract.rs @@ -12,7 +12,7 @@ use crate::{ errors::app_error::{RecorderError, RecorderResult}, extract::mikan::{ MikanClient, - web_extract::{MikanEpisodeHomepage, extract_mikan_episode_id_from_homepage}, + web_extract::{MikanEpisodeHomepage, extract_mikan_episode_id_from_homepage_url}, }, }; @@ -135,7 +135,7 @@ impl TryFrom for MikanRssItem { let MikanEpisodeHomepage { mikan_episode_id, .. - } = extract_mikan_episode_id_from_homepage(&homepage).ok_or_else(|| { + } = extract_mikan_episode_id_from_homepage_url(&homepage).ok_or_else(|| { RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("mikan_episode_id")) })?; @@ -155,17 +155,17 @@ impl TryFrom for MikanRssItem { } #[derive(Debug, Clone)] -pub struct MikanBangumiRssLink { +pub struct MikanBangumiRssUrlMeta { pub mikan_bangumi_id: String, pub mikan_fansub_id: Option, } #[derive(Debug, Clone)] -pub struct MikanSubscriberAggregationRssLink { +pub struct MikanSubscriberAggregationRssUrlMeta { pub mikan_aggregation_id: String, } -pub fn build_mikan_bangumi_rss_link( +pub fn build_mikan_bangumi_rss_url( mikan_base_url: impl IntoUrl, mikan_bangumi_id: &str, mikan_fansub_id: Option<&str>, @@ -181,7 +181,7 @@ pub fn build_mikan_bangumi_rss_link( Ok(url) } -pub fn build_mikan_subscriber_aggregation_rss_link( +pub fn build_mikan_subscriber_aggregation_rss_url( mikan_base_url: &str, mikan_aggregation_id: &str, ) -> RecorderResult { @@ -192,11 +192,11 @@ pub fn build_mikan_subscriber_aggregation_rss_link( Ok(url) } -pub fn extract_mikan_bangumi_id_from_rss_link(url: &Url) -> Option { +pub fn extract_mikan_bangumi_id_from_rss_url(url: &Url) -> Option { if url.path() == "/RSS/Bangumi" { url.query_pairs() .find(|(k, _)| k == "bangumiId") - .map(|(_, v)| MikanBangumiRssLink { + .map(|(_, v)| MikanBangumiRssUrlMeta { mikan_bangumi_id: v.to_string(), mikan_fansub_id: url .query_pairs() @@ -210,10 +210,10 @@ pub fn extract_mikan_bangumi_id_from_rss_link(url: &Url) -> Option Option { +) -> Option { if url.path() == "/RSS/MyBangumi" { url.query_pairs().find(|(k, _)| k == "token").map(|(_, v)| { - MikanSubscriberAggregationRssLink { + MikanSubscriberAggregationRssUrlMeta { mikan_aggregation_id: v.to_string(), } }) @@ -233,10 +233,10 @@ pub async fn extract_mikan_rss_channel_from_rss_link( let channel_link = Url::parse(channel.link())?; - if let Some(MikanBangumiRssLink { + if let Some(MikanBangumiRssUrlMeta { mikan_bangumi_id, mikan_fansub_id, - }) = extract_mikan_bangumi_id_from_rss_link(&channel_link) + }) = extract_mikan_bangumi_id_from_rss_url(&channel_link) { tracing::trace!( mikan_bangumi_id, @@ -290,7 +290,7 @@ pub async fn extract_mikan_rss_channel_from_rss_link( }, )) } - } else if let Some(MikanSubscriberAggregationRssLink { + } else if let Some(MikanSubscriberAggregationRssUrlMeta { mikan_aggregation_id, .. }) = extract_mikan_subscriber_aggregation_id_from_rss_link(&channel_link) diff --git a/apps/recorder/src/extract/mikan/web_extract.rs b/apps/recorder/src/extract/mikan/web_extract.rs index 87fe12e..de296c5 100644 --- a/apps/recorder/src/extract/mikan/web_extract.rs +++ b/apps/recorder/src/extract/mikan/web_extract.rs @@ -1,22 +1,19 @@ -use std::{borrow::Cow, sync::Arc}; +use std::{borrow::Cow, fmt}; -use async_stream::try_stream; use bytes::Bytes; use fetch::{html::fetch_html, image::fetch_image}; -use futures::Stream; -use itertools::Itertools; +use html_escape::decode_html_entities; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; use tracing::instrument; use url::Url; use super::{ - MIKAN_BUCKET_KEY, MikanAuthSecrecy, MikanBangumiRssLink, MikanClient, - extract_mikan_bangumi_id_from_rss_link, + MIKAN_BUCKET_KEY, MikanBangumiRssUrlMeta, MikanClient, extract_mikan_bangumi_id_from_rss_url, }; use crate::{ app::AppContextTrait, - errors::app_error::{RecorderResult, RecorderError}, + errors::app_error::{RecorderError, RecorderResult}, extract::{ html::{extract_background_image_src_from_style_attr, extract_inner_text_from_element_ref}, media::extract_image_src_from_str, @@ -24,6 +21,29 @@ use crate::{ storage::StorageContentCategory, }; +#[derive(Clone, Debug, Copy, Serialize, Deserialize)] +pub enum MikanSeasonStr { + #[serde(rename = "春")] + Spring, + #[serde(rename = "夏")] + Summer, + #[serde(rename = "秋")] + Autumn, + #[serde(rename = "冬")] + Winter, +} + +impl fmt::Display for MikanSeasonStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Spring => write!(f, "春"), + Self::Summer => write!(f, "夏"), + Self::Autumn => write!(f, "秋"), + Self::Winter => write!(f, "冬"), + } + } +} + #[derive(Clone, Debug, PartialEq)] pub struct MikanEpisodeMeta { pub homepage: Url, @@ -36,6 +56,14 @@ pub struct MikanEpisodeMeta { pub mikan_episode_id: String, } +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct MikanBangumiIndexMeta { + pub homepage: Url, + pub origin_poster_src: Option, + pub bangumi_title: String, + pub mikan_bangumi_id: String, +} + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct MikanBangumiMeta { pub homepage: Url, @@ -53,6 +81,19 @@ pub struct MikanBangumiPosterMeta { pub poster_src: Option, } +impl From for MikanBangumiMeta { + fn from(index_meta: MikanBangumiIndexMeta) -> Self { + MikanBangumiMeta { + homepage: index_meta.homepage, + origin_poster_src: index_meta.origin_poster_src, + bangumi_title: index_meta.bangumi_title, + mikan_bangumi_id: index_meta.mikan_bangumi_id, + mikan_fansub_id: None, + fansub: None, + } + } +} + #[derive(Clone, Debug, PartialEq)] pub struct MikanEpisodeHomepage { pub mikan_episode_id: String, @@ -64,7 +105,7 @@ pub struct MikanBangumiHomepage { pub mikan_fansub_id: Option, } -pub fn build_mikan_bangumi_homepage( +pub fn build_mikan_bangumi_homepage_url( mikan_base_url: Url, mikan_bangumi_id: &str, mikan_fansub_id: Option<&str>, @@ -75,13 +116,29 @@ pub fn build_mikan_bangumi_homepage( url } -pub fn build_mikan_episode_homepage(mikan_base_url: Url, mikan_episode_id: &str) -> Url { +pub fn build_mikan_season_flow_url( + mikan_base_url: Url, + year: i32, + season_str: MikanSeasonStr, +) -> Url { + let mut url = mikan_base_url; + url.set_path("/Home/BangumiCoverFlow"); + url.query_pairs_mut() + .append_pair("year", &year.to_string()) + .append_pair("seasonStr", &season_str.to_string()); + url +} + +pub fn build_mikan_episode_homepage_url(mikan_base_url: Url, mikan_episode_id: &str) -> Url { let mut url = mikan_base_url; url.set_path(&format!("/Home/Episode/{mikan_episode_id}")); url } -pub fn build_mikan_bangumi_expand_info_url(mikan_base_url: Url, mikan_bangumi_id: &str) -> Url { +pub fn build_mikan_bangumi_expand_subscribed_fragment_url( + mikan_base_url: Url, + mikan_bangumi_id: &str, +) -> Url { let mut url = mikan_base_url; url.set_path("/ExpandBangumi"); url.query_pairs_mut() @@ -90,7 +147,7 @@ pub fn build_mikan_bangumi_expand_info_url(mikan_base_url: Url, mikan_bangumi_id url } -pub fn extract_mikan_bangumi_id_from_homepage(url: &Url) -> Option { +pub fn extract_mikan_bangumi_id_from_homepage_url(url: &Url) -> Option { if url.path().starts_with("/Home/Bangumi/") { let mikan_bangumi_id = url.path().replace("/Home/Bangumi/", ""); @@ -103,7 +160,7 @@ pub fn extract_mikan_bangumi_id_from_homepage(url: &Url) -> Option Option { +pub fn extract_mikan_episode_id_from_homepage_url(url: &Url) -> Option { if url.path().starts_with("/Home/Episode/") { let mikan_episode_id = url.path().replace("/Home/Episode/", ""); Some(MikanEpisodeHomepage { mikan_episode_id }) @@ -191,7 +248,7 @@ pub async fn extract_mikan_episode_meta_from_episode_homepage( tracing::warn!(error = %error); })?; - let MikanBangumiRssLink { + let MikanBangumiRssUrlMeta { mikan_bangumi_id, mikan_fansub_id, .. @@ -200,7 +257,7 @@ pub async fn extract_mikan_episode_meta_from_episode_homepage( .next() .and_then(|el| el.value().attr("href")) .and_then(|s| mikan_episode_homepage_url.join(s).ok()) - .and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_link(&rss_link_url)) + .and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_url(&rss_link_url)) .ok_or_else(|| { RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_bangumi_id")) }) @@ -223,7 +280,7 @@ pub async fn extract_mikan_episode_meta_from_episode_homepage( let MikanEpisodeHomepage { mikan_episode_id, .. - } = extract_mikan_episode_id_from_homepage(&mikan_episode_homepage_url) + } = extract_mikan_episode_id_from_homepage_url(&mikan_episode_homepage_url) .ok_or_else(|| { RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_episode_id")) }) @@ -303,9 +360,9 @@ pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage( .next() .and_then(|el| el.value().attr("href")) .and_then(|s| mikan_bangumi_homepage_url.join(s).ok()) - .and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_link(&rss_link_url)) + .and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_url(&rss_link_url)) .map( - |MikanBangumiRssLink { + |MikanBangumiRssUrlMeta { mikan_bangumi_id, .. }| mikan_bangumi_id, ) @@ -325,7 +382,7 @@ pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage( }) }); - let (mikan_fansub_id, fansub_name) = mikan_bangumi_homepage_url + let (mikan_fansub_id, fansub) = mikan_bangumi_homepage_url .fragment() .and_then(|id| { html.select( @@ -341,7 +398,7 @@ pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage( bangumi_title, mikan_bangumi_id, origin_poster_src = origin_poster_src.as_ref().map(|url| url.as_str()), - fansub_name, + fansub, mikan_fansub_id, "mikan bangumi meta extracted" ); @@ -351,154 +408,141 @@ pub async fn extract_mikan_bangumi_meta_from_bangumi_homepage( bangumi_title, origin_poster_src, mikan_bangumi_id, - fansub: fansub_name, + fansub, mikan_fansub_id, }) } -#[instrument(skip_all, fields(my_bangumi_page_url, auth_secrecy = ?auth_secrecy, history = history.len()))] -pub fn extract_mikan_bangumis_meta_from_my_bangumi_page( - context: Arc, - my_bangumi_page_url: Url, - auth_secrecy: Option, - history: &[Arc>], -) -> impl Stream> { - try_stream! { - let http_client = &context.mikan().fork_with_auth(auth_secrecy.clone())?; +#[instrument] +pub fn extract_mikan_bangumi_indices_meta_from_season_flow_fragment( + season_flow_fragment: &str, + mikan_base_url: Url, +) -> Vec { + let html = Html::parse_fragment(season_flow_fragment); - let mikan_base_url = Url::parse(&my_bangumi_page_url.origin().unicode_serialization())?; + let bangumi_empty_selector = &Selector::parse(".no-subscribe-bangumi").unwrap(); - let content = fetch_html(http_client, my_bangumi_page_url.clone()).await?; + if html.select(bangumi_empty_selector).next().is_some() { + return vec![]; + } - let fansub_container_selector = - &Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap(); - let fansub_title_selector = &Selector::parse(".tag-res-name[title]").unwrap(); - let fansub_id_selector = - &Selector::parse(".active[data-subtitlegroupid][data-bangumiid]").unwrap(); + let bangumi_item_selector = &Selector::parse(".mine.an-box ul.an-ul>li").unwrap(); + let bangumi_poster_span_selector = &Selector::parse("span[data-src][data-bangumiid]").unwrap(); + let bangumi_title_a_selector = &Selector::parse(".an-info-group a.an-text[title]").unwrap(); - let bangumi_items = { - let html = Html::parse_document(&content); + let mut items = vec![]; + for bangumi_item in html.select(bangumi_item_selector) { + let bangumi_poster_span = bangumi_item.select(bangumi_poster_span_selector).next(); + let bangumi_title_a = bangumi_item.select(bangumi_title_a_selector).next(); + if let (Some(bangumi_poster_span), Some(bangumi_title_a)) = + (bangumi_poster_span, bangumi_title_a) + { + let origin_poster_src = bangumi_poster_span + .attr("data-src") + .and_then(|data_src| extract_image_src_from_str(data_src, &mikan_base_url)); + let bangumi_title = bangumi_title_a + .attr("title") + .map(|title| decode_html_entities(&title).trim().to_string()); + let mikan_bangumi_id = bangumi_poster_span + .attr("data-bangumiid") + .map(|id| id.to_string()); - let bangumi_container_selector = &Selector::parse(".sk-bangumi .an-ul>li").unwrap(); - let bangumi_info_selector = &Selector::parse(".an-info a.an-text").unwrap(); - let bangumi_poster_selector = - &Selector::parse("span[data-src][data-bangumiid], span[data-bangumiid][style]") - .unwrap(); - html.select(bangumi_container_selector) - .filter_map(|bangumi_elem| { - let title_and_href_elem = - bangumi_elem.select(bangumi_info_selector).next(); - let poster_elem = bangumi_elem.select(bangumi_poster_selector).next(); - if let (Some(bangumi_home_page_url), Some(bangumi_title)) = ( - title_and_href_elem.and_then(|elem| elem.attr("href")), - title_and_href_elem.and_then(|elem| elem.attr("title")), - ) { - let origin_poster_src = poster_elem.and_then(|ele| { - ele.attr("data-src") - .and_then(|data_src| { - extract_image_src_from_str(data_src, &mikan_base_url) - }) - .or_else(|| { - ele.attr("style").and_then(|style| { - extract_background_image_src_from_style_attr( - style, - &mikan_base_url, - ) - }) - }) - }); - let bangumi_title = bangumi_title.to_string(); - let bangumi_home_page_url = - my_bangumi_page_url.join(bangumi_home_page_url).ok()?; - let MikanBangumiHomepage { - mikan_bangumi_id, .. - } = extract_mikan_bangumi_id_from_homepage(&bangumi_home_page_url)?; - if let Some(origin_poster_src) = origin_poster_src.as_ref() { - tracing::trace!( - origin_poster_src = origin_poster_src.as_str(), - bangumi_title, - mikan_bangumi_id, - "bangumi info extracted" - ); - } else { - tracing::warn!( - bangumi_title, - mikan_bangumi_id, - "bangumi info extracted, but failed to extract poster_src" - ); - } - let bangumi_expand_info_url = build_mikan_bangumi_expand_info_url( - mikan_base_url.clone(), - &mikan_bangumi_id, - ); - Some(( + if let (Some(bangumi_title), Some(mikan_bangumi_id)) = (bangumi_title, mikan_bangumi_id) + { + let homepage = build_mikan_bangumi_homepage_url( + mikan_base_url.clone(), + &mikan_bangumi_id, + None, + ); + if let Some(origin_poster_src) = origin_poster_src.as_ref() { + tracing::trace!( + origin_poster_src = origin_poster_src.as_str(), bangumi_title, mikan_bangumi_id, - bangumi_expand_info_url, - origin_poster_src, - )) + "bangumi index meta extracted" + ); + } else { + tracing::warn!( + bangumi_title, + mikan_bangumi_id, + "bangumi index meta extracted, but failed to extract poster_src" + ); + } + items.push(MikanBangumiIndexMeta { + homepage, + origin_poster_src, + bangumi_title, + mikan_bangumi_id, + }) + } + } + } + items +} + +#[instrument(skip_all, fields(mikan_bangumi_index = mikan_bangumi_index.mikan_bangumi_id.as_str()))] +pub fn extract_mikan_bangumi_meta_from_expand_subscribed_fragment( + mikan_bangumi_index: MikanBangumiIndexMeta, + expand_subscribed_fragment: &str, + mikan_base_url: Url, +) -> Option { + let html = Html::parse_fragment(expand_subscribed_fragment); + let fansub_container_selector = + &Selector::parse(".js-expand_bangumi-subgroup.js-subscribed").unwrap(); + let fansub_title_selector = &Selector::parse(".tag-res-name[title]").unwrap(); + let fansub_id_selector = + &Selector::parse(".active[data-subtitlegroupid][data-bangumiid]").unwrap(); + + if let Some((fansub_name, mikan_fansub_id)) = { + html.select(fansub_container_selector) + .next() + .and_then(|fansub_info| { + if let (Some(fansub_name), Some(mikan_fansub_id)) = ( + fansub_info + .select(fansub_title_selector) + .next() + .and_then(|ele| ele.attr("title")) + .map(String::from), + fansub_info + .select(fansub_id_selector) + .next() + .and_then(|ele| ele.attr("data-subtitlegroupid")) + .map(String::from), + ) { + Some((fansub_name, mikan_fansub_id)) } else { None } }) - .collect_vec() - }; + } { + tracing::trace!(fansub_name, mikan_fansub_id, "subscribed fansub extracted"); + let mikan_bangumi_id = mikan_bangumi_index.mikan_bangumi_id; + let bangumi_title = mikan_bangumi_index.bangumi_title; + let origin_poster_src = mikan_bangumi_index.origin_poster_src; - for (idx, (bangumi_title, mikan_bangumi_id, bangumi_expand_info_url, origin_poster_src)) in - bangumi_items.iter().enumerate() - { - - if history.get(idx).is_some() { - continue; - } else if let Some((fansub_name, mikan_fansub_id)) = { - let bangumi_expand_info_content = - fetch_html(http_client, bangumi_expand_info_url.clone()).await?; - let bangumi_expand_info_fragment = - Html::parse_fragment(&bangumi_expand_info_content); - bangumi_expand_info_fragment - .select(fansub_container_selector) - .next() - .and_then(|fansub_info| { - if let (Some(fansub_name), Some(mikan_fansub_id)) = ( - fansub_info - .select(fansub_title_selector) - .next() - .and_then(|ele| ele.attr("title")) - .map(String::from), - fansub_info - .select(fansub_id_selector) - .next() - .and_then(|ele| ele.attr("data-subtitlegroupid")) - .map(String::from), - ) { - Some((fansub_name, mikan_fansub_id)) - } else { - None - } - }) - } { - tracing::trace!(fansub_name, mikan_fansub_id, "subscribed fansub extracted"); - let item = MikanBangumiMeta { - homepage: build_mikan_bangumi_homepage( - mikan_base_url.clone(), - mikan_bangumi_id, - Some(&mikan_fansub_id), - ), - bangumi_title: bangumi_title.to_string(), - mikan_bangumi_id: mikan_bangumi_id.to_string(), - mikan_fansub_id: Some(mikan_fansub_id), - fansub: Some(fansub_name), - origin_poster_src: origin_poster_src.clone(), - }; - yield item; - } - } + Some(MikanBangumiMeta { + homepage: build_mikan_bangumi_homepage_url( + mikan_base_url.clone(), + &mikan_bangumi_id, + Some(&mikan_fansub_id), + ), + bangumi_title: bangumi_title.to_string(), + mikan_bangumi_id: mikan_bangumi_id.to_string(), + mikan_fansub_id: Some(mikan_fansub_id), + fansub: Some(fansub_name), + origin_poster_src: origin_poster_src.clone(), + }) + } else { + tracing::trace!("subscribed fansub not found"); + None } } #[cfg(test)] mod test { #![allow(unused_variables)] + use std::{fs, sync::Arc}; + use futures::{TryStreamExt, pin_mut}; use http::header; use rstest::{fixture, rstest}; @@ -507,9 +551,12 @@ mod test { use zune_image::{codecs::ImageFormat, image::Image}; use super::*; - use crate::test_utils::{ - app::UnitTestAppContext, mikan::build_testing_mikan_client, - tracing::try_init_testing_tracing, + use crate::{ + extract::mikan::MikanCredentialForm, + test_utils::{ + app::UnitTestAppContext, mikan::build_testing_mikan_client, + tracing::try_init_testing_tracing, + }, }; #[fixture] @@ -590,7 +637,9 @@ mod test { #[rstest] #[tokio::test] - async fn test_extract_mikan_bangumi_meta_from_bangumi_homepage(before_each: ()) -> RecorderResult<()> { + async fn test_extract_mikan_bangumi_meta_from_bangumi_homepage( + before_each: (), + ) -> RecorderResult<()> { let mut mikan_server = mockito::Server::new_async().await; let mikan_base_url = Url::parse(&mikan_server.url())?; let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?; @@ -626,95 +675,217 @@ mod test { } #[rstest] - #[tokio::test] - async fn test_extract_mikan_bangumis_meta_from_my_bangumi_page(before_each: ()) -> RecorderResult<()> { - let mut mikan_server = mockito::Server::new_async().await; + #[test] + fn test_extract_mikan_bangumi_indices_meta_from_season_flow_fragment( + before_each: (), + ) -> RecorderResult<()> { + let fragment = + fs::read_to_string("tests/resources/mikan/BangumiCoverFlow-2025-spring.html")?; - let mikan_base_url = Url::parse(&mikan_server.url())?; - - let my_bangumi_page_url = mikan_base_url.join("/Home/MyBangumi")?; - - let context = Arc::new( - UnitTestAppContext::builder() - .mikan(build_testing_mikan_client(mikan_base_url.clone()).await?) - .build(), + let indices = extract_mikan_bangumi_indices_meta_from_season_flow_fragment( + &fragment, + Url::parse("https://mikanani.me/")?, ); - { - let my_bangumi_without_cookie_mock = mikan_server - .mock("GET", my_bangumi_page_url.path()) - .match_header(header::COOKIE, mockito::Matcher::Missing) - .with_body_from_file("tests/resources/mikan/MyBangumi-noauth.htm") - .create_async() - .await; + tracing::info!("indices: {:#?}", &indices[0]); - let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page( - context.clone(), - my_bangumi_page_url.clone(), - None, - &[], - ); - - pin_mut!(bangumi_metas); - - let bangumi_metas = bangumi_metas.try_collect::>().await?; - - assert!(bangumi_metas.is_empty()); - - assert!(my_bangumi_without_cookie_mock.matched_async().await); - } - { - let my_bangumi_with_cookie_mock = mikan_server - .mock("GET", my_bangumi_page_url.path()) - .match_header( - header::COOKIE, - mockito::Matcher::AllOf(vec![ - mockito::Matcher::Regex(String::from(".*\\.AspNetCore\\.Antiforgery.*")), - mockito::Matcher::Regex(String::from( - ".*\\.AspNetCore\\.Identity\\.Application.*", - )), - ]), - ) - .with_body_from_file("tests/resources/mikan/MyBangumi.htm") - .create_async() - .await; - - let expand_bangumi_mock = mikan_server - .mock("GET", "/ExpandBangumi") - .match_query(mockito::Matcher::Any) - .with_body_from_file("tests/resources/mikan/ExpandBangumi.htm") - .create_async() - .await; - - let auth_secrecy = Some(MikanAuthSecrecy { - cookie: String::from( - "mikan-announcement=1; .AspNetCore.Antiforgery.abc=abc; \ - .AspNetCore.Identity.Application=abc; ", - ), - user_agent: Some(String::from( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like \ - Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0", - )), - }); - - let bangumi_metas = extract_mikan_bangumis_meta_from_my_bangumi_page( - context.clone(), - my_bangumi_page_url, - auth_secrecy, - &[], - ); - pin_mut!(bangumi_metas); - let bangumi_metas = bangumi_metas.try_collect::>().await?; - - assert!(!bangumi_metas.is_empty()); - - assert!(bangumi_metas[0].origin_poster_src.is_some()); - - assert!(my_bangumi_with_cookie_mock.matched_async().await); - - expand_bangumi_mock.expect(bangumi_metas.len()); - } + assert_eq!(indices.len(), 49); + let first = &indices[0]; + assert_eq!(first.bangumi_title, "吉伊卡哇"); + assert_eq!(first.mikan_bangumi_id, "3288"); + assert_eq!( + first.homepage.to_string(), + String::from("https://mikanani.me/Home/Bangumi/3288") + ); + assert_eq!( + first + .origin_poster_src + .as_ref() + .map(|s| s.to_string()) + .unwrap_or_default(), + String::from("https://mikanani.me/images/Bangumi/202204/d8ef46c0.jpg") + ); Ok(()) } + + #[rstest] + #[test] + fn test_extract_mikan_bangumi_indices_meta_from_season_flow_fragment_noauth( + before_each: (), + ) -> RecorderResult<()> { + let fragment = + fs::read_to_string("tests/resources/mikan/BangumiCoverFlow-2025-spring-noauth.html")?; + + let indices = extract_mikan_bangumi_indices_meta_from_season_flow_fragment( + &fragment, + Url::parse("https://mikanani.me/")?, + ); + + assert!(indices.is_empty()); + + Ok(()) + } + + #[rstest] + #[test] + fn test_extract_mikan_bangumi_meta_from_expand_subscribed_fragment( + before_each: (), + ) -> RecorderResult<()> { + let origin_poster_src = + Url::parse("https://mikanani.me/images/Bangumi/202504/076c1094.jpg")?; + let bangumi_index = MikanBangumiIndexMeta { + homepage: Url::parse("https://mikanani.me/Home/Bangumi/3599")?, + origin_poster_src: Some(origin_poster_src.clone()), + bangumi_title: "夏日口袋".to_string(), + mikan_bangumi_id: "3599".to_string(), + }; + + let fragment = fs::read_to_string("tests/resources/mikan/ExpandBangumi-3599.html")?; + + let bangumi = extract_mikan_bangumi_meta_from_expand_subscribed_fragment( + bangumi_index.clone(), + &fragment, + Url::parse("https://mikanani.me/")?, + ) + .unwrap_or_else(|| { + panic!("bangumi should not be None"); + }); + + assert_eq!( + bangumi.homepage, + Url::parse("https://mikanani.me/Home/Bangumi/3599#370")? + ); + assert_eq!(bangumi.bangumi_title, bangumi_index.bangumi_title); + assert_eq!(bangumi.mikan_bangumi_id, bangumi_index.mikan_bangumi_id); + assert_eq!(bangumi.origin_poster_src, bangumi_index.origin_poster_src); + assert_eq!(bangumi.mikan_fansub_id, Some(String::from("370"))); + assert_eq!(bangumi.fansub, Some(String::from("LoliHouse"))); + + Ok(()) + } + + #[rstest] + #[test] + fn test_extract_mikan_bangumi_meta_from_expand_subscribed_fragment_noauth( + before_each: (), + ) -> RecorderResult<()> { + let origin_poster_src = + Url::parse("https://mikanani.me/images/Bangumi/202504/076c1094.jpg")?; + let bangumi_index = MikanBangumiIndexMeta { + homepage: Url::parse("https://mikanani.me/Home/Bangumi/3599")?, + origin_poster_src: Some(origin_poster_src.clone()), + bangumi_title: "夏日口袋".to_string(), + mikan_bangumi_id: "3599".to_string(), + }; + + let fragment = fs::read_to_string("tests/resources/mikan/ExpandBangumi-3599-noauth.html")?; + + let bangumi = extract_mikan_bangumi_meta_from_expand_subscribed_fragment( + bangumi_index.clone(), + &fragment, + Url::parse("https://mikanani.me/")?, + ); + + assert!(bangumi.is_none()); + + Ok(()) + } + + // #[rstest] + // #[tokio::test] + // async fn test_extract_mikan_bangumis_meta_from_my_bangumi_page( + // before_each: (), + // ) -> RecorderResult<()> { + // let mut mikan_server = mockito::Server::new_async().await; + + // let mikan_base_url = Url::parse(&mikan_server.url())?; + + // let my_bangumi_page_url = mikan_base_url.join("/Home/MyBangumi")?; + + // let context = Arc::new( + // UnitTestAppContext::builder() + // + // .mikan(build_testing_mikan_client(mikan_base_url.clone()).await?) + // .build(), + // ); + + // { + // let my_bangumi_without_cookie_mock = mikan_server + // .mock("GET", my_bangumi_page_url.path()) + // .match_header(header::COOKIE, mockito::Matcher::Missing) + // + // .with_body_from_file("tests/resources/mikan/MyBangumi-noauth.htm") + // .create_async() + // .await; + + // let bangumi_metas = + // extract_mikan_bangumis_meta_from_my_bangumi_page( + // context.clone(), my_bangumi_page_url.clone(), + // None, + // &[], + // ); + + // pin_mut!(bangumi_metas); + + // let bangumi_metas = bangumi_metas.try_collect::>().await?; + + // assert!(bangumi_metas.is_empty()); + + // assert!(my_bangumi_without_cookie_mock.matched_async().await); + // } + // { + // let my_bangumi_with_cookie_mock = mikan_server + // .mock("GET", my_bangumi_page_url.path()) + // .match_header( + // header::COOKIE, + // mockito::Matcher::AllOf(vec![ + // + // mockito::Matcher::Regex(String::from(".*\\.AspNetCore\\.Antiforgery.*")), + // mockito::Matcher::Regex(String::from( + // ".*\\.AspNetCore\\.Identity\\.Application.*", + // )), + // ]), + // ) + // .with_body_from_file("tests/resources/mikan/MyBangumi.htm") + // .create_async() + // .await; + + // let expand_bangumi_mock = mikan_server + // .mock("GET", "/ExpandBangumi") + // .match_query(mockito::Matcher::Any) + // + // .with_body_from_file("tests/resources/mikan/ExpandBangumi.htm") + // .create_async() + // .await; + + // let auth_secrecy = Some(MikanCredentialForm { + // username: String::from("test_username"), + // password: String::from("test_password"), + // user_agent: String::from( + // "Mozilla/5.0 (Windows NT 10.0; Win64; x64) + // AppleWebKit/537.36 (KHTML, like \ Gecko) + // Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0", ), + // }); + + // let bangumi_metas = + // extract_mikan_bangumis_meta_from_my_bangumi_page( + // context.clone(), my_bangumi_page_url, + // auth_secrecy, + // &[], + // ); + // pin_mut!(bangumi_metas); + // let bangumi_metas = bangumi_metas.try_collect::>().await?; + + // assert!(!bangumi_metas.is_empty()); + + // assert!(bangumi_metas[0].origin_poster_src.is_some()); + + // assert!(my_bangumi_with_cookie_mock.matched_async().await); + + // expand_bangumi_mock.expect(bangumi_metas.len()); + // } + + // Ok(()) + // } } diff --git a/apps/recorder/src/lib.rs b/apps/recorder/src/lib.rs index e901092..b29da28 100644 --- a/apps/recorder/src/lib.rs +++ b/apps/recorder/src/lib.rs @@ -14,6 +14,7 @@ pub use downloader; pub mod app; pub mod auth; pub mod cache; +pub mod crypto; pub mod database; pub mod errors; pub mod extract; diff --git a/apps/recorder/src/migrations/defs.rs b/apps/recorder/src/migrations/defs.rs index 00381c6..7a63908 100644 --- a/apps/recorder/src/migrations/defs.rs +++ b/apps/recorder/src/migrations/defs.rs @@ -33,6 +33,7 @@ pub enum Subscriptions { Category, SourceUrl, Enabled, + CredentialId, } #[derive(DeriveIden)] @@ -137,6 +138,18 @@ pub enum Auth { AuthType, } +#[derive(DeriveIden)] +pub enum Credential3rd { + Table, + Id, + SubscriberId, + CredentialType, + Cookies, + Username, + Password, + UserAgent, +} + macro_rules! create_postgres_enum_for_active_enum { ($manager: expr, $active_enum: expr, $($enum_value:expr),+) => { { diff --git a/apps/recorder/src/migrations/m20250501_021523_credential_3rd.rs b/apps/recorder/src/migrations/m20250501_021523_credential_3rd.rs new file mode 100644 index 0000000..c299e42 --- /dev/null +++ b/apps/recorder/src/migrations/m20250501_021523_credential_3rd.rs @@ -0,0 +1,107 @@ +use async_trait::async_trait; +use sea_orm_migration::{ + prelude::*, + schema::{string_null, *}, +}; + +use super::defs::{CustomSchemaManagerExt, GeneralIds, table_auto_z}; +use crate::{ + migrations::defs::{Credential3rd, Subscribers, Subscriptions}, + models::credential_3rd::{Credential3rdType, Credential3rdTypeEnum}, +}; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + create_postgres_enum_for_active_enum!( + manager, + Credential3rdTypeEnum, + Credential3rdType::Mikan + ) + .await?; + + manager + .create_table( + table_auto_z(Credential3rd::Table) + .col(pk_auto(Credential3rd::Id)) + .col(integer(Credential3rd::SubscriberId)) + .col(string(Credential3rd::CredentialType)) + .col(string_null(Credential3rd::Cookies)) + .col(string_null(Credential3rd::Username)) + .col(string_null(Credential3rd::Password)) + .col(string_null(Credential3rd::UserAgent)) + .foreign_key( + ForeignKey::create() + .name("fk_credential_3rd_subscriber_id") + .from(Credential3rd::Table, Credential3rd::SubscriberId) + .to(Subscribers::Table, Subscribers::Id) + .on_update(ForeignKeyAction::Cascade) + .on_delete(ForeignKeyAction::Cascade), + ) + .to_owned(), + ) + .await?; + + manager + .create_index( + Index::create() + .name("idx_credential_3rd_credential_type") + .table(Credential3rd::Table) + .col(Credential3rd::CredentialType) + .to_owned(), + ) + .await?; + + manager + .create_postgres_auto_update_ts_trigger_for_col( + Credential3rd::Table, + GeneralIds::UpdatedAt, + ) + .await?; + + manager + .alter_table( + Table::alter() + .table(Subscriptions::Table) + .add_column_if_not_exists(integer_null(Subscriptions::CredentialId)) + .add_foreign_key( + TableForeignKey::new() + .name("fk_subscriptions_credential_id") + .from_tbl(Subscriptions::Table) + .from_col(Subscriptions::CredentialId) + .to_tbl(Credential3rd::Table) + .to_col(Credential3rd::Id) + .on_update(ForeignKeyAction::Cascade) + .on_delete(ForeignKeyAction::SetNull), + ) + .to_owned(), + ) + .await?; + + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(Subscriptions::Table) + .drop_column(Subscriptions::CredentialId) + .to_owned(), + ) + .await?; + + manager + .drop_table(Table::drop().table(Credential3rd::Table).to_owned()) + .await?; + + manager + .drop_postgres_enum_for_active_enum(Credential3rdTypeEnum) + .await?; + + Ok(()) + } +} diff --git a/apps/recorder/src/migrations/mod.rs b/apps/recorder/src/migrations/mod.rs index 0305e6d..89d4bce 100644 --- a/apps/recorder/src/migrations/mod.rs +++ b/apps/recorder/src/migrations/mod.rs @@ -7,6 +7,7 @@ pub mod m20220101_000001_init; pub mod m20240224_082543_add_downloads; pub mod m20240225_060853_subscriber_add_downloader; pub mod m20241231_000001_auth; +pub mod m20250501_021523_credential_3rd; pub struct Migrator; @@ -18,6 +19,7 @@ impl MigratorTrait for Migrator { Box::new(m20240224_082543_add_downloads::Migration), Box::new(m20240225_060853_subscriber_add_downloader::Migration), Box::new(m20241231_000001_auth::Migration), + Box::new(m20250501_021523_credential_3rd::Migration), ] } } diff --git a/apps/recorder/src/models/credential_3rd.rs b/apps/recorder/src/models/credential_3rd.rs new file mode 100644 index 0000000..2def4ea --- /dev/null +++ b/apps/recorder/src/models/credential_3rd.rs @@ -0,0 +1,143 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use sea_orm::{ActiveValue, prelude::*}; +use serde::{Deserialize, Serialize}; + +use crate::{ + app::AppContextTrait, + crypto::UserPassCredential, + errors::{RecorderError, RecorderResult}, +}; + +#[derive( + Debug, Clone, PartialEq, Eq, EnumIter, DeriveActiveEnum, DeriveDisplay, Serialize, Deserialize, +)] +#[sea_orm( + rs_type = "String", + db_type = "Enum", + enum_name = "credential_3rd_type" +)] +pub enum Credential3rdType { + #[sea_orm(string_value = "mikan")] + Mikan, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, DeriveEntityModel)] +#[sea_orm(table_name = "credential_3rd")] +pub struct Model { + #[sea_orm(default_expr = "Expr::current_timestamp()")] + pub created_at: DateTimeUtc, + #[sea_orm(default_expr = "Expr::current_timestamp()")] + pub updated_at: DateTimeUtc, + #[sea_orm(primary_key)] + pub id: i32, + pub subscriber_id: i32, + pub credential_type: Credential3rdType, + pub cookies: Option, + pub username: Option, + pub password: Option, + pub user_agent: Option, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::subscribers::Entity", + from = "Column::SubscriberId", + to = "super::subscribers::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + Subscriber, + #[sea_orm(has_many = "super::subscriptions::Entity")] + Subscription, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::Subscriber.def() + } +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::Subscription.def() + } +} + +#[async_trait] +impl ActiveModelBehavior for ActiveModel {} + +impl ActiveModel { + pub async fn try_encrypt(mut self, ctx: Arc) -> RecorderResult { + let crypto = ctx.crypto(); + + if let ActiveValue::Set(Some(username)) = self.username { + let username_enc = crypto.encrypt_credentials(&username)?; + self.username = ActiveValue::Set(Some(username_enc)); + } + + if let ActiveValue::Set(Some(password)) = self.password { + let password_enc = crypto.encrypt_credentials(&password)?; + self.password = ActiveValue::Set(Some(password_enc)); + } + + if let ActiveValue::Set(Some(cookies)) = self.cookies { + let cookies_enc = crypto.encrypt_credentials(&cookies)?; + self.cookies = ActiveValue::Set(Some(cookies_enc)); + } + + Ok(self) + } +} + +impl Model { + pub async fn find_by_id( + ctx: Arc, + id: i32, + ) -> RecorderResult> { + let db = ctx.db(); + let credential = Entity::find_by_id(id).one(db).await?; + + Ok(credential) + } + + pub fn try_into_userpass_credential( + self, + ctx: Arc, + ) -> RecorderResult { + let crypto = ctx.crypto(); + let username_enc = self + .username + .ok_or_else(|| RecorderError::Credential3rdError { + message: "UserPassCredential username is required".to_string(), + source: None.into(), + })?; + + let username: String = crypto.decrypt_credentials(&username_enc)?; + + let password_enc = self + .password + .ok_or_else(|| RecorderError::Credential3rdError { + message: "UserPassCredential password is required".to_string(), + source: None.into(), + })?; + + let password: String = crypto.decrypt_credentials(&password_enc)?; + + let cookies: Option = if let Some(cookies_enc) = self.cookies { + let cookies = crypto.decrypt_credentials(&cookies_enc)?; + Some(cookies) + } else { + None + }; + + Ok(UserPassCredential { + username, + password, + cookies, + user_agent: self.user_agent, + }) + } +} diff --git a/apps/recorder/src/models/episodes.rs b/apps/recorder/src/models/episodes.rs index d9e4325..2d636d3 100644 --- a/apps/recorder/src/models/episodes.rs +++ b/apps/recorder/src/models/episodes.rs @@ -9,7 +9,7 @@ use crate::{ app::AppContextTrait, errors::RecorderResult, extract::{ - mikan::{MikanEpisodeMeta, build_mikan_episode_homepage}, + mikan::{MikanEpisodeMeta, build_mikan_episode_homepage_url}, rawname::parse_episode_meta_from_raw_name, }, }; @@ -200,8 +200,10 @@ impl ActiveModel { }) .ok() .unwrap_or_default(); - let homepage = - build_mikan_episode_homepage(ctx.mikan().base_url().clone(), &item.mikan_episode_id); + let homepage = build_mikan_episode_homepage_url( + ctx.mikan().base_url().clone(), + &item.mikan_episode_id, + ); Ok(Self { mikan_episode_id: ActiveValue::Set(Some(item.mikan_episode_id)), diff --git a/apps/recorder/src/models/mod.rs b/apps/recorder/src/models/mod.rs index 788c98f..5f02cc8 100644 --- a/apps/recorder/src/models/mod.rs +++ b/apps/recorder/src/models/mod.rs @@ -1,5 +1,6 @@ pub mod auth; pub mod bangumi; +pub mod credential_3rd; pub mod downloaders; pub mod downloads; pub mod episodes; diff --git a/apps/recorder/src/models/subscriptions.rs b/apps/recorder/src/models/subscriptions.rs index 8f89275..23b0f38 100644 --- a/apps/recorder/src/models/subscriptions.rs +++ b/apps/recorder/src/models/subscriptions.rs @@ -11,7 +11,7 @@ use crate::{ errors::RecorderResult, extract::{ mikan::{ - build_mikan_bangumi_homepage, build_mikan_bangumi_rss_link, + build_mikan_bangumi_homepage_url, build_mikan_bangumi_rss_url, extract_mikan_bangumi_meta_from_bangumi_homepage, extract_mikan_episode_meta_from_episode_homepage, extract_mikan_rss_channel_from_rss_link, @@ -54,6 +54,7 @@ pub struct Model { pub category: SubscriptionCategory, pub source_url: String, pub enabled: bool, + pub credential_id: Option, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] @@ -74,6 +75,14 @@ pub enum Relation { SubscriptionEpisode, #[sea_orm(has_many = "super::subscription_bangumi::Entity")] SubscriptionBangumi, + #[sea_orm( + belongs_to = "super::credential_3rd::Entity", + from = "Column::CredentialId", + to = "super::credential_3rd::Column::Id", + on_update = "Cascade", + on_delete = "SetNull" + )] + Credential3rd, } impl Related for Entity { @@ -122,6 +131,12 @@ impl Related for Entity { } } +impl Related for Entity { + fn to() -> RelationDef { + Relation::Credential3rd.def() + } +} + #[derive(Copy, Clone, Debug, EnumIter, DeriveRelatedEntity)] pub enum RelatedEntity { #[sea_orm(entity = "super::subscribers::Entity")] @@ -134,6 +149,8 @@ pub enum RelatedEntity { SubscriptionEpisode, #[sea_orm(entity = "super::subscription_bangumi::Entity")] SubscriptionBangumi, + #[sea_orm(entity = "super::credential_3rd::Entity")] + Credential3rd, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -270,12 +287,12 @@ impl Model { for ((mikan_bangumi_id, mikan_fansub_id), new_ep_metas) in new_mikan_bangumi_groups { let mikan_base_url = ctx.mikan().base_url(); - let bgm_homepage = build_mikan_bangumi_homepage( + let bgm_homepage = build_mikan_bangumi_homepage_url( mikan_base_url.clone(), &mikan_bangumi_id, Some(&mikan_fansub_id), ); - let bgm_rss_link = build_mikan_bangumi_rss_link( + let bgm_rss_link = build_mikan_bangumi_rss_url( mikan_base_url.clone(), &mikan_bangumi_id, Some(&mikan_fansub_id), diff --git a/apps/recorder/src/tasks/config.rs b/apps/recorder/src/tasks/config.rs new file mode 100644 index 0000000..7c63057 --- /dev/null +++ b/apps/recorder/src/tasks/config.rs @@ -0,0 +1,4 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TaskConfig {} diff --git a/apps/recorder/src/tasks/core.rs b/apps/recorder/src/tasks/core.rs deleted file mode 100644 index d648700..0000000 --- a/apps/recorder/src/tasks/core.rs +++ /dev/null @@ -1,279 +0,0 @@ -use std::{borrow::Cow, sync::Arc}; - -use async_stream::stream; -use futures::{Stream, StreamExt, pin_mut}; -use serde::{Serialize, de::DeserializeOwned}; -use tokio::sync::{RwLock, mpsc}; - -use crate::{ - app::AppContextTrait, - errors::app_error::{RecorderError, RecorderResult}, - models, -}; - -pub struct TaskMeta { - pub subscriber_id: i32, - pub task_id: i32, - pub task_kind: Cow<'static, str>, -} - -pub struct ReplayChannel { - sender: mpsc::UnboundedSender, - channels: Arc>>>, - buffer: Arc>>, -} - -impl ReplayChannel { - pub fn new(history: Vec) -> Self { - let (tx, mut rx) = mpsc::unbounded_channel::(); - let channels = Arc::new(RwLock::new(Vec::>::new())); - let buffer = Arc::new(RwLock::new(history)); - { - let channels = channels.clone(); - let buffer = buffer.clone(); - tokio::spawn(async move { - loop { - match rx.recv().await { - Some(value) => { - let mut w = buffer.write().await; - let senders = channels.read().await; - for s in senders.iter() { - if !s.is_closed() { - if let Err(err) = s.send(value.clone()) { - tracing::error!(err = %err, "replay-channel broadcast to other subscribers error"); - } - } - } - w.push(value); - } - None => { - drop(rx); - let mut cs = channels.write().await; - cs.clear(); - break; - } - } - } - }); - } - - Self { - sender: tx, - channels, - buffer, - } - } - - pub fn sender(&self) -> &mpsc::UnboundedSender { - &self.sender - } - - pub async fn receiver(&self) -> mpsc::UnboundedReceiver { - let (tx, rx) = mpsc::unbounded_channel(); - let items = self.buffer.read().await; - for item in items.iter() { - if let Err(err) = tx.send(item.clone()) { - tracing::error!(err = %err, "replay-channel send replay value to other subscribers error"); - } - } - if !self.sender.is_closed() { - let mut sw = self.channels.write().await; - sw.push(tx); - } - rx - } - - pub async fn close(&self) { - let mut senders = self.channels.write().await; - senders.clear(); - } -} - -pub trait StreamTaskCoreTrait: Sized { - type Request: Serialize + DeserializeOwned; - type Item: Serialize + DeserializeOwned; - - fn task_id(&self) -> i32; - - fn task_kind(&self) -> &str; - - fn new(meta: TaskMeta, request: Self::Request) -> Self; - - fn request(&self) -> &Self::Request; -} - -pub trait StreamTaskReplayLayoutTrait: StreamTaskCoreTrait { - fn history(&self) -> &[Arc>]; - - fn resume_from_model( - task: models::tasks::Model, - stream_items: Vec, - ) -> RecorderResult; - - fn running_receiver( - &self, - ) -> impl Future>>>>; - - #[allow(clippy::type_complexity)] - fn init_receiver( - &self, - ) -> impl Future< - Output = ( - mpsc::UnboundedSender>>, - mpsc::UnboundedReceiver>>, - ), - >; - - fn serialize_request(request: Self::Request) -> RecorderResult { - serde_json::to_value(request).map_err(RecorderError::from) - } - - fn serialize_item(item: RecorderResult) -> RecorderResult { - serde_json::to_value(item).map_err(RecorderError::from) - } - - fn deserialize_request(request: serde_json::Value) -> RecorderResult { - serde_json::from_value(request).map_err(RecorderError::from) - } - - fn deserialize_item(item: serde_json::Value) -> RecorderResult> { - serde_json::from_value(item).map_err(RecorderError::from) - } -} - -pub trait StreamTaskRunnerTrait: StreamTaskCoreTrait { - fn run( - context: Arc, - request: &Self::Request, - history: &[Arc>], - ) -> impl Stream>; -} - -pub trait StreamTaskReplayRunnerTrait: StreamTaskRunnerTrait + StreamTaskReplayLayoutTrait { - fn run_shared( - &self, - context: Arc, - ) -> impl Stream>> { - stream! { - if let Some(mut receiver) = self.running_receiver().await { - while let Some(item) = receiver.recv().await { - yield item - } - } else { - let (tx, _) = self.init_receiver().await; - let stream = Self::run(context, self.request(), self.history()); - - pin_mut!(stream); - - while let Some(item) = stream.next().await { - let item = Arc::new(item); - if let Err(err) = tx.send(item.clone()) { - tracing::error!(task_id = self.task_id(), task_kind = self.task_kind(), err = %err, "run shared send error"); - } - yield item - } - }; - - } - } -} - -pub struct StandardStreamTaskReplayLayout -where - Request: Serialize + DeserializeOwned, - Item: Serialize + DeserializeOwned + Sync + Send + 'static, -{ - pub meta: TaskMeta, - pub request: Request, - pub history: Vec>>, - #[allow(clippy::type_complexity)] - pub channel: Arc>>>>>, -} - -impl StreamTaskCoreTrait for StandardStreamTaskReplayLayout -where - Request: Serialize + DeserializeOwned, - Item: Serialize + DeserializeOwned + Sync + Send + 'static, -{ - type Request = Request; - type Item = Item; - - fn task_id(&self) -> i32 { - self.meta.task_id - } - - fn request(&self) -> &Self::Request { - &self.request - } - - fn task_kind(&self) -> &str { - &self.meta.task_kind - } - - fn new(meta: TaskMeta, request: Self::Request) -> Self { - Self { - meta, - request, - history: vec![], - channel: Arc::new(RwLock::new(None)), - } - } -} - -impl StreamTaskReplayLayoutTrait for StandardStreamTaskReplayLayout -where - Request: Serialize + DeserializeOwned, - Item: Serialize + DeserializeOwned + Sync + Send + 'static, -{ - fn history(&self) -> &[Arc>] { - &self.history - } - - fn resume_from_model( - task: models::tasks::Model, - stream_items: Vec, - ) -> RecorderResult { - Ok(Self { - meta: TaskMeta { - task_id: task.id, - subscriber_id: task.subscriber_id, - task_kind: Cow::Owned(task.task_type), - }, - request: Self::deserialize_request(task.request_data)?, - history: stream_items - .into_iter() - .map(|m| Self::deserialize_item(m.item).map(Arc::new)) - .collect::>>()?, - channel: Arc::new(RwLock::new(None)), - }) - } - - async fn running_receiver( - &self, - ) -> Option>>> { - if let Some(channel) = self.channel.read().await.as_ref() { - Some(channel.receiver().await) - } else { - None - } - } - - async fn init_receiver( - &self, - ) -> ( - mpsc::UnboundedSender>>, - mpsc::UnboundedReceiver>>, - ) { - let channel = ReplayChannel::new(self.history.clone()); - let rx = channel.receiver().await; - let sender = channel.sender().clone(); - - { - { - let mut w = self.channel.write().await; - *w = Some(channel); - } - } - (sender, rx) - } -} diff --git a/apps/recorder/src/tasks/mikan/extract_mikan_bangumis_meta_from_my_bangumi.rs b/apps/recorder/src/tasks/mikan/extract_mikan_bangumis_meta_from_my_bangumi.rs deleted file mode 100644 index 4d024e7..0000000 --- a/apps/recorder/src/tasks/mikan/extract_mikan_bangumis_meta_from_my_bangumi.rs +++ /dev/null @@ -1,37 +0,0 @@ -use std::sync::Arc; - -use futures::Stream; -use serde::{Deserialize, Serialize}; -use url::Url; - -use crate::{ - app::AppContextTrait, - errors::RecorderResult, - extract::mikan::{MikanAuthSecrecy, MikanBangumiMeta, web_extract}, - tasks::core::{StandardStreamTaskReplayLayout, StreamTaskRunnerTrait}, -}; - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ExtractMikanBangumisMetaFromMyBangumiRequest { - pub my_bangumi_page_url: Url, - pub auth_secrecy: Option, -} - -pub type ExtractMikanBangumisMetaFromMyBangumiTask = - StandardStreamTaskReplayLayout; - -impl StreamTaskRunnerTrait for ExtractMikanBangumisMetaFromMyBangumiTask { - fn run( - context: Arc, - request: &Self::Request, - history: &[Arc>], - ) -> impl Stream> { - let context = context.clone(); - web_extract::extract_mikan_bangumis_meta_from_my_bangumi_page( - context, - request.my_bangumi_page_url.clone(), - request.auth_secrecy.clone(), - history, - ) - } -} diff --git a/apps/recorder/src/tasks/mikan/extract_season_subscription.rs b/apps/recorder/src/tasks/mikan/extract_season_subscription.rs new file mode 100644 index 0000000..2eca1d2 --- /dev/null +++ b/apps/recorder/src/tasks/mikan/extract_season_subscription.rs @@ -0,0 +1,172 @@ +use std::{ops::Deref, sync::Arc}; + +use apalis::prelude::*; +use apalis_sql::postgres::PostgresStorage; +use fetch::fetch_html; +use serde::{Deserialize, Serialize}; +use snafu::OptionExt; + +use crate::{ + app::AppContextTrait, + errors::{RecorderError, RecorderResult}, + extract::mikan::{ + MikanBangumiMeta, MikanSeasonStr, build_mikan_season_flow_url, + extract_mikan_bangumi_indices_meta_from_season_flow_fragment, + web_extract::{ + MikanBangumiIndexMeta, build_mikan_bangumi_expand_subscribed_fragment_url, + extract_mikan_bangumi_meta_from_expand_subscribed_fragment, + }, + }, +}; + +const TASK_NAME: &str = "mikan_extract_season_subscription"; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ExtractMikanSeasonSubscriptionTask { + pub task_id: i32, + pub year: i32, + pub season_str: MikanSeasonStr, + pub credential_id: i32, + pub subscription_id: i32, + pub subscriber_id: i32, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ExtractMikanSeasonSubscriptionFansubsTask { + pub task_id: i32, + pub year: i32, + pub season_str: MikanSeasonStr, + pub credential_id: i32, + pub subscription_id: i32, + pub subscriber_id: i32, + pub bangumi_indices: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ExtractMikanSeasonSubscriptionTaskResult { + pub task_id: i32, + pub year: i32, + pub season_str: MikanSeasonStr, + pub credential_id: i32, + pub subscription_id: i32, + pub subscriber_id: i32, + pub bangumi_metas: Vec, +} + +pub async fn extract_mikan_season_subscription( + job: ExtractMikanSeasonSubscriptionTask, + data: Data>, +) -> RecorderResult> { + let ctx = data.deref(); + + let mikan_client = ctx + .mikan() + .fork_with_credential(ctx.clone(), Some(job.credential_id)) + .await?; + + let mikan_base_url = mikan_client.base_url().clone(); + + let season_flow_fragment_url = + build_mikan_season_flow_url(mikan_base_url.clone(), job.year, job.season_str); + + let season_flow_fragment = fetch_html(&mikan_client, season_flow_fragment_url.clone()).await?; + + let mut bangumi_indices = extract_mikan_bangumi_indices_meta_from_season_flow_fragment( + &season_flow_fragment, + mikan_base_url.clone(), + ); + + if bangumi_indices.is_empty() && !mikan_client.has_login().await? { + mikan_client.login().await?; + let season_flow_fragment = + fetch_html(&mikan_client, season_flow_fragment_url.clone()).await?; + bangumi_indices = extract_mikan_bangumi_indices_meta_from_season_flow_fragment( + &season_flow_fragment, + mikan_base_url.clone(), + ); + } + + Ok(GoTo::Next(ExtractMikanSeasonSubscriptionFansubsTask { + task_id: job.task_id, + year: job.year, + season_str: job.season_str, + credential_id: job.credential_id, + subscription_id: job.subscription_id, + subscriber_id: job.subscriber_id, + bangumi_indices, + })) +} + +pub async fn extract_mikan_season_subscription_fansubs( + job: ExtractMikanSeasonSubscriptionFansubsTask, + data: Data>, +) -> RecorderResult> { + let ctx = data.deref(); + + let mikan_client = ctx + .mikan() + .fork_with_credential(ctx.clone(), Some(job.credential_id)) + .await?; + + let bangumi_indices = job.bangumi_indices; + + let mut bangumi_metas = vec![]; + + let mikan_base_url = mikan_client.base_url().clone(); + + for bangumi_index in bangumi_indices { + let bangumi_title = bangumi_index.bangumi_title.clone(); + let bangumi_expand_subscribed_fragment_url = + build_mikan_bangumi_expand_subscribed_fragment_url( + mikan_base_url.clone(), + &bangumi_index.mikan_bangumi_id, + ); + let bangumi_expand_subscribed_fragment = + fetch_html(&mikan_client, bangumi_expand_subscribed_fragment_url).await?; + + let bangumi_meta = extract_mikan_bangumi_meta_from_expand_subscribed_fragment( + bangumi_index, + &bangumi_expand_subscribed_fragment, + mikan_base_url.clone(), + ) + .with_whatever_context::<_, String, RecorderError>(|| { + format!( + "failed to extract mikan bangumi fansub of title = {}", + bangumi_title + ) + })?; + + bangumi_metas.push(bangumi_meta); + } + + Ok(GoTo::Done(ExtractMikanSeasonSubscriptionTaskResult { + bangumi_metas, + credential_id: job.credential_id, + season_str: job.season_str, + subscriber_id: job.subscriber_id, + subscription_id: job.subscription_id, + task_id: job.task_id, + year: job.year, + })) +} + +pub fn register_extract_mikan_season_subscription_task( + monitor: Monitor, + ctx: Arc, +) -> RecorderResult<(Monitor, PostgresStorage>)> { + let pool = ctx.db().get_postgres_connection_pool().clone(); + let storage = PostgresStorage::new(pool); + + let steps = StepBuilder::new() + .step_fn(extract_mikan_season_subscription) + .step_fn(extract_mikan_season_subscription_fansubs); + + let worker = WorkerBuilder::new(TASK_NAME) + .catch_panic() + .enable_tracing() + .data(ctx) + .backend(storage.clone()) + .build_stepped(steps); + + Ok((monitor.register(worker), storage)) +} diff --git a/apps/recorder/src/tasks/mikan/mod.rs b/apps/recorder/src/tasks/mikan/mod.rs index 704f219..c2f1fde 100644 --- a/apps/recorder/src/tasks/mikan/mod.rs +++ b/apps/recorder/src/tasks/mikan/mod.rs @@ -1 +1,5 @@ -pub mod extract_mikan_bangumis_meta_from_my_bangumi; +mod extract_season_subscription; + +pub use extract_season_subscription::{ + ExtractMikanSeasonSubscriptionTask, register_extract_mikan_season_subscription_task, +}; diff --git a/apps/recorder/src/tasks/mod.rs b/apps/recorder/src/tasks/mod.rs index 00efa78..9c8035b 100644 --- a/apps/recorder/src/tasks/mod.rs +++ b/apps/recorder/src/tasks/mod.rs @@ -1,4 +1,6 @@ -pub mod core; +pub mod config; pub mod mikan; pub mod service; -pub mod registry; + +pub use config::TaskConfig; +pub use service::TaskService; diff --git a/apps/recorder/src/tasks/registry.rs b/apps/recorder/src/tasks/registry.rs deleted file mode 100644 index 8b13789..0000000 --- a/apps/recorder/src/tasks/registry.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/apps/recorder/src/tasks/service.rs b/apps/recorder/src/tasks/service.rs index 48f4696..bf327e9 100644 --- a/apps/recorder/src/tasks/service.rs +++ b/apps/recorder/src/tasks/service.rs @@ -1,4 +1,41 @@ -#[derive(Debug)] -pub struct TaskService {} +use std::{fmt::Debug, sync::Arc}; -impl TaskService {} +use apalis::prelude::*; +use apalis_sql::postgres::PostgresStorage; +use tokio::sync::Mutex; + +use super::{TaskConfig, mikan::register_extract_mikan_season_subscription_task}; +use crate::{app::AppContextTrait, errors::RecorderResult}; + +pub struct TaskService { + config: TaskConfig, + #[allow(dead_code)] + monitor: Arc>, + pub extract_mikan_season_subscription_task_storage: + PostgresStorage>, +} + +impl TaskService { + pub async fn from_config_and_ctx( + config: TaskConfig, + ctx: Arc, + ) -> RecorderResult { + let monitor = Monitor::new(); + let (monitor, extract_mikan_season_subscription_task_storage) = + register_extract_mikan_season_subscription_task(monitor, ctx.clone())?; + + Ok(Self { + config, + monitor: Arc::new(Mutex::new(monitor)), + extract_mikan_season_subscription_task_storage, + }) + } +} + +impl Debug for TaskService { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TaskService") + .field("config", &self.config) + .finish() + } +} diff --git a/apps/recorder/src/test_utils/app.rs b/apps/recorder/src/test_utils/app.rs index aa17845..5a75f6a 100644 --- a/apps/recorder/src/test_utils/app.rs +++ b/apps/recorder/src/test_utils/app.rs @@ -1,3 +1,5 @@ +use std::fmt::Debug; + use typed_builder::TypedBuilder; use crate::app::AppContextTrait; @@ -13,12 +15,20 @@ pub struct UnitTestAppContext { auth: Option, graphql: Option, storage: Option, + crypto: Option, + tasks: Option, #[builder(default = Some(String::from(env!("CARGO_MANIFEST_DIR"))))] working_dir: Option, #[builder(default = crate::app::Environment::Testing, setter(!strip_option))] environment: crate::app::Environment, } +impl Debug for UnitTestAppContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "UnitTestAppContext") + } +} + impl AppContextTrait for UnitTestAppContext { fn logger(&self) -> &crate::logger::LoggerService { self.logger.as_ref().expect("should set logger") @@ -59,4 +69,12 @@ impl AppContextTrait for UnitTestAppContext { fn working_dir(&self) -> &String { self.working_dir.as_ref().expect("should set working_dir") } + + fn crypto(&self) -> &crate::crypto::CryptoService { + self.crypto.as_ref().expect("should set crypto") + } + + fn task(&self) -> &crate::tasks::TaskService { + self.tasks.as_ref().expect("should set tasks") + } } diff --git a/apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring-noauth.html b/apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring-noauth.html new file mode 100644 index 0000000..be9589e --- /dev/null +++ b/apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring-noauth.html @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
>_< 您还没有订阅任何番组,快去首页添加订阅吧
+
\ No newline at end of file diff --git a/apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring.html b/apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring.html new file mode 100644 index 0000000..0f9ec56 --- /dev/null +++ b/apps/recorder/tests/resources/mikan/BangumiCoverFlow-2025-spring.html @@ -0,0 +1,840 @@ + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
\ No newline at end of file diff --git a/apps/recorder/tests/resources/mikan/ExpandBangumi-3599-noauth.html b/apps/recorder/tests/resources/mikan/ExpandBangumi-3599-noauth.html new file mode 100644 index 0000000..e9d8b6f --- /dev/null +++ b/apps/recorder/tests/resources/mikan/ExpandBangumi-3599-noauth.html @@ -0,0 +1,2790 @@ +
+ + + + + +
+
+
+
+
+
+ Summer Pockets  + + +
+
    +
  • +
    Kirara Fantasia
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 三明治摆烂组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    ANi
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 萌樱字幕组&云歌字幕组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 北宇治字幕组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 樱桃花字幕组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 云光字幕组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 澄空学园&华盟字幕社&动漫国字幕组 +
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 桜都字幕组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 星冈学园放送部
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 沸班亚马制作组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    LoliHouse
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 喵萌奶茶屋
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 爱恋字幕社
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
  • +
    + 不当舔狗制作组
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • +
+
+ +
+
    +
  • +
  • +
  • +
+
+ +
+
+
+ +
+
+
+ +
+
+ + +
+
+
+ +
+
+ + +
+
+
+ +
+
+ +
+
+
+ +
+
+
+
+
+ +
+
+ + + + + +
+
+
+ +
+
+
\ No newline at end of file diff --git a/apps/recorder/tests/resources/mikan/ExpandBangumi-3599.html b/apps/recorder/tests/resources/mikan/ExpandBangumi-3599.html new file mode 100644 index 0000000..3b913c3 --- /dev/null +++ b/apps/recorder/tests/resources/mikan/ExpandBangumi-3599.html @@ -0,0 +1,2790 @@ +
+ + + + + +
+
+
+
+
+
+ Summer Pockets  + + +
+
    +
  • +
    LoliHouse
    +
    +
    +
    +
    + + 订阅设置 +
    +
    +

    订阅语言

    +
    + + + +
    +

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    +
    + +
    +
  • + + + + + + + + + + + + + + +
+
+ +
+
    +
+
+ +
+
+
+ + +
+
+
+ +
+
+ + +
+
+
+ +
+
+ + +
+
+
+ +
+
+ +
+
+
+ +
+
+
+
+
+ +
+
+ + + + +
+
+
+ +
+
+
\ No newline at end of file diff --git a/apps/recorder/tests/resources/mikan/ExpandBangumi.htm b/apps/recorder/tests/resources/mikan/ExpandBangumi.htm deleted file mode 100644 index 9dbb624..0000000 --- a/apps/recorder/tests/resources/mikan/ExpandBangumi.htm +++ /dev/null @@ -1,1466 +0,0 @@ -
- - - - - -
-
-
-
-
- -
    -
  • -
    LoliHouse
    -
    -
    -
    -
    - - 订阅设置 -
    -
    -

    订阅语言

    -
    - - - -
    -

    注:仅会显示订阅/RSS时适用,右侧一览仍为全部条目

    -
    - -
    -
  • - - - - - -
-
- -
-
    -
-
- -
-
-
- -
-
-
- -
-
-
-
-
- -
-
-
-
-
- -
-
-
-
-
- -
-
-
-
-
- -
-
- -
-
-
- -
-
-
diff --git a/apps/recorder/tests/resources/mikan/MyBangumi-noauth.htm b/apps/recorder/tests/resources/mikan/MyBangumi-noauth.htm deleted file mode 100644 index 48f516a..0000000 --- a/apps/recorder/tests/resources/mikan/MyBangumi-noauth.htm +++ /dev/null @@ -1,641 +0,0 @@ - - - - - - - - - - - - - - - - Mikan Project - 我的番组 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - - - - - - - - - diff --git a/apps/recorder/tests/resources/mikan/MyBangumi.htm b/apps/recorder/tests/resources/mikan/MyBangumi.htm deleted file mode 100644 index 17e280c..0000000 --- a/apps/recorder/tests/resources/mikan/MyBangumi.htm +++ /dev/null @@ -1,3180 +0,0 @@ - - - - - - - - - - - - - - - - - Mikan Project - 我的番组 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - - - - - - - - - -
-
- - diff --git a/packages/fetch/Cargo.toml b/packages/fetch/Cargo.toml index 56a89f3..555452a 100644 --- a/packages/fetch/Cargo.toml +++ b/packages/fetch/Cargo.toml @@ -16,9 +16,7 @@ axum-extra = { workspace = true } async-trait = { workspace = true } moka = { workspace = true } reqwest = { workspace = true } - leaky-bucket = "1.1" -cookie = "0.18" http-cache-reqwest = { version = "0.15", features = [ "manager-cacache", "manager-moka", @@ -33,3 +31,4 @@ http-cache = { version = "0.20", features = [ "manager-cacache", "manager-moka", ], default-features = false } +reqwest_cookie_store = { version = "0.8.0", features = ["serde"] } diff --git a/packages/fetch/src/client/core.rs b/packages/fetch/src/client/core.rs index 3ab687d..94d318d 100644 --- a/packages/fetch/src/client/core.rs +++ b/packages/fetch/src/client/core.rs @@ -6,7 +6,8 @@ use http_cache_reqwest::{ Cache, CacheManager, CacheMode, HttpCache, HttpCacheOptions, MokaManager, }; use leaky_bucket::RateLimiter; -use reqwest::{ClientBuilder, Request, Response}; +use reqwest::{self, ClientBuilder, Request, Response}; +use reqwest_cookie_store::{CookieStore, CookieStoreRwLock}; use reqwest_middleware::{ ClientBuilder as ClientWithMiddlewareBuilder, ClientWithMiddleware, Middleware, Next, }; @@ -16,7 +17,6 @@ use serde::{Deserialize, Serialize}; use serde_with::serde_as; use snafu::Snafu; -use super::HttpClientSecrecyDataTrait; use crate::get_random_mobile_ua; pub struct RateLimiterMiddleware { @@ -109,6 +109,8 @@ pub enum HttpClientError { ReqwestMiddlewareError { source: reqwest_middleware::Error }, #[snafu(transparent)] HttpError { source: http::Error }, + #[snafu(display("Failed to parse cookies: {}", source))] + ParseCookiesError { source: serde_json::Error }, } pub trait HttpClientTrait: Deref + Debug {} @@ -117,17 +119,29 @@ pub struct HttpClientFork { pub client_builder: ClientBuilder, pub middleware_stack: Vec>, pub config: HttpClientConfig, + pub cookie_store: Option>, } impl HttpClientFork { - pub fn attach_secrecy(self, secrecy: S) -> Self { - let mut fork = self; - fork.client_builder = secrecy.attach_secrecy_to_client(fork.client_builder); - fork + pub fn attach_cookies(mut self, cookies: &str) -> Result { + let cookie_store: CookieStore = serde_json::from_str(cookies) + .map_err(|err| HttpClientError::ParseCookiesError { source: err })?; + + let cookies_store = Arc::new(CookieStoreRwLock::new(cookie_store)); + + self.cookie_store = Some(cookies_store.clone()); + self.client_builder = self.client_builder.cookie_provider(cookies_store); + Ok(self) + } + + pub fn attach_user_agent(mut self, user_agent: &str) -> Self { + self.client_builder = self.client_builder.user_agent(user_agent); + self } } pub struct HttpClient { + pub cookie_store: Option>, client: ClientWithMiddleware, middleware_stack: Vec>, pub config: HttpClientConfig, @@ -268,6 +282,7 @@ impl HttpClient { client: reqwest_with_middleware, middleware_stack, config, + cookie_store: None, }) } @@ -287,6 +302,7 @@ impl HttpClient { client_builder: reqwest_client_builder, middleware_stack: self.middleware_stack.clone(), config: self.config.clone(), + cookie_store: self.cookie_store.clone(), } } @@ -295,6 +311,7 @@ impl HttpClient { client_builder, middleware_stack, config, + cookie_store, } = fork; let reqwest_client = client_builder.build()?; let mut reqwest_with_middleware_builder = ClientWithMiddlewareBuilder::new(reqwest_client); @@ -309,6 +326,7 @@ impl HttpClient { client: reqwest_with_middleware, middleware_stack, config, + cookie_store, }) } } diff --git a/packages/fetch/src/client/mod.rs b/packages/fetch/src/client/mod.rs index cc0b773..636fe9a 100644 --- a/packages/fetch/src/client/mod.rs +++ b/packages/fetch/src/client/mod.rs @@ -1,9 +1,6 @@ pub mod core; -pub mod secrecy; pub use core::{ HttpClient, HttpClientCacheBackendConfig, HttpClientCachePresetConfig, HttpClientConfig, HttpClientError, HttpClientTrait, }; - -pub use secrecy::{HttpClientCookiesAuth, HttpClientSecrecyDataTrait}; diff --git a/packages/fetch/src/client/secrecy.rs b/packages/fetch/src/client/secrecy.rs deleted file mode 100644 index 93963bb..0000000 --- a/packages/fetch/src/client/secrecy.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::sync::Arc; - -use cookie::Cookie; -use reqwest::{ClientBuilder, cookie::Jar}; -use url::Url; - -use crate::FetchError; - -pub trait HttpClientSecrecyDataTrait { - fn attach_secrecy_to_client(&self, client_builder: ClientBuilder) -> ClientBuilder { - client_builder - } -} - -#[derive(Default)] -pub struct HttpClientCookiesAuth { - pub cookie_jar: Arc, - pub user_agent: Option, -} - -impl HttpClientCookiesAuth { - pub fn from_cookies( - cookies: &str, - url: &Url, - user_agent: Option, - ) -> Result { - let cookie_jar = Arc::new(Jar::default()); - for cookie in Cookie::split_parse(cookies).collect::>, _>>()? { - cookie_jar.add_cookie_str(&cookie.to_string(), url); - } - - Ok(Self { - cookie_jar, - user_agent, - }) - } -} - -impl HttpClientSecrecyDataTrait for HttpClientCookiesAuth { - fn attach_secrecy_to_client(&self, client_builder: ClientBuilder) -> ClientBuilder { - let mut client_builder = client_builder.cookie_provider(self.cookie_jar.clone()); - if let Some(ref user_agent) = self.user_agent { - client_builder = client_builder.user_agent(user_agent); - } - client_builder - } -} diff --git a/packages/fetch/src/errors.rs b/packages/fetch/src/errors.rs index 9656ca2..38125f7 100644 --- a/packages/fetch/src/errors.rs +++ b/packages/fetch/src/errors.rs @@ -3,8 +3,6 @@ use snafu::Snafu; #[derive(Debug, Snafu)] #[snafu(visibility(pub))] pub enum FetchError { - #[snafu(transparent)] - CookieParseError { source: cookie::ParseError }, #[snafu(transparent)] ReqwestError { source: reqwest::Error }, #[snafu(transparent)] diff --git a/packages/fetch/src/lib.rs b/packages/fetch/src/lib.rs index 2d427b4..220a5e1 100644 --- a/packages/fetch/src/lib.rs +++ b/packages/fetch/src/lib.rs @@ -9,10 +9,7 @@ pub mod test_util; pub use core::get_random_mobile_ua; pub use bytes::fetch_bytes; -pub use client::{ - HttpClient, HttpClientConfig, HttpClientCookiesAuth, HttpClientError, - HttpClientSecrecyDataTrait, HttpClientTrait, -}; +pub use client::{HttpClient, HttpClientConfig, HttpClientError, HttpClientTrait}; pub use errors::FetchError; pub use html::fetch_html; pub use image::fetch_image;