feat: classic episodes scraper
parent 28dd9da6ac
commit fe0b7e88e6

Cargo.lock | 995 (generated)
    File diff suppressed because it is too large.
apps/recorder/Cargo.toml

@@ -2,8 +2,20 @@
 name = "recorder"
 version = "0.1.0"
 edition = "2024"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+[features]
+default = ["jxl"]
+playground = ["dep:inquire", "dep:color-eyre", "dep:polars"]
+testcontainers = [
+    "dep:testcontainers",
+    "dep:testcontainers-modules",
+    "dep:testcontainers-ext",
+    "downloader/testcontainers",
+    "testcontainers-modules/postgres",
+]
+jxl = ["dep:jpegxl-rs", "dep:jpegxl-sys"]
+
 [lib]
 name = "recorder"
 path = "src/lib.rs"
@@ -13,17 +25,25 @@ name = "recorder_cli"
 path = "src/bin/main.rs"
 required-features = []
 
-[features]
-default = ["jxl"]
-playground = ["dep:inquire", "dep:color-eyre"]
-testcontainers = [
-    "dep:testcontainers",
-    "dep:testcontainers-modules",
-    "dep:testcontainers-ext",
-    "downloader/testcontainers",
-    "testcontainers-modules/postgres",
-]
-jxl = ["dep:jpegxl-rs", "dep:jpegxl-sys"]
+[[example]]
+name = "mikan_collect_classic_eps"
+path = "examples/mikan_collect_classic_eps.rs"
+required-features = ["playground"]
+
+[[example]]
+name = "mikan_doppel_season_subscription"
+path = "examples/mikan_doppel_season_subscription.rs"
+required-features = ["playground"]
+
+[[example]]
+name = "mikan_doppel_subscriber_subscription"
+path = "examples/mikan_doppel_subscriber_subscription.rs"
+required-features = ["playground"]
+
+[[example]]
+name = "playground"
+path = "examples/playground.rs"
+required-features = ["playground"]
 
 [dependencies]
 downloader = { workspace = true }
@@ -93,7 +113,7 @@ fancy-regex = "0.14"
 lightningcss = "1.0.0-alpha.66"
 html-escape = "0.2.13"
 opendal = { version = "0.53", features = ["default", "services-fs"] }
-scraper = "0.23"
+scraper = "0.23.1"
 async-graphql = { version = "7", features = ["dynamic-schema"] }
 async-graphql-axum = "7"
 seaography = { version = "1.1", features = [
@@ -134,11 +154,11 @@ icu = "2.0.0"
 tracing-tree = "0.4.0"
 num_cpus = "1.17.0"
 headers-accept = "0.1.4"
+polars = { version = "0.49.1", features = ["parquet"], optional = true }
 
 [dev-dependencies]
 inquire = { workspace = true }
 color-eyre = { workspace = true }
-
 serial_test = "3"
 insta = { version = "1", features = ["redactions", "toml", "filters"] }
 rstest = "0.25"

apps/recorder/examples/mikan_collect_classic_eps.rs | 443 (new file)

@@ -0,0 +1,443 @@
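// Assumed invocation (from the apps/recorder crate directory), since this
// example is gated behind the `playground` feature in Cargo.toml:
//   cargo run --example mikan_collect_classic_eps --features playground
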
use std::collections::HashSet;

use chrono::{DateTime, Duration, FixedOffset, NaiveDate, NaiveTime, TimeZone, Utc};
use fetch::{HttpClientConfig, fetch_html};
use lazy_static::lazy_static;
use nom::{
    IResult, Parser,
    branch::alt,
    bytes::complete::{tag, take, take_till1},
    character::complete::space1,
    combinator::map,
};
use recorder::{
    errors::{RecorderError, RecorderResult},
    extract::{
        html::extract_inner_text_from_element_ref,
        mikan::{MikanClient, MikanConfig, MikanEpisodeHash, MikanFansubHash},
    },
};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use snafu::FromString;
use url::Url;

lazy_static! {
    static ref TEST_FOLDER: std::path::PathBuf =
        if cfg!(any(test, debug_assertions, feature = "playground")) {
            std::path::PathBuf::from(format!(
                "{}/tests/resources/mikan/classic_episodes",
                env!("CARGO_MANIFEST_DIR")
            ))
        } else {
            std::path::PathBuf::from("tests/resources/mikan/classic_episodes")
        };
}

lazy_static! {
    static ref TOTAL_PAGE_REGEX: Regex =
        Regex::new(r#"\$\(\'\.classic-view-pagination2\'\)\.bootpag\(\{\s*total:\s*(\d+)"#)
            .unwrap();
}

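// TOTAL_PAGE_REGEX digs the page count out of the inline pagination script
// the classic list page is assumed to embed (shape inferred from the
// pattern itself, not from a captured sample):
//   $('.classic-view-pagination2').bootpag({ total: 123, ... })
// Capture group 1 is parsed as the total number of pages.
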
pub struct MikanClassicEpisodeTableRow {
    pub id: i32,
    pub publish_at: DateTime<Utc>,
    pub mikan_fansub_id: Option<String>,
    pub fansub_name: Option<String>,
    pub mikan_episode_id: String,
    pub original_name: String,
    pub magnet_link: Option<String>,
    pub file_size: Option<String>,
    pub torrent_link: Option<String>,
}

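// Publish timestamps on the page come in two shapes: relative ("今天" =
// today, "昨天" = yesterday) and absolute ("%Y/%m/%d"), each followed by an
// "HH:MM" time. Everything is interpreted in UTC+8 and normalized to UTC
// by `extract_publish_at` below.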
impl MikanClassicEpisodeTableRow {
    fn timezone() -> FixedOffset {
        FixedOffset::east_opt(8 * 3600).unwrap()
    }

    fn fixed_date_parser(input: &str) -> IResult<&str, NaiveDate> {
        alt((
            map(tag("今天"), move |_| {
                Utc::now().with_timezone(&Self::timezone()).date_naive()
            }),
            map(tag("昨天"), move |_| {
                Utc::now().with_timezone(&Self::timezone()).date_naive() - Duration::days(1)
            }),
        ))
        .parse(input)
    }

    fn formatted_date_parser(input: &str) -> IResult<&str, NaiveDate> {
        let (remain, date_str) = take_till1(|c: char| c.is_whitespace()).parse(input)?;
        let date = NaiveDate::parse_from_str(date_str, "%Y/%m/%d").map_err(|_| {
            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
        })?;
        Ok((remain, date))
    }

    fn date_parser(input: &str) -> IResult<&str, NaiveDate> {
        alt((Self::fixed_date_parser, Self::formatted_date_parser)).parse(input)
    }

    fn time_parser(input: &str) -> IResult<&str, NaiveTime> {
        let (remain, time_str) = take(5usize).parse(input)?;
        let time = NaiveTime::parse_from_str(time_str, "%H:%M").map_err(|_| {
            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
        })?;
        Ok((remain, time))
    }

    fn extract_publish_at(text: &str) -> Option<DateTime<Utc>> {
        let (_, (date, _, time)) = (Self::date_parser, space1, Self::time_parser)
            .parse(text)
            .ok()?;
        let local_dt = Self::timezone()
            .from_local_datetime(&date.and_time(time))
            .single()?;
        Some(local_dt.with_timezone(&Utc))
    }

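    // Column layout of a classic-list row, as encoded by the selectors
    // below: td1 publish time, td2 fansub link, td3 title link plus a
    // magnet anchor, td4 file size, td5 torrent link.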
    pub fn from_element_ref(
        row: ElementRef<'_>,
        rev_id: i32,
        idx: i32,
        mikan_base_url: &Url,
    ) -> RecorderResult<Self> {
        let publish_at_selector = &Selector::parse("td:nth-of-type(1)").unwrap();
        let fansub_selector = &Selector::parse("td:nth-of-type(2) > a").unwrap();
        let original_name_selector =
            &Selector::parse("td:nth-of-type(3) > a:nth-of-type(1)").unwrap();
        let magnet_link_selector =
            &Selector::parse("td:nth-of-type(3) > a:nth-of-type(2)").unwrap();
        let file_size_selector = &Selector::parse("td:nth-of-type(4)").unwrap();
        let torrent_link_selector = &Selector::parse("td:nth-of-type(5) > a").unwrap();

        let publish_at = row
            .select(publish_at_selector)
            .next()
            .map(extract_inner_text_from_element_ref)
            .and_then(|e| Self::extract_publish_at(&e));

        let (mikan_fansub_hash, fansub_name) = row
            .select(fansub_selector)
            .next()
            .and_then(|e| {
                e.attr("href")
                    .and_then(|s| mikan_base_url.join(s).ok())
                    .and_then(|u| MikanFansubHash::from_homepage_url(&u))
                    .map(|h| (h, extract_inner_text_from_element_ref(e)))
            })
            .unzip();

        let (mikan_episode_hash, original_name) = row
            .select(original_name_selector)
            .next()
            .and_then(|el| {
                el.attr("href")
                    .and_then(|s| mikan_base_url.join(s).ok())
                    .and_then(|u| MikanEpisodeHash::from_homepage_url(&u))
                    .map(|h| (h, extract_inner_text_from_element_ref(el)))
            })
            .unzip();

        let magnet_link = row
            .select(magnet_link_selector)
            .next()
            .and_then(|el| el.attr("data-clipboard-text"));

        let file_size = row
            .select(file_size_selector)
            .next()
            .map(extract_inner_text_from_element_ref);

        let torrent_link = row
            .select(torrent_link_selector)
            .next()
            .and_then(|el| el.attr("href"));

        if let (Some(mikan_episode_hash), Some(original_name), Some(publish_at)) = (
            mikan_episode_hash.as_ref(),
            original_name.as_ref(),
            publish_at.as_ref(),
        ) {
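            // id packs the reverse page index and the in-page row index into
            // one integer; unique assuming a page never reaches 1000 rows.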
            Ok(Self {
                id: rev_id * 1000 + idx,
                publish_at: *publish_at,
                mikan_fansub_id: mikan_fansub_hash.map(|h| h.mikan_fansub_id.clone()),
                fansub_name,
                mikan_episode_id: mikan_episode_hash.mikan_episode_id.clone(),
                original_name: original_name.clone(),
                magnet_link: magnet_link.map(|s| s.to_string()),
                file_size: file_size.map(|s| s.to_string()),
                torrent_link: torrent_link.map(|s| s.to_string()),
            })
        } else {
            let mut missing_fields = vec![];
            if mikan_episode_hash.is_none() {
                missing_fields.push("mikan_episode_id");
            }
            if original_name.is_none() {
                missing_fields.push("original_name");
            }
            if publish_at.is_none() {
                missing_fields.push("publish_at");
            }
            Err(RecorderError::without_source(format!(
                "Failed to parse episode table row, missing fields: {missing_fields:?}, row \
                 index: {idx}"
            )))
        }
    }
}

pub struct MikanClassicEpisodeTablePage {
    pub page: i32,
    pub total: i32,
    pub html: String,
    pub rows: Vec<MikanClassicEpisodeTableRow>,
}

impl MikanClassicEpisodeTablePage {
    pub fn from_html(
        html: String,
        mikan_base_url: &Url,
        page: i32,
        updated_info: Option<(i32, i32)>,
    ) -> RecorderResult<Self> {
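        // rev_id counts pages from the oldest end (total - page), presumably
        // so already-archived pages keep a stable id as new pages are
        // prepended to the front of the classic list over time.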
        let tr_selector = &Selector::parse("tbody tr").unwrap();
        let doc = Html::parse_document(&html);
        if let Some(mut total) = TOTAL_PAGE_REGEX
            .captures(&html)
            .and_then(|c| c.get(1))
            .and_then(|s| s.as_str().parse::<i32>().ok())
        {
            if let Some((_, update_total)) = updated_info {
                total = update_total;
            }

            let rev_id = total - page;
            let rows = doc
                .select(tr_selector)
                .rev()
                .enumerate()
                .map(|(idx, tr)| {
                    MikanClassicEpisodeTableRow::from_element_ref(
                        tr,
                        rev_id,
                        idx as i32,
                        mikan_base_url,
                    )
                })
                .collect::<RecorderResult<Vec<_>>>()?;
            Ok(Self {
                page,
                total,
                html,
                rows,
            })
        } else {
            Err(RecorderError::without_source(
                "Failed to parse pagination meta and rows".into(),
            ))
        }
    }

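    // Each page is persisted three ways under TEST_FOLDER, keyed by rev_id:
    // the raw HTML plus parquet and CSV renderings of the parsed rows.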
    pub fn save_to_files(&self) -> RecorderResult<()> {
        use polars::prelude::*;

        let rev_id = self.total - self.page;
        let parquet_path = TEST_FOLDER.join(format!("parquet/rev_{rev_id}.parquet"));
        let csv_path = TEST_FOLDER.join(format!("csv/rev_{rev_id}.csv"));
        let html_path = TEST_FOLDER.join(format!("html/rev_{rev_id}.html"));

        std::fs::write(html_path, self.html.clone())?;

        let mut publish_at_vec = Vec::new();
        let mut mikan_fansub_id_vec = Vec::new();
        let mut fansub_name_vec = Vec::new();
        let mut mikan_episode_id_vec = Vec::new();
        let mut original_name_vec = Vec::new();
        let mut magnet_link_vec = Vec::new();
        let mut file_size_vec = Vec::new();
        let mut torrent_link_vec = Vec::new();

        for row in &self.rows {
            publish_at_vec.push(row.publish_at.to_rfc3339());
            mikan_fansub_id_vec.push(row.mikan_fansub_id.clone());
            fansub_name_vec.push(row.fansub_name.clone());
            mikan_episode_id_vec.push(row.mikan_episode_id.clone());
            original_name_vec.push(row.original_name.clone());
            magnet_link_vec.push(row.magnet_link.clone());
            file_size_vec.push(row.file_size.clone());
            torrent_link_vec.push(row.torrent_link.clone());
        }

        let df = df! [
            "publish_at_timestamp" => publish_at_vec,
            "mikan_fansub_id" => mikan_fansub_id_vec,
            "fansub_name" => fansub_name_vec,
            "mikan_episode_id" => mikan_episode_id_vec,
            "original_name" => original_name_vec,
            "magnet_link" => magnet_link_vec,
            "file_size" => file_size_vec,
            "torrent_link" => torrent_link_vec,
        ]
        .map_err(|e| {
            let message = format!("Failed to create DataFrame: {e}");
            RecorderError::with_source(Box::new(e), message)
        })?;

        let mut parquet_file = std::fs::File::create(&parquet_path)?;

        ParquetWriter::new(&mut parquet_file)
            .finish(&mut df.clone())
            .map_err(|e| {
                let message = format!("Failed to write parquet file: {e}");
                RecorderError::with_source(Box::new(e), message)
            })?;

        let mut csv_file = std::fs::File::create(&csv_path)?;

        CsvWriter::new(&mut csv_file)
            .include_header(true)
            .with_quote_style(QuoteStyle::Always)
            .finish(&mut df.clone())
            .map_err(|e| {
                let message = format!("Failed to write csv file: {e}");
                RecorderError::with_source(Box::new(e), message)
            })?;

        println!(
            "[{}/{}] Saved {} rows to rev_{}.{{parquet,html,csv}}",
            self.page,
            self.total,
            self.rows.len(),
            rev_id
        );

        Ok(())
    }

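    // Resumability: scan csv/rev_*.csv for reverse page ids that were
    // already saved and return only the missing ones, so an interrupted
    // crawl picks up where it left off.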
    pub fn waiting_rev_ids(total: i32) -> RecorderResult<Vec<i32>> {
        let dir = TEST_FOLDER.join("csv");

        let files = std::fs::read_dir(dir)?;

        let rev_ids = files
            .filter_map(|f| f.ok())
            .filter_map(|f| {
                f.path().file_stem().and_then(|s| {
                    s.to_str().and_then(|s| {
                        if s.starts_with("rev_") {
                            s.replace("rev_", "").parse::<i32>().ok()
                        } else {
                            None
                        }
                    })
                })
            })
            .collect::<HashSet<_>>();

        Ok((0..total)
            .filter(|rev_id| !rev_ids.contains(rev_id))
            .collect::<Vec<_>>())
    }
}

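// Fetch one classic-list page, short-circuiting to the on-disk HTML copy
// when rev_{rev_id}.html already exists, so pages are re-parsed without
// being re-fetched.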
async fn scrape_mikan_classic_episode_table_page(
    mikan_client: &MikanClient,
    page: i32,
    updated_info: Option<(i32, i32)>,
) -> RecorderResult<MikanClassicEpisodeTablePage> {
    let mikan_base_url = mikan_client.base_url();
    let url = mikan_base_url.join(&format!("/Home/Classic/{page}"))?;

    if let Some((rev_id, update_total)) = updated_info.as_ref() {
        let html_path = TEST_FOLDER.join(format!("html/rev_{rev_id}.html"));
        if html_path.exists() {
            let html = std::fs::read_to_string(&html_path)?;
            println!("[{page}/{update_total}] html exists, skipping fetch");
            return MikanClassicEpisodeTablePage::from_html(
                html,
                mikan_base_url,
                page,
                updated_info,
            );
        }
    }

    let total = if let Some((_, update_total)) = updated_info.as_ref() {
        update_total.to_string()
    } else {
        "Unknown".to_string()
    };

    println!("[{page}/{total}] fetching html...");

    let html = fetch_html(mikan_client, url).await?;

    println!("[{page}/{total}] fetched html done");

    std::fs::write(TEST_FOLDER.join("html/temp.html"), html.clone())?;

    MikanClassicEpisodeTablePage::from_html(html, mikan_base_url, page, updated_info)
}

async fn scrape_mikan_classic_episode_table_page_from_rev_id(
    mikan_client: &MikanClient,
    total: i32,
    rev_idx: i32,
) -> RecorderResult<MikanClassicEpisodeTablePage> {
    let page = total - rev_idx;

    scrape_mikan_classic_episode_table_page(mikan_client, page, Some((rev_idx, total))).await
}

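// The scrape client is deliberately throttled: a leaky bucket holding at
// most 2 tokens, refilled at 1 token per second, plus up to 3 retries with
// exponential backoff and a desktop-browser User-Agent.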
#[tokio::main]
async fn main() -> RecorderResult<()> {
    std::fs::create_dir_all(TEST_FOLDER.join("html"))?;
    std::fs::create_dir_all(TEST_FOLDER.join("parquet"))?;
    std::fs::create_dir_all(TEST_FOLDER.join("csv"))?;

    let mikan_scrape_client = MikanClient::from_config(MikanConfig {
        http_client: HttpClientConfig {
            exponential_backoff_max_retries: Some(3),
            leaky_bucket_max_tokens: Some(2),
            leaky_bucket_initial_tokens: Some(0),
            leaky_bucket_refill_tokens: Some(1),
            leaky_bucket_refill_interval: Some(std::time::Duration::from_millis(1000)),
            user_agent: Some(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \
                 Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
                    .to_string(),
            ),
            ..Default::default()
        },
        base_url: Url::parse("https://mikanani.me")?,
    })
    .await?;

    let first_page_and_pagination_info =
        scrape_mikan_classic_episode_table_page(&mikan_scrape_client, 1, None).await?;

    let total_page = first_page_and_pagination_info.total;

    first_page_and_pagination_info.save_to_files()?;

    let next_rev_ids = MikanClassicEpisodeTablePage::waiting_rev_ids(total_page)?;

    for todo_rev_id in next_rev_ids {
        let page = scrape_mikan_classic_episode_table_page_from_rev_id(
            &mikan_scrape_client,
            total_page,
            todo_rev_id,
        )
        .await?;

        page.save_to_files()?;
    }

    Ok(())
}

@@ -12,6 +12,7 @@ pub const MIKAN_BANGUMI_POSTER_PATH: &str = "/images/Bangumi";
 pub const MIKAN_EPISODE_TORRENT_PATH: &str = "/Download";
 pub const MIKAN_SUBSCRIBER_SUBSCRIPTION_RSS_PATH: &str = "/RSS/MyBangumi";
 pub const MIKAN_BANGUMI_RSS_PATH: &str = "/RSS/Bangumi";
+pub const MIKAN_FANSUB_HOMEPAGE_PATH: &str = "/Home/PublishGroup";
 pub const MIKAN_BANGUMI_ID_QUERY_KEY: &str = "bangumiId";
 pub const MIKAN_FANSUB_ID_QUERY_KEY: &str = "subgroupid";
 pub const MIKAN_SUBSCRIBER_SUBSCRIPTION_TOKEN_QUERY_KEY: &str = "token";

@@ -11,10 +11,11 @@ pub use constants::{
     MIKAN_ACCOUNT_MANAGE_PAGE_PATH, MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH,
     MIKAN_BANGUMI_HOMEPAGE_PATH, MIKAN_BANGUMI_ID_QUERY_KEY, MIKAN_BANGUMI_POSTER_PATH,
     MIKAN_BANGUMI_RSS_PATH, MIKAN_EPISODE_HOMEPAGE_PATH, MIKAN_EPISODE_TORRENT_PATH,
-    MIKAN_FANSUB_ID_QUERY_KEY, MIKAN_LOGIN_PAGE_PATH, MIKAN_LOGIN_PAGE_SEARCH,
-    MIKAN_POSTER_BUCKET_KEY, MIKAN_SEASON_FLOW_PAGE_PATH, MIKAN_SEASON_STR_QUERY_KEY,
-    MIKAN_SUBSCRIBER_SUBSCRIPTION_RSS_PATH, MIKAN_SUBSCRIBER_SUBSCRIPTION_TOKEN_QUERY_KEY,
-    MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME, MIKAN_YEAR_QUERY_KEY,
+    MIKAN_FANSUB_HOMEPAGE_PATH, MIKAN_FANSUB_ID_QUERY_KEY, MIKAN_LOGIN_PAGE_PATH,
+    MIKAN_LOGIN_PAGE_SEARCH, MIKAN_POSTER_BUCKET_KEY, MIKAN_SEASON_FLOW_PAGE_PATH,
+    MIKAN_SEASON_STR_QUERY_KEY, MIKAN_SUBSCRIBER_SUBSCRIPTION_RSS_PATH,
+    MIKAN_SUBSCRIBER_SUBSCRIPTION_TOKEN_QUERY_KEY, MIKAN_UNKNOWN_FANSUB_ID,
+    MIKAN_UNKNOWN_FANSUB_NAME, MIKAN_YEAR_QUERY_KEY,
 };
 pub use credential::MikanCredentialForm;
 pub use subscription::{
@@ -22,11 +23,12 @@ pub use subscription::{
 };
 pub use web::{
     MikanBangumiHash, MikanBangumiIndexHash, MikanBangumiIndexMeta, MikanBangumiMeta,
-    MikanBangumiPosterMeta, MikanEpisodeHash, MikanEpisodeMeta, MikanRssEpisodeItem,
-    MikanSeasonFlowUrlMeta, MikanSeasonStr, MikanSubscriberSubscriptionRssUrlMeta,
-    build_mikan_bangumi_expand_subscribed_url, build_mikan_bangumi_homepage_url,
-    build_mikan_bangumi_subscription_rss_url, build_mikan_episode_homepage_url,
-    build_mikan_season_flow_url, build_mikan_subscriber_subscription_rss_url,
+    MikanBangumiPosterMeta, MikanEpisodeHash, MikanEpisodeMeta, MikanFansubHash,
+    MikanRssEpisodeItem, MikanSeasonFlowUrlMeta, MikanSeasonStr,
+    MikanSubscriberSubscriptionRssUrlMeta, build_mikan_bangumi_expand_subscribed_url,
+    build_mikan_bangumi_homepage_url, build_mikan_bangumi_subscription_rss_url,
+    build_mikan_episode_homepage_url, build_mikan_season_flow_url,
+    build_mikan_subscriber_subscription_rss_url,
     extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
     extract_mikan_bangumi_meta_from_expand_subscribed_fragment,
     extract_mikan_episode_meta_from_episode_homepage_html,

@@ -22,8 +22,8 @@ use crate::{
         mikan::{
             MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH, MIKAN_BANGUMI_HOMEPAGE_PATH,
             MIKAN_BANGUMI_ID_QUERY_KEY, MIKAN_BANGUMI_POSTER_PATH, MIKAN_BANGUMI_RSS_PATH,
-            MIKAN_EPISODE_HOMEPAGE_PATH, MIKAN_FANSUB_ID_QUERY_KEY, MIKAN_POSTER_BUCKET_KEY,
-            MIKAN_SEASON_FLOW_PAGE_PATH, MIKAN_SEASON_STR_QUERY_KEY,
+            MIKAN_EPISODE_HOMEPAGE_PATH, MIKAN_FANSUB_HOMEPAGE_PATH, MIKAN_FANSUB_ID_QUERY_KEY,
+            MIKAN_POSTER_BUCKET_KEY, MIKAN_SEASON_FLOW_PAGE_PATH, MIKAN_SEASON_STR_QUERY_KEY,
             MIKAN_SUBSCRIBER_SUBSCRIPTION_RSS_PATH, MIKAN_SUBSCRIBER_SUBSCRIPTION_TOKEN_QUERY_KEY,
             MIKAN_YEAR_QUERY_KEY, MikanClient,
         },
@@ -205,6 +205,32 @@ impl MikanBangumiMeta {
     }
 }
 
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct MikanFansubHash {
+    pub mikan_fansub_id: String,
+}
+
+impl MikanFansubHash {
+    pub fn from_homepage_url(url: &Url) -> Option<Self> {
+        let path = url.path();
+        if path.starts_with(MIKAN_FANSUB_HOMEPAGE_PATH) {
+            let mikan_fansub_id = path.replace(&format!("{MIKAN_FANSUB_HOMEPAGE_PATH}/"), "");
+            Some(Self { mikan_fansub_id })
+        } else {
+            None
+        }
+    }
+
+    pub fn build_homepage_url(self, mikan_base_url: Url) -> Url {
+        let mut url = mikan_base_url;
+        url.set_path(&format!(
+            "{MIKAN_FANSUB_HOMEPAGE_PATH}/{}",
+            self.mikan_fansub_id
+        ));
+        url
+    }
+}
+
 #[derive(Clone, Debug, PartialEq)]
 pub struct MikanEpisodeMeta {
     pub homepage: Url,
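
A minimal round-trip sketch of the new hash type (the fansub id `203` is an
arbitrary illustration; the import path follows the re-export added in the
mod hunk above):

    use recorder::extract::mikan::MikanFansubHash;
    use url::Url;

    // Parse a fansub id out of a /Home/PublishGroup/{id} homepage URL.
    let url = Url::parse("https://mikanani.me/Home/PublishGroup/203").unwrap();
    let hash = MikanFansubHash::from_homepage_url(&url).unwrap();
    assert_eq!(hash.mikan_fansub_id, "203");

    // And rebuild the homepage URL from the hash plus a base URL.
    let rebuilt = hash.build_homepage_url(Url::parse("https://mikanani.me").unwrap());
    assert_eq!(rebuilt.path(), "/Home/PublishGroup/203");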

@@ -152,7 +152,10 @@ impl ActiveModel {
             season_raw: ActiveValue::Set(season_raw),
             fansub: ActiveValue::Set(Some(meta.fansub)),
             poster_link: ActiveValue::Set(poster_link),
-            origin_poster_link: ActiveValue::Set(meta.origin_poster_src.map(|src| src.to_string())),
+            origin_poster_link: ActiveValue::Set(
+                meta.origin_poster_src
+                    .map(|src| src[url::Position::BeforePath..].to_string()),
+            ),
             homepage: ActiveValue::Set(Some(meta.homepage.to_string())),
             rss_link: ActiveValue::Set(Some(rss_url.to_string())),
             ..Default::default()
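
The `url::Position::BeforePath` slice stores the origin poster link
host-relatively, dropping scheme and host. A small sketch of the slicing
behavior (the poster path is illustrative):

    use url::{Position, Url};

    let src = Url::parse("https://mikanani.me/images/Bangumi/poster.jpg").unwrap();
    // Slicing from BeforePath keeps path, query, and fragment only.
    assert_eq!(&src[Position::BeforePath..], "/images/Bangumi/poster.jpg");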