feat: add torrent ep parser and fix raw parser cases

This commit is contained in:
master 2024-03-13 23:57:00 +08:00
parent 8c460dfdc0
commit 5d78ed3624
7 changed files with 451 additions and 149 deletions

View File

@ -416,7 +416,6 @@ impl Debug for QBittorrentDownloader {
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {
use itertools::Itertools; use itertools::Itertools;
use testcontainers::core::ExecCommand;
use super::*; use super::*;

View File

@ -1,25 +1,8 @@
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.12.2 //! `SeaORM` Entity. Generated by sea-orm-codegen 0.12.2
use sea_orm::{entity::prelude::*, FromJsonQueryResult}; use sea_orm::entity::prelude::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, FromJsonQueryResult)]
pub struct EpisodeFileMeta {
pub media_path: String,
pub group: Option<String>,
pub title: String,
pub season: i32,
pub episode_index: Option<i32>,
pub extension: String,
}
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, FromJsonQueryResult)]
pub struct SubtitleFileMeta {
pub episode_file_meta: EpisodeFileMeta,
pub extension: String,
pub lang: Option<String>,
}
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)]
#[sea_orm(table_name = "episodes")] #[sea_orm(table_name = "episodes")]
pub struct Model { pub struct Model {

View File

@ -1,8 +1,8 @@
use std::collections::{BTreeMap, HashMap}; use std::collections::HashMap;
use fancy_regex::Regex as FancyRegex; use fancy_regex::Regex as FancyRegex;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use maplit::{btreemap, hashmap}; use maplit::hashmap;
use regex::Regex; use regex::Regex;
const LANG_ZH_TW: &str = "zh-tw"; const LANG_ZH_TW: &str = "zh-tw";
@ -26,13 +26,13 @@ lazy_static! {
FancyRegex::new(r"(.*)第?(\d*\.*\d*)[话話集](?:END)?(.*)").unwrap(), FancyRegex::new(r"(.*)第?(\d*\.*\d*)[话話集](?:END)?(.*)").unwrap(),
FancyRegex::new(r"(.*)(?:S\d{2})?EP?(\d+)(.*)").unwrap(), FancyRegex::new(r"(.*)(?:S\d{2})?EP?(\d+)(.*)").unwrap(),
]; ];
pub static ref SUBTITLE_LANG: BTreeMap<&'static str, Vec<&'static str>> = { pub static ref SUBTITLE_LANG: Vec<(&'static str, Vec<&'static str>)> = {
btreemap! { vec![
LANG_ZH_TW => vec!["tc", "cht", "", "zh-tw"], (LANG_ZH_TW, vec!["tc", "cht", "", "zh-tw"]),
LANG_ZH => vec!["sc", "chs", "", "zh", "zh-cn"], (LANG_ZH, vec!["sc", "chs", "", "zh", "zh-cn"]),
LANG_EN => vec!["en", "eng", ""], (LANG_EN, vec!["en", "eng", ""]),
LANG_JP => vec!["jp", "jpn", ""], (LANG_JP, vec!["jp", "jpn", ""]),
} ]
}; };
pub static ref BRACKETS_REG: Regex = Regex::new(r"[\[\]()【】()]").unwrap(); pub static ref BRACKETS_REG: Regex = Regex::new(r"[\[\]()【】()]").unwrap();
pub static ref DIGIT_1PLUS_REG: Regex = Regex::new(r"\d+").unwrap(); pub static ref DIGIT_1PLUS_REG: Regex = Regex::new(r"\d+").unwrap();

View File

@ -5,4 +5,3 @@ pub mod mikan;
pub mod raw; pub mod raw;
pub mod title_parser; pub mod title_parser;
pub mod torrent; pub mod torrent;
pub mod torrent_parser;

View File

@ -184,7 +184,7 @@ fn extract_name_from_title_body_name_section(
let mut split_space = split[0].split(' ').collect_vec(); let mut split_space = split[0].split(' ').collect_vec();
let mut search_indices = vec![0]; let mut search_indices = vec![0];
if split_space.len() > 1 { if split_space.len() > 1 {
search_indices.push(search_indices.len() - 1); search_indices.push(split_space.len() - 1);
} }
for i in search_indices { for i in search_indices {
if NAME_ZH_TEST.is_match(split_space[i]) { if NAME_ZH_TEST.is_match(split_space[i]) {
@ -288,7 +288,7 @@ pub fn parse_episode_meta_from_raw_name(s: &str) -> eyre::Result<RawEpisodeMeta>
let (name_en, name_zh, name_jp) = extract_name_from_title_body_name_section(&title_body); let (name_en, name_zh, name_jp) = extract_name_from_title_body_name_section(&title_body);
let (name_en_no_season, name_zh_no_season, name_jp_no_season) = let (name_en_no_season, name_zh_no_season, name_jp_no_season) =
extract_name_from_title_body_name_section(&name_without_season); extract_name_from_title_body_name_section(&name_without_season);
let episode_index = extract_episode_index_from_title_episode(title_episode).unwrap_or(0); let episode_index = extract_episode_index_from_title_episode(title_episode).unwrap_or(1);
let (sub, resolution, source) = extract_tags_from_title_extra(title_extra); let (sub, resolution, source) = extract_tags_from_title_extra(title_extra);
Ok(RawEpisodeMeta { Ok(RawEpisodeMeta {
name_en, name_en,
@ -495,7 +495,7 @@ mod tests {
"name_zh": "爱丽丝与特蕾丝的虚幻工厂", "name_zh": "爱丽丝与特蕾丝的虚幻工厂",
"name_zh_no_season": "爱丽丝与特蕾丝的虚幻工厂", "name_zh_no_season": "爱丽丝与特蕾丝的虚幻工厂",
"season": 1, "season": 1,
"episode_index": 0, "episode_index": 1,
"sub": "简繁内封", "sub": "简繁内封",
"source": "WebRip", "source": "WebRip",
"fansub": "千夏字幕组", "fansub": "千夏字幕组",
@ -511,13 +511,10 @@ mod tests {
r#"{ r#"{
"name_en": "Yuru Camp Movie", "name_en": "Yuru Camp Movie",
"name_en_no_season": "Yuru Camp Movie", "name_en_no_season": "Yuru Camp Movie",
"name_jp": null,
"name_jp_no_season": null,
"name_zh": "电影 轻旅轻营 (摇曳露营)", "name_zh": "电影 轻旅轻营 (摇曳露营)",
"name_zh_no_season": "电影 轻旅轻营 (摇曳露营)", "name_zh_no_season": "电影 轻旅轻营 (摇曳露营)",
"season": 1, "season": 1,
"season_raw": null, "episode_index": 1,
"episode_index": 0,
"sub": "繁体", "sub": "繁体",
"source": "UHDRip", "source": "UHDRip",
"fansub": "千夏字幕组&喵萌奶茶屋", "fansub": "千夏字幕组&喵萌奶茶屋",
@ -526,6 +523,24 @@ mod tests {
) )
} }
// Covers a release whose episode number is a bare three-digit bracket
// (`[747]`) immediately followed by a bracketed date and codec tags.
#[test]
fn test_parse_ep_with_large_episode_style() {
test_raw_ep_parser_case(
r#"[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"#,
r#"{
"name_en": "New Doraemon",
"name_en_no_season": "New Doraemon",
"name_zh": "哆啦A梦新番",
"name_zh_no_season": "哆啦A梦新番",
"season": 1,
"episode_index": 747,
"sub": "GB",
"fansub": "梦蓝字幕组",
"resolution": "1080P"
}"#,
)
}
#[test] #[test]
fn test_parse_ep_with_many_square_brackets_split_title() { fn test_parse_ep_with_many_square_brackets_split_title() {
test_raw_ep_parser_case( test_raw_ep_parser_case(
@ -536,7 +551,7 @@ mod tests {
"name_zh": "剧场版-摇曳露营", "name_zh": "剧场版-摇曳露营",
"name_zh_no_season": "剧场版-摇曳露营", "name_zh_no_season": "剧场版-摇曳露营",
"season": 1, "season": 1,
"episode_index": 0, "episode_index": 1,
"sub": "简日双语", "sub": "简日双语",
"fansub": "MCE汉化组", "fansub": "MCE汉化组",
"resolution": "1080P" "resolution": "1080P"
@ -544,6 +559,24 @@ mod tests {
) )
} }
// Covers a bracketed title section where the Chinese and English names are
// separated only by a space (no `/` or `_`), with a `[02集]` episode tag.
#[test]
fn test_parse_ep_with_implicit_lang_title_sep() {
test_raw_ep_parser_case(
r#"[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"#,
r#"{
"name_en": "NieR Automata Ver1.1a",
"name_en_no_season": "NieR Automata Ver1.1a",
"name_zh": "尼尔:机械纪元",
"name_zh_no_season": "尼尔:机械纪元",
"season": 1,
"episode_index": 2,
"sub": "简日双语",
"fansub": "织梦字幕组",
"resolution": "1080P"
}"#,
)
}
#[test] #[test]
fn test_parse_ep_with_square_brackets_wrapped_and_space_split() { fn test_parse_ep_with_square_brackets_wrapped_and_space_split() {
test_raw_ep_parser_case( test_raw_ep_parser_case(
@ -602,9 +635,101 @@ mod tests {
) )
} }
// Covers a title that follows the fansub bracket after a space and mixes
// digits, Chinese and Latin words; the whole title is expected as `name_zh`
// and no English name is expected.
#[test]
fn test_parse_ep_title_leading_space_style() {
test_raw_ep_parser_case(
r#"[ANi] 16bit 的感动 ANOTHER LAYER - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"#,
r#"{
"name_zh": "16bit 的感动 ANOTHER LAYER",
"name_zh_no_season": "16bit 的感动 ANOTHER LAYER",
"season": 1,
"season_raw": null,
"episode_index": 1,
"sub": "CHT",
"source": "Baha",
"fansub": "ANi",
"resolution": "1080P"
}"#,
)
}
// Covers a `★07月新番★` season banner between the fansub and a bracketed
// title section. The expected English name keeps the surrounding `~` marks.
#[test]
fn test_parse_ep_title_leading_month_and_wrapped_brackets_style() {
test_raw_ep_parser_case(
r#"【喵萌奶茶屋】★07月新番★[银砂糖师与黑妖精 ~ Sugar Apple Fairy Tale ~][13][1080p][简日双语][招募翻译]"#,
r#"{
"name_en": "~ Sugar Apple Fairy Tale ~",
"name_en_no_season": "~ Sugar Apple Fairy Tale ~",
"name_zh": "银砂糖师与黑妖精",
"name_zh_no_season": "银砂糖师与黑妖精",
"season": 1,
"episode_index": 13,
"sub": "简日双语",
"fansub": "喵萌奶茶屋",
"resolution": "1080p"
}"#,
)
}
// Covers a space-separated release name (no brackets after the fansub) with
// a `★4月新番` banner and a `第05话` episode marker.
// NOTE(review): the expected `sub` is the trailing recruitment text rather
// than the `GB` tag — confirm this is intended and not a pinned parser quirk.
#[test]
fn test_parse_ep_title_leading_month_style() {
test_raw_ep_parser_case(
r#"【极影字幕社】★4月新番 天国大魔境 Tengoku Daimakyou 第05话 GB 720P MP4字幕社招人内详"#,
r#"{
"name_en": "Tengoku Daimakyou",
"name_en_no_season": "Tengoku Daimakyou",
"name_zh": "天国大魔境",
"name_zh_no_season": "天国大魔境",
"season": 1,
"episode_index": 5,
"sub": "字幕社招人内详",
"source": null,
"fansub": "极影字幕社",
"resolution": "720P"
}"#,
)
}
// Covers a `中文名 / 日本語名 EP33` title: a Chinese and a Japanese name
// separated by ` / `, an `EP`-prefixed episode number and no subtitle tag.
#[test]
fn test_parse_ep_tokusatsu_style() {
test_raw_ep_parser_case(
r#"[MagicStar] 假面骑士Geats / 仮面ライダーギーツ EP33 [WEBDL] [1080p] [TTFC]【生】"#,
r#"{
"name_jp": "仮面ライダーギーツ",
"name_jp_no_season": "仮面ライダーギーツ",
"name_zh": "假面骑士Geats",
"name_zh_no_season": "假面骑士Geats",
"season": 1,
"episode_index": 33,
"source": "WEBDL",
"fansub": "MagicStar",
"resolution": "1080p"
}"#,
)
}
// Covers a Chinese title that itself starts with Latin text
// (`BanG Dream! ...`) ahead of a ` / ` separated English title; the part
// before the slash is expected as `name_zh`.
#[test]
fn test_parse_ep_with_multi_lang_zh_title() {
test_raw_ep_parser_case(
r#"[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对☆PICO FEVER / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]"#,
r#"{
"name_en": "Garupa Pico: Fever!",
"name_en_no_season": "Garupa Pico: Fever!",
"name_zh": "BanG Dream! 少女乐团派对☆PICO FEVER",
"name_zh_no_season": "BanG Dream! 少女乐团派对☆PICO FEVER",
"season": 1,
"episode_index": 26,
"sub": "简繁内封字幕",
"source": "WebRip",
"fansub": "百冬练习组&LoliHouse",
"resolution": "1080p"
}"#,
)
}
// TODO: FIXME // TODO: FIXME
#[test] #[test]
fn test_bad_case() { fn test_bad_cases() {
test_raw_ep_parser_case( test_raw_ep_parser_case(
r#"[7³ACG x 桜都字幕组] 摇曳露营△ 剧场版/映画 ゆるキャン△/Eiga Yuru Camp△ [简繁字幕] BDrip 1080p x265 FLAC 2.0"#, r#"[7³ACG x 桜都字幕组] 摇曳露营△ 剧场版/映画 ゆるキャン△/Eiga Yuru Camp△ [简繁字幕] BDrip 1080p x265 FLAC 2.0"#,
r#"{ r#"{
@ -612,12 +737,28 @@ mod tests {
"name_zh_no_season": "摇曳露营△剧场版", "name_zh_no_season": "摇曳露营△剧场版",
"season": 1, "season": 1,
"season_raw": null, "season_raw": null,
"episode_index": 0, "episode_index": 1,
"sub": "简繁字幕", "sub": "简繁字幕",
"source": "BDrip", "source": "BDrip",
"fansub": "7³ACG x 桜都字幕组", "fansub": "7³ACG x 桜都字幕组",
"resolution": "1080p" "resolution": "1080p"
}"#, }"#,
) );
test_raw_ep_parser_case(
r#"【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"#,
r#"{
"name_en": "第二季 Komi-san wa, Komyushou Desu. S02",
"name_en_no_season": "Komi-san wa, Komyushou Desu.",
"name_zh": "古见同学有交流障碍症",
"name_zh_no_season": "古见同学有交流障碍症",
"season": 2,
"season_raw": "第二季",
"episode_index": 22,
"sub": "GB",
"fansub": "幻樱字幕组",
"resolution": "1920X1080"
}"#,
);
} }
} }

View File

@ -1,45 +1,316 @@
use eyre::OptionExt;
use fancy_regex::Regex as FancyRegex;
use lazy_static::lazy_static;
use quirks_path::Path;
use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] use crate::parsers::defs::SUBTITLE_LANG;
pub struct TorrentEpisodeMediaMeta {}
// Regexes used by the torrent episode parser, built lazily via `lazy_static!`.
lazy_static! {
// Episode-matching rules. In every rule capture 1 is the text before the
// episode number, capture 2 is the episode number and capture 3 is the
// remainder of the name.
static ref TORRENT_EP_PARSE_RULES: Vec<FancyRegex> = {
vec![
// `<title> - <ep>`: 1-4 digits, optionally with a 1-2 digit fraction.
// The negative lookahead rejects a number followed by another digit or
// `p` (presumably so resolutions like `1080p` are not read as episodes).
// An optional `vN` suffix, a space and `END` may follow the number.
FancyRegex::new(
r"(.*) - (\d{1,4}(?!\d|p)|\d{1,4}\.\d{1,2}(?!\d|p))(?:v\d{1,2})?(?: )?(?:END)?(.*)",
)
.unwrap(),
// Number preceded by `[`, a space or `E`, and followed by `]` or a space.
FancyRegex::new(
r"(.*)[\[\ E](\d{1,4}|\d{1,4}\.\d{1,2})(?:v\d{1,2})?(?: )?(?:END)?[\]\ ](.*)",
)
.unwrap(),
// Bracketed `[第NN话]` style (话/集/話 suffix, `第` optional).
// `\d*\.*\d*` can match an empty string, so capture 2 may be empty.
FancyRegex::new(r"(.*)\[(?:第)?(\d*\.*\d*)[话集話](?:END)?\](.*)").unwrap(),
// Same 话/話/集 style without the surrounding brackets.
FancyRegex::new(r"(.*)第?(\d*\.*\d*)[话話集](?:END)?(.*)").unwrap(),
// `E<digits>` / `EP<digits>`, optionally preceded by `S` and two digits.
FancyRegex::new(r"(.*)(?:S\d{2})?EP?(\d+)(.*)").unwrap(),
]
};
// Matches any single bracket character; used to split a name into pieces.
static ref GET_FANSUB_SPLIT_RE: Regex = Regex::new(r"[\[\]()【】()]").unwrap();
// Matches a string that consists of digits only.
static ref GET_FANSUB_FULL_MATCH_RE: Regex = Regex::new(r"^\d+$").unwrap();
// Season marker (`S`, `s` or `Season ` followed by 1-3 digits); used to strip
// the marker from a title.
static ref GET_SEASON_AND_TITLE_SUB_RE: Regex = Regex::new(r"([Ss]|Season )\d{1,3}").unwrap();
// Same season marker, with the digits additionally captured as group 2.
static ref GET_SEASON_AND_TITLE_FIND_RE: Regex =
Regex::new(r"([Ss]|Season )(\d{1,3})").unwrap();
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct TorrentEpisodeSubtitleMeta {} pub struct TorrentEpisodeMediaMeta {
pub fansub: Option<String>,
pub title: String,
pub season: i32,
pub episode_index: i32,
pub extname: String,
}
/// Metadata parsed from the file name of a subtitle inside a torrent.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct TorrentEpisodeSubtitleMeta {
/// Episode metadata (fansub, title, season, episode index, extension).
pub media: TorrentEpisodeMediaMeta,
/// Subtitle language, or `None` when no language was detected.
pub lang: Option<String>,
}
/// Splits the text that precedes the episode number into an optional fansub
/// group and the remaining season/title text.
///
/// The input is cut at every bracket character; pieces are trimmed and empty
/// pieces are dropped. The result depends on the first two pieces:
///
/// * no pieces: no fansub and an empty title;
/// * one piece: no fansub, the piece is the title;
/// * second piece is digits only: no fansub, the whole untouched input is
///   returned as the title;
/// * otherwise: the first piece is the fansub and the second is the title.
fn get_fansub(group_and_title: &str) -> (Option<&str>, &str) {
    let pieces: Vec<&str> = GET_FANSUB_SPLIT_RE
        .split(group_and_title)
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .collect();

    match pieces.as_slice() {
        [] => (None, ""),
        [only] => (None, *only),
        // A purely numeric second piece is not treated as a title.
        [_, second, ..] if GET_FANSUB_FULL_MATCH_RE.is_match(second) => (None, group_and_title),
        [first, second, ..] => (Some(*first), *second),
    }
}
/// Separates a season marker (`S2`, `s02`, `Season 3`, ...) from a title.
///
/// Returns the title with every season marker removed and surrounding
/// whitespace trimmed, together with the season number taken from the first
/// marker. The season defaults to `1` when no marker is present.
fn get_season_and_title(season_and_title: &str) -> (String, i32) {
    let season = match GET_SEASON_AND_TITLE_FIND_RE.captures(season_and_title) {
        Some(caps) => caps
            .get(2)
            .unwrap_or_else(|| unreachable!("season regex should have 2 groups"))
            .as_str()
            .parse::<i32>()
            .unwrap_or_else(|_| unreachable!("season should be a number")),
        None => 1,
    };

    let title = GET_SEASON_AND_TITLE_SUB_RE
        .replace_all(season_and_title, "")
        .trim()
        .to_string();

    (title, season)
}
/// Detects the subtitle language from a file name.
///
/// The name is lower-cased and checked against the aliases of each entry of
/// `SUBTITLE_LANG` in order; the language of the first entry with an alias
/// contained in the name is returned, or `None` when nothing matches.
///
/// Empty aliases are ignored: `str::contains("")` is true for every string,
/// so an empty alias would make its language match every file name and hide
/// all entries after it.
///
/// The returned code is the `'static` key stored in `SUBTITLE_LANG`; it does
/// not borrow from `media_name`.
fn get_subtitle_lang(media_name: &str) -> Option<&'static str> {
    let media_name_lower = media_name.to_lowercase();
    SUBTITLE_LANG
        .iter()
        .find(|(_, lang_aliases)| {
            lang_aliases
                .iter()
                .any(|alias| !alias.is_empty() && media_name_lower.contains(*alias))
        })
        .map(|(lang, _)| *lang)
}
pub fn parse_episode_media_meta_from_torrent( pub fn parse_episode_media_meta_from_torrent(
torrent_path: &str, torrent_path: &Path,
torrent_name: Option<&str>, torrent_name: Option<&str>,
season: Option<i32>, season: Option<i32>,
) -> eyre::Result<TorrentEpisodeMediaMeta> { ) -> eyre::Result<TorrentEpisodeMediaMeta> {
todo!() let media_name = torrent_path
.file_name()
.ok_or_else(|| eyre::eyre!("failed to get file name of {}", torrent_path))?;
let mut match_obj = None;
for rule in TORRENT_EP_PARSE_RULES.iter() {
match_obj = if let Some(torrent_name) = torrent_name.as_ref() {
rule.captures(torrent_name)?
} else {
rule.captures(media_name)?
};
if match_obj.is_some() {
break;
}
}
if let Some(match_obj) = match_obj {
let group_season_and_title = match_obj
.get(1)
.ok_or_else(|| eyre::eyre!("should have 1 group"))?
.as_str();
let (fansub, season_and_title) = get_fansub(group_season_and_title);
let (title, season) = if let Some(season) = season {
let (title, _) = get_season_and_title(season_and_title);
(title, season)
} else {
get_season_and_title(season_and_title)
};
let episode_index = match_obj
.get(2)
.ok_or_eyre("should have 2 group")?
.as_str()
.parse::<i32>()
.unwrap_or(1);
let extname = torrent_path
.extension()
.map(|e| format!(".{}", e))
.unwrap_or_default();
Ok(TorrentEpisodeMediaMeta {
fansub: fansub.map(|s| s.to_string()),
title,
season,
episode_index,
extname,
})
} else {
Err(eyre::eyre!(
"failed to parse episode media meta from torrent_path='{}' torrent_name='{:?}'",
torrent_path,
torrent_name
))
}
} }
pub fn parse_episode_subtitle_meta_from_torrent( pub fn parse_episode_subtitle_meta_from_torrent(
torrent_path: &str, torrent_path: &Path,
torrent_name: Option<&str>, torrent_name: Option<&str>,
season: Option<i32>, season: Option<i32>,
) -> eyre::Result<TorrentEpisodeMediaMeta> { ) -> eyre::Result<TorrentEpisodeSubtitleMeta> {
todo!() let media_meta = parse_episode_media_meta_from_torrent(torrent_path, torrent_name, season)?;
let media_name = torrent_path
.file_name()
.ok_or_else(|| eyre::eyre!("failed to get file name of {}", torrent_path))?;
let lang = get_subtitle_lang(media_name);
Ok(TorrentEpisodeSubtitleMeta {
media: media_meta,
lang: lang.map(|s| s.to_string()),
})
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use quirks_path::Path;
use super::{ use super::{
parse_episode_media_meta_from_torrent, parse_episode_subtitle_meta_from_torrent, parse_episode_media_meta_from_torrent, parse_episode_subtitle_meta_from_torrent,
TorrentEpisodeMediaMeta, TorrentEpisodeSubtitleMeta, TorrentEpisodeMediaMeta, TorrentEpisodeSubtitleMeta,
}; };
// `[fansub] title - NN [tags].mp4`: episode taken from the ` - 01` separator.
#[test]
fn test_lilith_raws_media() {
test_torrent_ep_parser(
r#"[Lilith-Raws] Boku no Kokoro no Yabai Yatsu - 01 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4].mp4"#,
r#"{"fansub": "Lilith-Raws", "title": "Boku no Kokoro no Yabai Yatsu", "season": 1, "episode_index": 1, "extname": ".mp4"}"#,
);
}
// `[fansub] title S2 [NN][tags]`: season marker in the title and a bracketed
// episode number; the `S2` marker must be stripped from the title.
#[test]
fn test_sakurato_media() {
test_torrent_ep_parser(
r#"[Sakurato] Tonikaku Kawaii S2 [03][AVC-8bit 1080p AAC][CHS].mp4"#,
r#"{"fansub": "Sakurato", "title": "Tonikaku Kawaii", "season": 2, "episode_index": 3, "extname": ".mp4"}"#,
)
}
#[test]
fn test_lolihouse_media() {
test_torrent_ep_parser(
r#"[SweetSub&LoliHouse] Heavenly Delusion - 08 [WebRip 1080p HEVC-10bit AAC ASSx2].mkv"#,
r#"{"fansub": "SweetSub&LoliHouse", "title": "Heavenly Delusion", "season": 1, "episode_index": 8, "extname": ".mkv"}"#,
)
}
// Fully bracketed name with a four-digit episode number (`[1082]`) followed
// by a `[V2]` tag and a trailing parenthesised checksum-like token.
#[test]
fn test_sbsub_media() {
test_torrent_ep_parser(
r#"[SBSUB][CONAN][1082][V2][1080P][AVC_AAC][CHS_JP](C1E4E331).mp4"#,
r#"{"fansub": "SBSUB", "title": "CONAN", "season": 1, "episode_index": 1082, "extname": ".mp4"}"#,
)
}
// No fansub: `title (year) SxxEyy`. The parenthesised year must stay in the
// title instead of being split off as a fansub/title pair.
#[test]
fn test_non_fansub_media() {
test_torrent_ep_parser(
r#"海盗战记 (2019) S04E11.mp4"#,
r#"{"title": "海盗战记 (2019)", "season": 4, "episode_index": 11, "extname": ".mp4"}"#,
)
}
// Path with a leading directory component: only the file name is expected to
// contribute to the parsed title.
#[test]
fn test_non_fansub_media_with_dirname() {
test_torrent_ep_parser(
r#"海盗战记/海盗战记 S01E01.mp4"#,
r#"{"title": "海盗战记", "season": 1, "episode_index": 1, "extname": ".mp4"}"#,
);
}
// Subtitle file (`.ass`) with a `zh-tw` language suffix; expects the
// subtitle-meta shape (`media` + `lang`).
#[test]
fn test_non_fansub_tc_subtitle() {
test_torrent_ep_parser(
r#"海盗战记 S01E08.zh-tw.ass"#,
r#"{"media": { "title": "海盗战记", "season": 1, "episode_index": 8, "extname": ".ass" }, "lang": "zh-tw"}"#,
);
}
// Subtitle file (`.srt`) with an upper-case `SC` suffix; expects language
// `zh`, i.e. alias matching must be case-insensitive.
#[test]
fn test_non_fansub_sc_subtitle() {
test_torrent_ep_parser(
r#"海盗战记 S01E01.SC.srt"#,
r#"{ "media": { "title": "海盗战记", "season": 1, "episode_index": 1, "extname": ".srt" }, "lang": "zh" }"#,
)
}
// `S00` must be reported as season 0 rather than falling back to the
// default season 1; the `(2022)` year stays attached to the title.
#[test]
fn test_non_fansub_media_with_season_zero() {
test_torrent_ep_parser(
r#"水星的魔女(2022) S00E19.mp4"#,
r#"{"fansub": null,"title": "水星的魔女(2022)","season": 0,"episode_index": 19,"extname": ".mp4"}"#,
)
}
// Fansub wrapped in `【】`, a title that itself contains `-`, and a tag block
// containing ` - 1080p`; the episode must come from ` - 06`.
#[test]
fn test_shimian_fansub_media() {
test_torrent_ep_parser(
r#"【失眠搬运组】放学后失眠的你-Kimi wa Houkago Insomnia - 06 [bilibili - 1080p AVC1 CHS-JP].mp4"#,
r#"{"fansub": "失眠搬运组","title": "放学后失眠的你-Kimi wa Houkago Insomnia","season": 1,"episode_index": 6,"extname": ".mp4"}"#,
)
}
pub fn test_torrent_ep_parser(raw_name: &str, expected: &str) { pub fn test_torrent_ep_parser(raw_name: &str, expected: &str) {
let expected: Option<TorrentEpisodeMediaMeta> = serde_json::from_str(expected).unwrap(); let extname = Path::new(raw_name)
let found = parse_episode_media_meta_from_torrent(raw_name, None, None).ok(); .extension()
.map(|e| format!(".{}", e))
.unwrap_or_default()
.to_lowercase();
if extname == ".srt" || extname == ".ass" {
let expected: Option<TorrentEpisodeSubtitleMeta> = serde_json::from_str(expected).ok();
let found_raw =
parse_episode_subtitle_meta_from_torrent(Path::new(raw_name), None, None);
let found = found_raw.as_ref().ok().map(|s| s.clone());
if expected != found { if expected != found {
if found_raw.is_ok() {
println!( println!(
"expected {} and found {} are not equal", "expected {} and found {} are not equal",
serde_json::to_string_pretty(&expected).unwrap(), serde_json::to_string_pretty(&expected).unwrap(),
serde_json::to_string_pretty(&found).unwrap() serde_json::to_string_pretty(&found).unwrap()
) )
} else {
println!(
"expected {} and found {:#?} are not equal",
serde_json::to_string_pretty(&expected).unwrap(),
found_raw
)
}
}
assert_eq!(expected, found);
} else {
let expected: Option<TorrentEpisodeMediaMeta> = serde_json::from_str(expected).ok();
let found_raw = parse_episode_media_meta_from_torrent(Path::new(raw_name), None, None);
let found = found_raw.as_ref().ok().map(|s| s.clone());
if expected != found {
if found_raw.is_ok() {
println!(
"expected {} and found {} are not equal",
serde_json::to_string_pretty(&expected).unwrap(),
serde_json::to_string_pretty(&found).unwrap()
)
} else {
println!(
"expected {} and found {:#?} are not equal",
serde_json::to_string_pretty(&expected).unwrap(),
found_raw
)
}
} }
assert_eq!(expected, found); assert_eq!(expected, found);
} }
}
} }

View File

@ -1,91 +0,0 @@
use quirks_path::Path;
use super::defs::{
BRACKETS_REG, DIGIT_1PLUS_REG, SEASON_REGEX, SUBTITLE_LANG, TORRENT_PRASE_RULE_REGS,
};
/// Returns the final component (file name) of `path`, or `""` when the path
/// has no file name.
///
/// The previous implementation returned `path.parent()`, i.e. the containing
/// directory, which contradicts the function name and made callers match
/// episode patterns against the directory instead of the media file name.
pub fn get_path_basename(path: &Path) -> &str {
    path.file_name().unwrap_or("")
}
/// Splits the text that precedes the episode number into an optional fansub
/// group and the remaining title text.
///
/// The input is cut at every bracket character; pieces are trimmed and empty
/// pieces are dropped. Then:
///
/// * no pieces (empty or bracket-only input): no fansub and an empty title.
///   The previous implementation indexed `n[0]` here and panicked;
/// * one piece: no fansub, the piece is the title;
/// * second piece contains a digit run: no fansub, the whole untouched input
///   is returned as the title;
/// * otherwise: the first piece is the fansub and the second is the title.
pub fn get_fansub(group_and_title: &str) -> (Option<&str>, &str) {
    let pieces = BRACKETS_REG
        .split(group_and_title)
        .map(|s| s.trim())
        .filter(|s| !s.is_empty())
        .collect::<Vec<_>>();

    match pieces.as_slice() {
        [] => (None, ""),
        [title] => (None, *title),
        [_, second, ..] if DIGIT_1PLUS_REG.is_match(second) => (None, group_and_title),
        [group, title, ..] => (Some(*group), *title),
    }
}
/// Separates a season marker from a title.
///
/// Returns the input with the first `SEASON_REGEX` match removed and the
/// result trimmed, together with the season number parsed from capture
/// group 2 of that match. The season defaults to `1` when nothing matches.
pub fn get_season_and_title(season_and_title: &str) -> (String, i32) {
    let title = SEASON_REGEX
        .replace(season_and_title, "")
        .trim()
        .to_string();

    let season = SEASON_REGEX
        .captures(season_and_title)
        .map_or(1, |caps| {
            caps.get(2)
                .unwrap_or_else(|| unreachable!("season regex should have 2 groups"))
                .as_str()
                .parse::<i32>()
                .unwrap_or_else(|_| unreachable!("season should be a number"))
        });

    (title, season)
}
/// Detects the subtitle language from a subtitle file name.
///
/// The name is lower-cased and checked against the aliases of each entry of
/// `SUBTITLE_LANG` in iteration order; the language of the first entry with
/// an alias contained in the name is returned, or `None` when none matches.
///
/// Empty aliases are skipped: `str::contains("")` is true for every string,
/// so an empty alias would make its language match every file name.
pub fn get_subtitle_lang(subtitle_name: &str) -> Option<&'static str> {
    let subtitle_name_lower = subtitle_name.to_lowercase();
    for (lang, matches) in SUBTITLE_LANG.iter() {
        let hit = matches
            .iter()
            .any(|m| !m.is_empty() && subtitle_name_lower.contains(m));
        if hit {
            return Some(lang);
        }
    }
    None
}
/// Unfinished torrent-name parser.
///
/// Tries each rule in `TORRENT_PRASE_RULE_REGS` against `torrent_name` when
/// given, otherwise against the name derived from `torrent_path`. On the
/// first rule that matches it extracts the fansub group, title, season
/// (`season` overrides the parsed one) and episode number, then hits
/// `todo!()` — the result is never assembled or returned. `file_type` is
/// currently unused.
///
/// # Panics
///
/// Panics via `todo!()` as soon as any rule matches, and earlier if the
/// captured episode text is not a valid `i32`.
pub fn parse_torrent(
    torrent_path: &Path,
    torrent_name: Option<&str>,
    season: Option<i32>,
    file_type: Option<&str>,
) {
    let media_name = get_path_basename(torrent_path);
    // The matched text is the same for every rule, so pick it once.
    let subject = torrent_name.unwrap_or(media_name);

    for rule in TORRENT_PRASE_RULE_REGS.iter() {
        // Both "no match" and a regex runtime error move on to the next rule.
        let caps = match rule.captures(subject) {
            Ok(Some(caps)) => caps,
            _ => continue,
        };

        let group_and_title = caps
            .get(1)
            .unwrap_or_else(|| unreachable!("should have 1 group"))
            .as_str();
        let (group, raw_title) = get_fansub(group_and_title);
        let (title, parsed_season) = get_season_and_title(raw_title);
        let season = season.unwrap_or(parsed_season);
        let episode = caps
            .get(2)
            .unwrap_or_else(|| unreachable!("should have 2 group"))
            .as_str()
            .parse::<i32>()
            .unwrap_or_else(|_| unreachable!("episode should be a number"));
        let extension = media_name;
        todo!()
    }
}