feat: add tmdb parsers

This commit is contained in:
master 2024-03-25 02:15:00 +08:00
parent 4f124c9c0f
commit 5e51b2752d
15 changed files with 218 additions and 91 deletions

View File

@ -3,7 +3,7 @@ recorder = "run -p recorder --bin recorder_cli -- --environment recorder.develop
recorder-playground = "run -p recorder --example playground -- --environment recorder.development"
[build]
rustflags = ["-Zthreads=8"]
rustflags = ["-Zthreads=12", "-Clink-arg=-fuse-ld=lld"]
[target.x86_64-unknown-linux-gnu]
linker = "clang"

3
.gitignore vendored
View File

@ -221,4 +221,5 @@ index.d.ts.map
/*.session.sql
/temp
/rustc-ice-*
/rustc-ice-*
/test.env

20
Cargo.lock generated
View File

@ -1299,6 +1299,18 @@ dependencies = [
"matches",
]
[[package]]
name = "dateparser"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2ef451feee09ae5ecd8a02e738bd9adee9266b8fa9b44e22d3ce968d8694238"
dependencies = [
"anyhow",
"chrono",
"lazy_static",
"regex",
]
[[package]]
name = "der"
version = "0.7.8"
@ -1452,6 +1464,12 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "dotenv"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f"
[[package]]
name = "dotenvy"
version = "0.15.7"
@ -3622,6 +3640,8 @@ dependencies = [
"axum",
"bytes",
"chrono",
"dateparser",
"dotenv",
"eyre",
"fancy-regex",
"futures",

View File

@ -1,3 +1,8 @@
cargo-features = ["codegen-backend"]
[workspace]
members = ["crates/quirks_path", "crates/recorder"]
resolver = "2"
[profile.dev]
debug = 0
codegen-backend = "cranelift"

View File

@ -62,6 +62,8 @@ quirks_path = { path = "../quirks_path" }
tokio-utils = "0.1.2"
weak-table = "0.3.2"
oxilangtag = { version = "0.1.5", features = ["serde"] }
dateparser = "0.2.1"
dotenv = "0.15.0"
[dev-dependencies]
serial_test = "2.0.0"

View File

@ -60,12 +60,12 @@ impl LanguagePreset {
let primary = lang_tag.primary_language();
let region = lang_tag.region();
let kind = match primary {
"zh" if region == "TW" => LanguagePresetName::ZhTW,
"zh" if region == "CN" => LanguagePresetName::ZhCN,
"zh" => LanguagePresetName::Zh,
"en" => LanguagePresetName::En,
"ja" => LanguagePresetName::Ja,
let kind = match (primary, region) {
("zh", Some("TW")) => LanguagePresetName::ZhTW,
("zh", Some("CN")) => LanguagePresetName::ZhCN,
("zh", _) => LanguagePresetName::Zh,
("en", _) => LanguagePresetName::En,
("ja", _) => LanguagePresetName::Ja,
_ => Err(ParseError::UnsupportedLanguagePreset(s_rc.to_string()))?,
};

View File

@ -11,12 +11,19 @@ use serde::{Deserialize, Serialize};
)]
#[serde(rename_all = "snake_case")]
/// Distribution kinds a bangumi can be classified under.
///
/// With the `rename_all = "snake_case"` serde attribute on this enum, variants
/// are (de)serialized by their snake_case names; every variant additionally
/// declares a `sea_orm` `string_value` spelled the same way.
pub enum BangumiDistribution {
#[sea_orm(string_value = "movie")]
Movie,
#[sea_orm(string_value = "ova")]
Ova,
#[sea_orm(string_value = "oad")]
Oad,
#[sea_orm(string_value = "sp")]
Sp,
#[sea_orm(string_value = "ex")]
Ex,
#[sea_orm(string_value = "tv")]
Tv,
// NOTE(review): presumably the fallback when the distribution cannot be
// determined — confirm against the callers.
#[sea_orm(string_value = "unknown")]
Unknown,
}

View File

@ -5,11 +5,6 @@ use lazy_static::lazy_static;
use maplit::hashmap;
use regex::Regex;
const LANG_ZH_TW: &str = "zh-tw";
const LANG_ZH: &str = "zh";
const LANG_EN: &str = "en";
const LANG_JP: &str = "jp";
lazy_static! {
pub static ref SEASON_REGEX: Regex =
Regex::new(r"(S\|[Ss]eason\s+)(\d+)").expect("Invalid regex");

View File

@ -1,11 +1,15 @@
use serde::{Deserialize, Serialize};
use super::tmdb_client::TMDB_API_ORIGIN;
use crate::{
i18n::LanguagePreset,
models::bangumi::BangumiDistribution,
parsers::tmdb::{
tmdb_client::TmdbApiClient,
tmdb_dtos::{TmdbSearchMultiItemDto, TmdbSearchMultiPageDto},
tmdb_dtos::{
TmdbMediaDetailDto, TmdbMovieDetailDto, TmdbSearchMultiItemDto, TmdbSearchMultiPageDto,
TmdbTvSeriesDetailDto,
},
},
};
@ -31,9 +35,12 @@ const TMDB_ANIMATION_GENRE_ID: i64 = 16;
#[inline]
fn build_tmdb_search_api_url(query: &str, lang: &LanguagePreset, page: u32) -> String {
format!(
"{TMDB_API_ORIGIN}/3/search/multi?language={lang_tag}&query={query}&page={page}&\
"{endpoint}/3/search/multi?language={lang_tag}&query={query}&page={page}&\
include_adult=true",
endpoint = TMDB_API_ORIGIN,
lang_tag = lang.name_str(),
query = query,
page = page
)
}
@ -49,7 +56,10 @@ fn build_tmdb_info_api_url(
_ => "tv",
};
format!(
"{TMDB_API_ORIGIN}/3/{tmdb_media_type}/{id}?language={lang_tag}",
"{endpoint}/3/{tmdb_media_type}/{id}?language={lang_tag}",
endpoint = TMDB_API_ORIGIN,
tmdb_media_type = tmdb_media_type,
id = id,
lang_tag = lang.name_str()
)
}
@ -94,9 +104,16 @@ pub async fn get_tmdb_info_from_id_lang_and_distribution(
id: i64,
lang: &LanguagePreset,
distribution: &BangumiDistribution,
) -> eyre::Result<TmdbSearchMultiItemDto> {
) -> eyre::Result<TmdbMediaDetailDto> {
let info_url = build_tmdb_info_api_url(id, lang, distribution);
let info: TmdbSearchMultiItemDto = tmdb_client.fetch(|fetch| fetch.get(info_url)).await?;
let info = if distribution == &BangumiDistribution::Movie {
let info: Box<TmdbMovieDetailDto> = tmdb_client.fetch(|fetch| fetch.get(info_url)).await?;
TmdbMediaDetailDto::Movie(info)
} else {
let info: Box<TmdbTvSeriesDetailDto> =
tmdb_client.fetch(|fetch| fetch.get(info_url)).await?;
TmdbMediaDetailDto::Tv(info)
};
Ok(info)
}
@ -113,46 +130,76 @@ pub async fn parse_tmdb_bangumi_from_title_and_lang(
.await?;
}
if search_result.is_empty() {
return Ok(None);
Ok(None)
} else {
let mut target_and_priority: Option<(TmdbSearchMultiItemDto, u32)> = None;
let mut target_and_priority: Option<(&TmdbSearchMultiItemDto, u32)> = None;
for item in search_result.iter() {
let is_animation = tmdb_genres_is_match_animation(&item.genre_ids);
let is_prefer_media_type =
item.media_type.as_deref() == Some(distribution.prefer_tmdb_media_type());
let is_prefer_media_type = item.media_type == distribution.prefer_tmdb_media_type();
let priority =
(if is_prefer_media_type { 10 } else { 0 }) + (if is_animation { 1 } else { 0 });
if let Some((last_target_id, last_priority)) = target_and_priority.as_deref_mut() {
if priority > last_priority {
*last_target_id = item;
if let Some((last_target, last_priority)) = target_and_priority.as_mut() {
if priority > *last_priority {
*last_target = item;
}
} else {
target_and_priority = Some((item, priority));
}
}
if let Some((target, _)) = target_and_priority {
let info_url = get_tmdb_info_from_id_lang_and_distribution(
let info = get_tmdb_info_from_id_lang_and_distribution(
tmdb_client,
target.id,
lang,
BangumiDistribution::from_tmdb_media_type(target.media_type),
);
let info: TmdbSearchMultiItemDto =
tmdb_client.fetch(|fetch| fetch.get(info_url)).await?;
let last_season = match distribution {
BangumiDistribution::Movie => 1,
BangumiDistribution::Tv => info.number_of_seasons,
_ => 1,
};
Ok(Some(TmdbBangumiItem {
id: info.id,
name: info.name,
origin_name: info.original_name,
last_season,
year: info.first_air_date,
poster_link: info.poster_path,
}))
&BangumiDistribution::from_tmdb_media_type(&target.media_type),
)
.await?;
match info {
TmdbMediaDetailDto::Movie(info) => Ok(Some(TmdbBangumiItem {
id: info.id,
name: info.name,
origin_name: info.original_name,
last_season: 1,
year: Some(info.release_date),
poster_link: info.poster_path,
})),
TmdbMediaDetailDto::Tv(info) => Ok(Some(TmdbBangumiItem {
id: info.id,
name: info.name,
origin_name: info.original_name,
last_season: info.number_of_seasons,
year: info.first_air_date,
poster_link: info.poster_path,
})),
}
} else {
Ok(None)
}
}
}
#[cfg(test)]
mod tests {
    use crate::{
        i18n::LanguagePreset,
        models::bangumi::BangumiDistribution,
        parsers::tmdb::{
            tmdb_bgm_parser::parse_tmdb_bangumi_from_title_and_lang,
            tmdb_client::tests::prepare_tmdb_api_client,
        },
    };

    /// Looks up a partial Chinese title as a TV series and checks the name
    /// of the item that comes back.
    ///
    /// This talks to the live TMDB API through the client returned by
    /// `prepare_tmdb_api_client`, so it needs network access and a valid token.
    #[tokio::test]
    async fn test_parse_tmdb_bangumi_from_title_and_lang() {
        let client = prepare_tmdb_api_client().await;
        let lang = LanguagePreset::parse("zh-CN").expect("failed to create language preset");
        let distribution = BangumiDistribution::Tv;

        let found = parse_tmdb_bangumi_from_title_and_lang(
            client.as_ref(),
            "青春猪头",
            &lang,
            &distribution,
        )
        .await
        .expect("failed to parse tmdb bangumi from title and lang");

        // An empty name stands in for "nothing found" so the assertion below
        // reports a readable mismatch instead of unwrapping.
        let found_name = found.as_ref().map_or("", |item| item.name.as_str());
        assert_eq!(found_name, "青春猪头少年不会梦到兔女郎学姐");
    }
}

View File

@ -1,8 +1,9 @@
use std::sync::{Arc, RwLock, Weak};
use std::sync::{Arc, Weak};
use lazy_static::lazy_static;
use opendal::raw::Accessor;
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION};
use serde::de::DeserializeOwned;
use tokio::sync::RwLock;
use tokio_utils::RateLimiter;
use weak_table::WeakValueHashMap;
@ -19,28 +20,30 @@ pub struct TmdbApiClient {
lazy_static! {
static ref TMDB_API_CLIENT_MAP: Arc<RwLock<WeakValueHashMap<String, Weak<TmdbApiClient>>>> =
{ Arc::new(RwLock::new(WeakValueHashMap::new())) };
Arc::new(RwLock::new(WeakValueHashMap::new()));
}
impl TmdbApiClient {
pub async fn new<S: AsRef<str>>(api_token: S) -> Arc<Self> {
pub async fn new<S: AsRef<str>>(api_token: S) -> eyre::Result<Arc<Self>> {
let api_token = api_token.as_ref();
let map_read = TMDB_API_CLIENT_MAP.read().await;
if let Some(client) = map_read.get(api_token) {
return client.clone();
{
let map_read = TMDB_API_CLIENT_MAP.read().await;
if let Some(client) = map_read.get(api_token) {
return Ok(client.clone());
}
}
let client = Arc::new(TmdbApiClient {
api_token: api_token.to_string(),
rate_limiter: RateLimiter::new(std::time::Duration::from_millis(50)),
fetch_client: reqwest::Client::builder()
.user_agent(DEFAULT_USER_AGENT)
.build(),
.build()?,
headers: {
let mut header_map = HeaderMap::new();
header_map.insert(ACCEPT, HeaderValue::from("application/json"));
header_map.insert(ACCEPT, HeaderValue::from_static("application/json"));
header_map.insert(
AUTHORIZATION,
HeaderValue::from(format!("Bearer {api_token}")),
HeaderValue::from_str(&format!("Bearer {api_token}"))?,
);
header_map
},
@ -49,7 +52,7 @@ impl TmdbApiClient {
let mut map_write = TMDB_API_CLIENT_MAP.write().await;
map_write.insert(api_token.to_string(), client.clone());
}
client.clone()
Ok(client)
}
pub fn get_api_token(&self) -> &str {
@ -59,6 +62,7 @@ impl TmdbApiClient {
pub async fn fetch<R, F>(&self, f: F) -> Result<R, reqwest::Error>
where
F: FnOnce(&reqwest::Client) -> reqwest::RequestBuilder,
R: DeserializeOwned,
{
self.rate_limiter
.throttle(|| async {
@ -72,3 +76,18 @@ impl TmdbApiClient {
.await
}
}
#[cfg(test)]
pub(crate) mod tests {
    use std::{env, sync::Arc};

    use crate::parsers::tmdb::tmdb_client::TmdbApiClient;

    /// Builds the [`TmdbApiClient`] used by the TMDB integration tests.
    ///
    /// The token is read from the `TMDB_API_TOKEN` environment variable. A
    /// `test.env` file is loaded first when one can be found, but its absence
    /// is tolerated so the token can also be supplied directly through the
    /// process environment (for example on CI).
    ///
    /// # Panics
    ///
    /// Panics if loading `test.env` fails for any reason other than the file
    /// not being found, if `TMDB_API_TOKEN` is not set afterwards, or if the
    /// client cannot be constructed.
    pub async fn prepare_tmdb_api_client() -> Arc<TmdbApiClient> {
        if let Err(err) = dotenv::from_filename("test.env") {
            // Only a missing file is acceptable; a malformed one must still fail loudly.
            assert!(err.not_found(), "failed to load test.env: {err}");
        }
        let tmdb_api_token = env::var("TMDB_API_TOKEN").expect("TMDB_API_TOKEN is not set");
        TmdbApiClient::new(tmdb_api_token)
            .await
            .expect("failed to create tmdb api client")
    }
}

View File

@ -1,25 +1,27 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbListItemDto {
pub id: i64,
#[serde(alias = "title")]
pub name: String,
#[serde(alias = "original_title")]
pub original_name: String,
pub original_language: String,
pub adult: bool,
pub poster_path: Option<String>,
pub backdrop_path: Option<String>,
pub media_type: String,
pub original_language: String,
pub original_name: String,
pub overview: String,
pub genre_ids: Vec<i64>,
pub popularity: f64,
pub popularity: f32,
pub first_air_date: String,
pub origin_country: Option<Vec<String>>,
pub vote_average: f32,
pub vote_count: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbListPageDto {
pub id: i64,
pub page: u32,
@ -30,13 +32,13 @@ pub struct TmdbListPageDto {
pub results: Vec<TmdbListItemDto>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Genre entry made of a numeric id and a name; used as the element type of
/// `TmdbTvSeriesDetailDto::genres`.
pub struct TmdbGenresObjDto {
pub id: i64,
pub name: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbEpisodeAirDto {
pub id: i64,
pub name: String,
@ -53,7 +55,7 @@ pub struct TmdbEpisodeAirDto {
pub still_path: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbSeasonDto {
pub air_date: String,
pub episode_count: i32,
@ -65,17 +67,21 @@ pub struct TmdbSeasonDto {
pub vote_average: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Spoken-language entry; used as the element type of
/// `TmdbMovieDetailDto::spoken_languages`.
pub struct TmdbSpokenLanguageDto {
// presumably the ISO 639-1 language code — confirm against the TMDB API docs
pub iso_639_1: String,
pub name: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbTvSeriesDetailDto {
pub adult: bool,
pub id: i64,
#[serde(alias = "title")]
pub name: String,
#[serde(alias = "original_title")]
pub original_name: String,
pub original_language: String,
pub backdrop_path: Option<String>,
pub episode_run_time: Option<Vec<i32>>,
pub genres: Vec<TmdbGenresObjDto>,
@ -88,9 +94,7 @@ pub struct TmdbTvSeriesDetailDto {
pub next_episode_to_air: Option<TmdbEpisodeAirDto>,
pub number_of_episodes: i32,
pub number_of_seasons: i32,
pub origin_country: Vec<String>,
pub original_language: String,
pub original_name: String,
pub origin_country: Option<Vec<String>>,
pub overview: String,
pub popularity: f32,
pub poster_path: Option<String>,
@ -102,8 +106,12 @@ pub struct TmdbTvSeriesDetailDto {
pub vote_count: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbMovieDetailDto {
#[serde(alias = "title")]
pub name: String,
#[serde(alias = "original_title")]
pub original_name: String,
pub adult: bool,
pub backdrop_path: Option<String>,
pub homepage: Option<String>,
@ -111,7 +119,6 @@ pub struct TmdbMovieDetailDto {
pub budget: i64,
pub imdb_id: Option<String>,
pub original_language: String,
pub original_title: String,
pub overview: String,
pub popularity: f32,
pub poster_path: Option<String>,
@ -121,20 +128,21 @@ pub struct TmdbMovieDetailDto {
pub spoken_languages: Vec<TmdbSpokenLanguageDto>,
pub status: String,
pub tagline: String,
pub title: String,
pub video: bool,
pub vote_average: f32,
pub vote_count: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbSearchMultiItemDto {
pub adult: bool,
pub backdrop_path: Option<String>,
pub id: i64,
#[serde(alias = "title")]
pub name: String,
pub original_language: String,
#[serde(alias = "original_title")]
pub original_name: String,
pub original_language: String,
pub overview: String,
pub poster_path: Option<String>,
pub media_type: String,
@ -143,16 +151,17 @@ pub struct TmdbSearchMultiItemDto {
pub first_air_date: Option<String>,
pub vote_average: f32,
pub vote_count: i32,
pub origin_country: Vec<String>,
pub origin_country: Option<Vec<String>>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "media_type", rename_all = "snake_case")]
pub enum TmdbMediaDetailDto {
Tv(TmdbTvSeriesDetailDto),
Movie(TmdbMovieDetailDto),
Tv(Box<TmdbTvSeriesDetailDto>),
Movie(Box<TmdbMovieDetailDto>),
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbSearchMultiPageDto {
pub total_results: u32,
pub total_pages: u32,

View File

@ -1,5 +1,4 @@
use std::fmt::Debug;
use super::tmdb_client::TMDB_API_ORIGIN;
use crate::{
i18n::LanguagePreset,
parsers::tmdb::{
@ -11,15 +10,18 @@ use crate::{
#[inline]
fn build_tmdb_list_api_url(list_id: i64, lang: &LanguagePreset, page: u32) -> String {
format!(
"{TMDB_API_ORIGIN}/4/list/{list_id}?language={lang_tag}&{page}",
lang_tag = lang.name_str()
"{endpoint}/4/list/{list_id}?language={lang_tag}&page={page}",
endpoint = TMDB_API_ORIGIN,
list_id = list_id,
lang_tag = lang.name_str(),
page = page
)
}
pub async fn parse_tmdb_list_items_from_list_api(
tmdb_client: &TmdbApiClient,
list_id: i64,
lang: &LanguagePreset,
tmdb_client: &TmdbApiClient,
) -> eyre::Result<Vec<TmdbListItemDto>> {
let mut items: Vec<TmdbListItemDto> = vec![];
@ -37,8 +39,28 @@ pub async fn parse_tmdb_list_items_from_list_api(
let page: TmdbListPageDto = tmdb_client
.fetch(|fetch| fetch.get(build_tmdb_list_api_url(list_id, lang, i)))
.await?;
items.extend(page.results);
}
Ok(items)
}
#[cfg(test)]
mod tests {
    use super::{
        super::tmdb_client::tests::prepare_tmdb_api_client, parse_tmdb_list_items_from_list_api,
    };
    use crate::i18n::LanguagePreset;

    /// Fetches every page of a fixed TMDB list and checks that a known title
    /// is among the returned items.
    ///
    /// This talks to the live TMDB API through the client returned by
    /// `prepare_tmdb_api_client`, so it needs network access and a valid token.
    #[tokio::test]
    async fn test_parse_tmdb_list_items_from_list_api() {
        let client = prepare_tmdb_api_client().await;
        let lang = LanguagePreset::parse("zh-CN").expect("failed to create language preset");

        let items = parse_tmdb_list_items_from_list_api(client.as_ref(), 8294054, &lang)
            .await
            .expect("failed to parse tmdb list items from list api");

        assert!(items.iter().any(|item| item.name == "葬送的芙莉莲"));
    }
}

View File

@ -84,11 +84,10 @@ fn get_season_and_title(season_and_title: &str) -> (String, i32) {
(title, season)
}
fn get_subtitle_lang(subtitle_str: &str) -> Option<&str> {
let media_name_lower = subtitle_str.to_lowercase().trim();
LanguagePreset::parse(media_name_lower)
.ok()
.map(|p| p.name_str())
/// Tries to interpret a subtitle marker as a language preset.
///
/// The input is lowercased and trimmed before being handed to
/// `LanguagePreset::parse`; a parse error is discarded and reported as `None`.
fn get_subtitle_lang(subtitle_str: &str) -> Option<LanguagePreset> {
    // NOTE(review): the preset parser matches regions as "TW"/"CN"; confirm the
    // tag is case-normalized there, otherwise lowercasing here drops the region.
    let normalized = subtitle_str.to_lowercase();
    LanguagePreset::parse(normalized.trim()).ok()
}
pub fn parse_episode_media_meta_from_torrent(
@ -162,7 +161,7 @@ pub fn parse_episode_subtitle_meta_from_torrent(
Ok(TorrentEpisodeSubtitleMeta {
media: media_meta,
lang: lang.map(|s| s.to_string()),
lang: lang.map(|s| s.name_str().to_string()),
})
}
@ -266,7 +265,7 @@ mod tests {
let expected: Option<TorrentEpisodeSubtitleMeta> = serde_json::from_str(expected).ok();
let found_raw =
parse_episode_subtitle_meta_from_torrent(Path::new(raw_name), None, None);
let found = found_raw.as_ref().ok().map(|s| s.clone());
let found = found_raw.as_ref().ok().cloned();
if expected != found {
if found_raw.is_ok() {
@ -287,7 +286,7 @@ mod tests {
} else {
let expected: Option<TorrentEpisodeMediaMeta> = serde_json::from_str(expected).ok();
let found_raw = parse_episode_media_meta_from_torrent(Path::new(raw_name), None, None);
let found = found_raw.as_ref().ok().map(|s| s.clone());
let found = found_raw.as_ref().ok().cloned();
if expected != found {
if found_raw.is_ok() {

View File

@ -71,7 +71,7 @@ pub fn gen_bangumi_sub_path(data: &bangumi::Model) -> PathBuf {
}
pub fn rule_name(bgm: &bangumi::Model, conf: &subscribers::SubscriberBangumiConfig) -> String {
if let (Some(true), Some(group_name)) = (conf.leading_fansub_tag, &bgm.fansub) {
if let (true, Some(group_name)) = (conf.leading_fansub_tag, &bgm.fansub) {
format!("[{}] {} S{}", group_name, bgm.official_title, bgm.season)
} else {
format!("{} S{}", bgm.official_title, bgm.season)

1
test.env.example Normal file
View File

@ -0,0 +1 @@
TMDB_API_TOKEN=your_token_here