feat: add basic webui
This commit is contained in:
64
apps/recorder/src/extract/mikan/client.rs
Normal file
64
apps/recorder/src/extract/mikan/client.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use loco_rs::app::{AppContext, Initializer};
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
use super::{AppMikanConfig, MIKAN_BASE_URL};
|
||||
use crate::{config::AppConfigExt, fetch::HttpClient};
|
||||
|
||||
/// Process-wide slot for the shared [`AppMikanClient`].
///
/// It is filled by `AppMikanClientInitializer::before_run` and read through
/// `AppMikanClient::global`.
static APP_MIKAN_CLIENT: OnceCell<AppMikanClient> = OnceCell::new();
|
||||
|
||||
pub struct AppMikanClient {
|
||||
http_client: HttpClient,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl AppMikanClient {
|
||||
pub fn new(mut config: AppMikanConfig) -> loco_rs::Result<Self> {
|
||||
let http_client =
|
||||
HttpClient::new(config.http_client.take()).map_err(loco_rs::Error::wrap)?;
|
||||
let base_url = config
|
||||
.base_url
|
||||
.unwrap_or_else(|| String::from(MIKAN_BASE_URL));
|
||||
Ok(Self {
|
||||
http_client,
|
||||
base_url,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn global() -> &'static AppMikanClient {
|
||||
APP_MIKAN_CLIENT
|
||||
.get()
|
||||
.expect("Global mikan http client is not initialized")
|
||||
}
|
||||
|
||||
pub fn base_url(&self) -> &str {
|
||||
&self.base_url
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for AppMikanClient {
|
||||
type Target = HttpClient;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.http_client
|
||||
}
|
||||
}
|
||||
|
||||
/// Application initializer that sets up the global [`AppMikanClient`] in its
/// `before_run` hook.
pub struct AppMikanClientInitializer;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Initializer for AppMikanClientInitializer {
|
||||
fn name(&self) -> String {
|
||||
"AppMikanClientInitializer".to_string()
|
||||
}
|
||||
|
||||
async fn before_run(&self, app_context: &AppContext) -> loco_rs::Result<()> {
|
||||
let config = &app_context.config;
|
||||
let app_mikan_conf = config.get_mikan_conf()?.unwrap_or_default();
|
||||
|
||||
APP_MIKAN_CLIENT.get_or_try_init(|| AppMikanClient::new(app_mikan_conf))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
11
apps/recorder/src/extract/mikan/config.rs
Normal file
11
apps/recorder/src/extract/mikan/config.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::fetch::HttpClientConfig;
|
||||
|
||||
/// Key naming the mikan configuration section.
// NOTE(review): presumably the key looked up by `get_mikan_conf` — confirm against `AppConfigExt`.
pub const MIKAN_CONF_KEY: &str = "mikan";
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
pub struct AppMikanConfig {
|
||||
pub http_client: Option<HttpClientConfig>,
|
||||
pub base_url: Option<String>,
|
||||
}
|
||||
4
apps/recorder/src/extract/mikan/constants.rs
Normal file
4
apps/recorder/src/extract/mikan/constants.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
/// Bucket name passed to the storage layer for Mikan assets.
pub const MIKAN_BUCKET_KEY: &str = "mikan";

/// Default base URL of the Mikan site.
pub const MIKAN_BASE_URL: &str = "https://mikanani.me/";

/// Display name for releases without a known fansub group
/// (roughly "raw / unknown subtitles").
pub const MIKAN_UNKNOWN_FANSUB_NAME: &str = "生肉/不明字幕";

/// Fansub id paired with [`MIKAN_UNKNOWN_FANSUB_NAME`].
// NOTE(review): presumably the id Mikan itself assigns to that group — confirm.
pub const MIKAN_UNKNOWN_FANSUB_ID: &str = "202";
|
||||
22
apps/recorder/src/extract/mikan/mod.rs
Normal file
22
apps/recorder/src/extract/mikan/mod.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
pub mod client;
|
||||
pub mod config;
|
||||
pub mod constants;
|
||||
pub mod rss_parser;
|
||||
pub mod web_parser;
|
||||
|
||||
pub use client::{AppMikanClient, AppMikanClientInitializer};
|
||||
pub use config::{AppMikanConfig, MIKAN_CONF_KEY};
|
||||
pub use constants::{MIKAN_BASE_URL, MIKAN_BUCKET_KEY};
|
||||
pub use rss_parser::{
|
||||
build_mikan_bangumi_rss_link, build_mikan_subscriber_aggregation_rss_link,
|
||||
parse_mikan_bangumi_id_from_rss_link, parse_mikan_rss_channel_from_rss_link,
|
||||
parse_mikan_rss_items_from_rss_link, parse_mikan_subscriber_aggregation_id_from_rss_link,
|
||||
MikanBangumiAggregationRssChannel, MikanBangumiRssChannel, MikanBangumiRssLink,
|
||||
MikanRssChannel, MikanRssItem, MikanSubscriberAggregationRssChannel,
|
||||
MikanSubscriberAggregationRssLink,
|
||||
};
|
||||
pub use web_parser::{
|
||||
build_mikan_bangumi_homepage, build_mikan_episode_homepage,
|
||||
parse_mikan_bangumi_meta_from_mikan_homepage, parse_mikan_episode_meta_from_mikan_homepage,
|
||||
MikanBangumiMeta, MikanEpisodeMeta,
|
||||
};
|
||||
353
apps/recorder/src/extract/mikan/rss_parser.rs
Normal file
353
apps/recorder/src/extract/mikan/rss_parser.rs
Normal file
@@ -0,0 +1,353 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use chrono::DateTime;
|
||||
use itertools::Itertools;
|
||||
use reqwest::IntoUrl;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use torrent::core::BITTORRENT_MIME_TYPE;
|
||||
use url::Url;
|
||||
|
||||
use super::{
|
||||
web_parser::{parse_mikan_episode_id_from_homepage, MikanEpisodeHomepage},
|
||||
AppMikanClient,
|
||||
};
|
||||
use crate::{extract::errors::ParseError, fetch::bytes::download_bytes_with_client};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct MikanRssItem {
|
||||
pub title: String,
|
||||
pub homepage: Url,
|
||||
pub url: Url,
|
||||
pub content_length: Option<u64>,
|
||||
pub mime: String,
|
||||
pub pub_date: Option<i64>,
|
||||
pub mikan_episode_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct MikanBangumiRssChannel {
|
||||
pub name: String,
|
||||
pub url: Url,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: String,
|
||||
pub items: Vec<MikanRssItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct MikanBangumiAggregationRssChannel {
|
||||
pub name: String,
|
||||
pub url: Url,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub items: Vec<MikanRssItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct MikanSubscriberAggregationRssChannel {
|
||||
pub mikan_aggregation_id: String,
|
||||
pub url: Url,
|
||||
pub items: Vec<MikanRssItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum MikanRssChannel {
|
||||
Bangumi(MikanBangumiRssChannel),
|
||||
BangumiAggregation(MikanBangumiAggregationRssChannel),
|
||||
SubscriberAggregation(MikanSubscriberAggregationRssChannel),
|
||||
}
|
||||
|
||||
impl MikanRssChannel {
|
||||
pub fn items(&self) -> &[MikanRssItem] {
|
||||
match &self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { items, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
|
||||
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
|
||||
items
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_items(self) -> Vec<MikanRssItem> {
|
||||
match self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { items, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { items, .. })
|
||||
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { items, .. }) => {
|
||||
items
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> Option<&str> {
|
||||
match &self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { name, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { name, .. }) => {
|
||||
Some(name.as_str())
|
||||
}
|
||||
Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { .. }) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn url(&self) -> &Url {
|
||||
match &self {
|
||||
Self::Bangumi(MikanBangumiRssChannel { url, .. })
|
||||
| Self::BangumiAggregation(MikanBangumiAggregationRssChannel { url, .. })
|
||||
| Self::SubscriberAggregation(MikanSubscriberAggregationRssChannel { url, .. }) => url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<rss::Item> for MikanRssItem {
|
||||
type Error = ParseError;
|
||||
|
||||
fn try_from(item: rss::Item) -> Result<Self, Self::Error> {
|
||||
let mime_type = item
|
||||
.enclosure()
|
||||
.map(|x| x.mime_type.to_string())
|
||||
.unwrap_or_default();
|
||||
if mime_type == BITTORRENT_MIME_TYPE {
|
||||
let enclosure = item.enclosure.unwrap();
|
||||
|
||||
let homepage = item
|
||||
.link
|
||||
.ok_or_else(|| ParseError::MikanRssItemFormatError {
|
||||
reason: String::from("must to have link for homepage"),
|
||||
})?;
|
||||
|
||||
let homepage = Url::parse(&homepage)?;
|
||||
|
||||
let enclosure_url = Url::parse(&enclosure.url)?;
|
||||
|
||||
let MikanEpisodeHomepage {
|
||||
mikan_episode_id, ..
|
||||
} = parse_mikan_episode_id_from_homepage(&homepage).ok_or_else(|| {
|
||||
ParseError::MikanRssItemFormatError {
|
||||
reason: String::from("homepage link format invalid"),
|
||||
}
|
||||
})?;
|
||||
|
||||
Ok(MikanRssItem {
|
||||
title: item.title.unwrap_or_default(),
|
||||
homepage,
|
||||
url: enclosure_url,
|
||||
content_length: enclosure.length.parse().ok(),
|
||||
mime: enclosure.mime_type,
|
||||
pub_date: item
|
||||
.pub_date
|
||||
.and_then(|s| DateTime::parse_from_rfc2822(&s).ok())
|
||||
.map(|s| s.timestamp_millis()),
|
||||
mikan_episode_id,
|
||||
})
|
||||
} else {
|
||||
Err(ParseError::MimeError {
|
||||
expected: String::from(BITTORRENT_MIME_TYPE),
|
||||
found: mime_type,
|
||||
desc: String::from("MikanRssItem"),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifiers carried by a `/RSS/Bangumi` link.
#[derive(Debug, Clone)]
pub struct MikanBangumiRssLink {
    /// Value of the `bangumiId` query parameter.
    pub mikan_bangumi_id: String,
    /// Value of the `subgroupid` query parameter, when present.
    pub mikan_fansub_id: Option<String>,
}
|
||||
|
||||
/// Identifier carried by a `/RSS/MyBangumi` link.
#[derive(Debug, Clone)]
pub struct MikanSubscriberAggregationRssLink {
    /// Value of the `token` query parameter.
    pub mikan_aggregation_id: String,
}
|
||||
|
||||
pub fn build_mikan_bangumi_rss_link(
|
||||
mikan_base_url: &str,
|
||||
mikan_bangumi_id: &str,
|
||||
mikan_fansub_id: Option<&str>,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = Url::parse(mikan_base_url)?;
|
||||
url.set_path("/RSS/Bangumi");
|
||||
url.query_pairs_mut()
|
||||
.append_pair("bangumiId", mikan_bangumi_id);
|
||||
if let Some(mikan_fansub_id) = mikan_fansub_id {
|
||||
url.query_pairs_mut()
|
||||
.append_pair("subgroupid", mikan_fansub_id);
|
||||
};
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn build_mikan_subscriber_aggregation_rss_link(
|
||||
mikan_base_url: &str,
|
||||
mikan_aggregation_id: &str,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = Url::parse(mikan_base_url)?;
|
||||
url.set_path("/RSS/MyBangumi");
|
||||
url.query_pairs_mut()
|
||||
.append_pair("token", mikan_aggregation_id);
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn parse_mikan_bangumi_id_from_rss_link(url: &Url) -> Option<MikanBangumiRssLink> {
|
||||
if url.path() == "/RSS/Bangumi" {
|
||||
url.query_pairs()
|
||||
.find(|(k, _)| k == "bangumiId")
|
||||
.map(|(_, v)| MikanBangumiRssLink {
|
||||
mikan_bangumi_id: v.to_string(),
|
||||
mikan_fansub_id: url
|
||||
.query_pairs()
|
||||
.find(|(k, _)| k == "subgroupid")
|
||||
.map(|(_, v)| v.to_string()),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_mikan_subscriber_aggregation_id_from_rss_link(
|
||||
url: &Url,
|
||||
) -> Option<MikanSubscriberAggregationRssLink> {
|
||||
if url.path() == "/RSS/MyBangumi" {
|
||||
url.query_pairs().find(|(k, _)| k == "token").map(|(_, v)| {
|
||||
MikanSubscriberAggregationRssLink {
|
||||
mikan_aggregation_id: v.to_string(),
|
||||
}
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_rss_items_from_rss_link(
|
||||
client: Option<&AppMikanClient>,
|
||||
url: impl IntoUrl,
|
||||
) -> eyre::Result<Vec<MikanRssItem>> {
|
||||
let channel = parse_mikan_rss_channel_from_rss_link(client, url).await?;
|
||||
|
||||
Ok(channel.into_items())
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_rss_channel_from_rss_link(
|
||||
client: Option<&AppMikanClient>,
|
||||
url: impl IntoUrl,
|
||||
) -> eyre::Result<MikanRssChannel> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let bytes = download_bytes_with_client(http_client, url.as_str()).await?;
|
||||
|
||||
let channel = rss::Channel::read_from(&bytes[..])?;
|
||||
|
||||
let channel_link = Url::parse(channel.link())?;
|
||||
|
||||
if let Some(MikanBangumiRssLink {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
}) = parse_mikan_bangumi_id_from_rss_link(&channel_link)
|
||||
{
|
||||
let channel_name = channel.title().replace("Mikan Project - ", "");
|
||||
|
||||
let items = channel
|
||||
.items
|
||||
.into_iter()
|
||||
// @TODO log error
|
||||
.flat_map(MikanRssItem::try_from)
|
||||
.collect_vec();
|
||||
|
||||
if let Some(mikan_fansub_id) = mikan_fansub_id {
|
||||
Ok(MikanRssChannel::Bangumi(MikanBangumiRssChannel {
|
||||
name: channel_name,
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
url: channel_link,
|
||||
items,
|
||||
}))
|
||||
} else {
|
||||
Ok(MikanRssChannel::BangumiAggregation(
|
||||
MikanBangumiAggregationRssChannel {
|
||||
name: channel_name,
|
||||
mikan_bangumi_id,
|
||||
url: channel_link,
|
||||
items,
|
||||
},
|
||||
))
|
||||
}
|
||||
} else if let Some(MikanSubscriberAggregationRssLink {
|
||||
mikan_aggregation_id,
|
||||
..
|
||||
}) = parse_mikan_subscriber_aggregation_id_from_rss_link(&channel_link)
|
||||
{
|
||||
let items = channel
|
||||
.items
|
||||
.into_iter()
|
||||
// @TODO log error
|
||||
.flat_map(MikanRssItem::try_from)
|
||||
.collect_vec();
|
||||
|
||||
return Ok(MikanRssChannel::SubscriberAggregation(
|
||||
MikanSubscriberAggregationRssChannel {
|
||||
mikan_aggregation_id,
|
||||
items,
|
||||
url: channel_link,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
return Err(ParseError::MikanRssFormatError {
|
||||
url: url.as_str().into(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;

    use torrent::core::BITTORRENT_MIME_TYPE;

    use crate::extract::mikan::{
        parse_mikan_rss_channel_from_rss_link, MikanBangumiAggregationRssChannel,
        MikanBangumiRssChannel, MikanRssChannel,
    };

    /// Fetches two live Mikan feeds (network required) and checks how they
    /// are classified and named.
    #[tokio::test]
    pub async fn test_parse_mikan_rss_channel_from_rss_link() {
        // Feed narrowed to a single fansub group.
        let fansub_feed_url = "https://mikanani.me/RSS/Bangumi?bangumiId=3141&subgroupid=370";

        let fansub_channel = parse_mikan_rss_channel_from_rss_link(None, fansub_feed_url)
            .await
            .expect("should get mikan channel from rss url");

        assert_matches!(
            &fansub_channel,
            MikanRssChannel::Bangumi(MikanBangumiRssChannel { .. })
        );
        assert_matches!(&fansub_channel.name(), Some("葬送的芙莉莲"));

        let first_item = fansub_channel
            .items()
            .first()
            .expect("mikan subscriptions should have at least one subs");

        assert_eq!(first_item.mime, BITTORRENT_MIME_TYPE);
        assert!(first_item
            .homepage
            .as_str()
            .starts_with("https://mikanani.me/Home/Episode"));
        assert!(first_item.title.as_str().contains("葬送的芙莉莲"));

        // Feed covering every fansub group of a bangumi.
        let aggregation_feed_url = "https://mikanani.me/RSS/Bangumi?bangumiId=3416";

        let aggregation_channel =
            parse_mikan_rss_channel_from_rss_link(None, aggregation_feed_url)
                .await
                .expect("should get mikan channel from rss url");

        assert_matches!(
            &aggregation_channel,
            MikanRssChannel::BangumiAggregation(MikanBangumiAggregationRssChannel { .. })
        );
        assert_matches!(&aggregation_channel.name(), Some("叹气的亡灵想隐退"));
    }
}
|
||||
493
apps/recorder/src/extract/mikan/web_parser.rs
Normal file
493
apps/recorder/src/extract/mikan/web_parser.rs
Normal file
@@ -0,0 +1,493 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use bytes::Bytes;
|
||||
use eyre::ContextCompat;
|
||||
use html_escape::decode_html_entities;
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use lightningcss::{properties::Property, values::image::Image as CSSImage};
|
||||
use loco_rs::app::AppContext;
|
||||
use regex::Regex;
|
||||
use scraper::Html;
|
||||
use url::Url;
|
||||
|
||||
use super::{
|
||||
parse_mikan_bangumi_id_from_rss_link, AppMikanClient, MikanBangumiRssLink, MIKAN_BUCKET_KEY,
|
||||
};
|
||||
use crate::{
|
||||
app::AppContextExt,
|
||||
dal::DalContentCategory,
|
||||
extract::html::parse_style_attr,
|
||||
fetch::{html::download_html_with_client, image::download_image_with_client},
|
||||
models::subscribers,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanEpisodeMeta {
|
||||
pub homepage: Url,
|
||||
pub origin_poster_src: Option<Url>,
|
||||
pub bangumi_title: String,
|
||||
pub episode_title: String,
|
||||
pub fansub: String,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: String,
|
||||
pub mikan_episode_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanBangumiMeta {
|
||||
pub homepage: Url,
|
||||
pub origin_poster_src: Option<Url>,
|
||||
pub bangumi_title: String,
|
||||
pub mikan_bangumi_id: String,
|
||||
pub mikan_fansub_id: Option<String>,
|
||||
pub fansub: Option<String>,
|
||||
pub mikan_fansub_candidates: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MikanBangumiPosterMeta {
|
||||
pub origin_poster_src: Url,
|
||||
pub poster_data: Option<Bytes>,
|
||||
pub poster_src: Option<String>,
|
||||
}
|
||||
|
||||
/// Identifier extracted from a `/Home/Episode/...` homepage URL.
#[derive(Clone, Debug, PartialEq)]
pub struct MikanEpisodeHomepage {
    /// The path portion following `/Home/Episode/`.
    pub mikan_episode_id: String,
}
|
||||
|
||||
lazy_static! {
    /// Matches a "第…季" span in a title.
    // NOTE(review): presumably a season marker such as "第二季"; nothing in the
    // visible code uses it — confirm it is still needed.
    static ref MIKAN_TITLE_SEASON: Regex = Regex::new("第.*季").unwrap();
}
|
||||
|
||||
pub fn build_mikan_bangumi_homepage(
|
||||
mikan_base_url: &str,
|
||||
mikan_bangumi_id: &str,
|
||||
mikan_fansub_id: Option<&str>,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = Url::parse(mikan_base_url)?;
|
||||
url.set_path(&format!("/Home/Bangumi/{mikan_bangumi_id}"));
|
||||
url.set_fragment(mikan_fansub_id);
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn build_mikan_episode_homepage(
|
||||
mikan_base_url: &str,
|
||||
mikan_episode_id: &str,
|
||||
) -> eyre::Result<Url> {
|
||||
let mut url = Url::parse(mikan_base_url)?;
|
||||
url.set_path(&format!("/Home/Episode/{mikan_episode_id}"));
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
pub fn parse_mikan_episode_id_from_homepage(url: &Url) -> Option<MikanEpisodeHomepage> {
|
||||
if url.path().starts_with("/Home/Episode/") {
|
||||
let mikan_episode_id = url.path().replace("/Home/Episode/", "");
|
||||
Some(MikanEpisodeHomepage { mikan_episode_id })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_bangumi_poster_from_origin_poster_src(
|
||||
client: Option<&AppMikanClient>,
|
||||
origin_poster_src: Url,
|
||||
) -> eyre::Result<MikanBangumiPosterMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let poster_data = download_image_with_client(http_client, origin_poster_src.clone()).await?;
|
||||
Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src,
|
||||
poster_data: Some(poster_data),
|
||||
poster_src: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_bangumi_poster_from_origin_poster_src_with_cache(
|
||||
ctx: &AppContext,
|
||||
origin_poster_src: Url,
|
||||
subscriber_id: i32,
|
||||
) -> eyre::Result<MikanBangumiPosterMeta> {
|
||||
let dal_client = ctx.get_dal_client();
|
||||
let mikan_client = ctx.get_mikan_client();
|
||||
let subscriber_pid = &subscribers::Model::find_pid_by_id_with_cache(ctx, subscriber_id).await?;
|
||||
if let Some(poster_src) = dal_client
|
||||
.exists_object(
|
||||
DalContentCategory::Image,
|
||||
subscriber_pid,
|
||||
Some(MIKAN_BUCKET_KEY),
|
||||
&origin_poster_src.path().replace("/images/Bangumi/", ""),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
return Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src,
|
||||
poster_data: None,
|
||||
poster_src: Some(poster_src.to_string()),
|
||||
});
|
||||
}
|
||||
|
||||
let poster_data =
|
||||
download_image_with_client(Some(mikan_client.deref()), origin_poster_src.clone()).await?;
|
||||
|
||||
let poster_str = dal_client
|
||||
.store_object(
|
||||
DalContentCategory::Image,
|
||||
subscriber_pid,
|
||||
Some(MIKAN_BUCKET_KEY),
|
||||
&origin_poster_src.path().replace("/images/Bangumi/", ""),
|
||||
poster_data.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(MikanBangumiPosterMeta {
|
||||
origin_poster_src,
|
||||
poster_data: Some(poster_data),
|
||||
poster_src: Some(poster_str.to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_bangumi_meta_from_mikan_homepage(
|
||||
client: Option<&AppMikanClient>,
|
||||
url: Url,
|
||||
) -> eyre::Result<MikanBangumiMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let url_host = url.origin().unicode_serialization();
|
||||
let content = download_html_with_client(http_client, url.as_str()).await?;
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let bangumi_fansubs = html
|
||||
.select(&scraper::Selector::parse(".subgroup-text").unwrap())
|
||||
.filter_map(|el| {
|
||||
if let (Some(fansub_id), Some(fansub_name)) = (
|
||||
el.value()
|
||||
.attr("id")
|
||||
.map(|s| decode_html_entities(s).trim().to_string()),
|
||||
el.select(&scraper::Selector::parse("a:nth-child(1)").unwrap())
|
||||
.next()
|
||||
.map(|child| {
|
||||
let mut s = String::from(
|
||||
child
|
||||
.prev_sibling()
|
||||
.and_then(|t| t.value().as_text())
|
||||
.map(|s| s.trim())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
s.extend(child.text());
|
||||
decode_html_entities(&s).trim().to_string()
|
||||
}),
|
||||
) {
|
||||
Some((fansub_id, fansub_name))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
let fansub_info = url.fragment().and_then(|b| {
|
||||
bangumi_fansubs
|
||||
.iter()
|
||||
.find_map(|(id, name)| if id == b { Some((id, name)) } else { None })
|
||||
});
|
||||
|
||||
let bangumi_title = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.and_then(|title| if title.is_empty() { None } else { Some(title) })
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!("Missing mikan bangumi official title for {}", url)
|
||||
})?;
|
||||
|
||||
let MikanBangumiRssLink {
|
||||
mikan_bangumi_id, ..
|
||||
} = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title > .mikan-rss").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.as_ref()
|
||||
.and_then(|s| url.join(s).ok())
|
||||
.and_then(|rss_link_url| parse_mikan_bangumi_id_from_rss_link(&rss_link_url))
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!("Missing mikan bangumi rss link or error format for {}", url)
|
||||
})?;
|
||||
|
||||
let origin_poster_src = html
|
||||
.select(&scraper::Selector::parse(".bangumi-poster").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("style"))
|
||||
.as_ref()
|
||||
.and_then(|s| parse_style_attr(s))
|
||||
.and_then(|style| {
|
||||
style.iter().find_map(|(prop, _)| {
|
||||
match prop {
|
||||
Property::BackgroundImage(images) => {
|
||||
for img in images {
|
||||
if let CSSImage::Url(path) = img {
|
||||
if let Ok(url) =
|
||||
Url::parse(&url_host).and_then(|s| s.join(path.url.trim()))
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Property::Background(backgrounds) => {
|
||||
for bg in backgrounds {
|
||||
if let CSSImage::Url(path) = &bg.image {
|
||||
if let Ok(url) =
|
||||
Url::parse(&url_host).and_then(|s| s.join(path.url.trim()))
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
.map(|mut origin_poster_src| {
|
||||
origin_poster_src.set_query(None);
|
||||
origin_poster_src
|
||||
});
|
||||
|
||||
Ok(MikanBangumiMeta {
|
||||
homepage: url,
|
||||
bangumi_title,
|
||||
origin_poster_src,
|
||||
mikan_bangumi_id,
|
||||
fansub: fansub_info.map(|s| s.1.to_string()),
|
||||
mikan_fansub_id: fansub_info.map(|s| s.0.to_string()),
|
||||
mikan_fansub_candidates: bangumi_fansubs.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn parse_mikan_episode_meta_from_mikan_homepage(
|
||||
client: Option<&AppMikanClient>,
|
||||
url: Url,
|
||||
) -> eyre::Result<MikanEpisodeMeta> {
|
||||
let http_client = client.map(|s| s.deref());
|
||||
let url_host = url.origin().unicode_serialization();
|
||||
let content = download_html_with_client(http_client, url.as_str()).await?;
|
||||
|
||||
let html = Html::parse_document(&content);
|
||||
|
||||
let bangumi_title = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.and_then(|title| if title.is_empty() { None } else { Some(title) })
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!("Missing mikan bangumi official title for {}", url)
|
||||
})?;
|
||||
|
||||
let episode_title = html
|
||||
.select(&scraper::Selector::parse("title").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.replace(" - Mikan Project", "")
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.and_then(|title| if title.is_empty() { None } else { Some(title) })
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!("Missing mikan episode official title for {}", url)
|
||||
})?;
|
||||
|
||||
let (mikan_bangumi_id, mikan_fansub_id) = html
|
||||
.select(&scraper::Selector::parse(".bangumi-title > .mikan-rss").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.as_ref()
|
||||
.and_then(|s| url.join(s).ok())
|
||||
.and_then(|rss_link_url| parse_mikan_bangumi_id_from_rss_link(&rss_link_url))
|
||||
.and_then(
|
||||
|MikanBangumiRssLink {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
..
|
||||
}| {
|
||||
mikan_fansub_id.map(|mikan_fansub_id| (mikan_bangumi_id, mikan_fansub_id))
|
||||
},
|
||||
)
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!("Missing mikan bangumi rss link or error format for {}", url)
|
||||
})?;
|
||||
|
||||
let fansub = html
|
||||
.select(&scraper::Selector::parse(".bangumi-info>.magnet-link-wrap").unwrap())
|
||||
.next()
|
||||
.map(|el| {
|
||||
decode_html_entities(&el.text().collect::<String>())
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.wrap_err_with(|| {
|
||||
// todo: error handler
|
||||
format!("Missing mikan bangumi fansub name for {}", url)
|
||||
})?;
|
||||
|
||||
let origin_poster_src = html
|
||||
.select(&scraper::Selector::parse(".bangumi-poster").unwrap())
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("style"))
|
||||
.as_ref()
|
||||
.and_then(|s| parse_style_attr(s))
|
||||
.and_then(|style| {
|
||||
style.iter().find_map(|(prop, _)| {
|
||||
match prop {
|
||||
Property::BackgroundImage(images) => {
|
||||
for img in images {
|
||||
if let CSSImage::Url(path) = img {
|
||||
if let Ok(url) =
|
||||
Url::parse(&url_host).and_then(|s| s.join(path.url.trim()))
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Property::Background(backgrounds) => {
|
||||
for bg in backgrounds {
|
||||
if let CSSImage::Url(path) = &bg.image {
|
||||
if let Ok(url) =
|
||||
Url::parse(&url_host).and_then(|s| s.join(path.url.trim()))
|
||||
{
|
||||
return Some(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
.map(|mut origin_poster_src| {
|
||||
origin_poster_src.set_query(None);
|
||||
origin_poster_src
|
||||
});
|
||||
|
||||
let MikanEpisodeHomepage {
|
||||
mikan_episode_id, ..
|
||||
} = parse_mikan_episode_id_from_homepage(&url)
|
||||
.wrap_err_with(|| format!("Failed to extract mikan_episode_id from {}", &url))?;
|
||||
|
||||
Ok(MikanEpisodeMeta {
|
||||
mikan_bangumi_id,
|
||||
mikan_fansub_id,
|
||||
bangumi_title,
|
||||
episode_title,
|
||||
homepage: url,
|
||||
origin_poster_src,
|
||||
fansub,
|
||||
mikan_episode_id,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::assert_matches::assert_matches;

    use url::Url;
    use zune_image::{codecs::ImageFormat, image::Image};

    use super::{
        parse_mikan_bangumi_meta_from_mikan_homepage,
        parse_mikan_bangumi_poster_from_origin_poster_src,
        parse_mikan_episode_meta_from_mikan_homepage,
    };

    /// Scrapes a live episode page (network required), then downloads its
    /// poster and checks that it decodes as a JPEG.
    #[tokio::test]
    async fn test_parse_mikan_episode() {
        async fn check_episode() -> eyre::Result<()> {
            let episode_url = Url::parse(
                "https://mikanani.me/Home/Episode/475184dce83ea2b82902592a5ac3343f6d54b36a",
            )?;

            let ep_meta =
                parse_mikan_episode_meta_from_mikan_homepage(None, episode_url.clone()).await?;

            assert_eq!(ep_meta.homepage, episode_url);
            assert_eq!(ep_meta.bangumi_title, "葬送的芙莉莲");
            assert_eq!(
                ep_meta.origin_poster_src,
                Some(Url::parse(
                    "https://mikanani.me/images/Bangumi/202309/5ce9fed1.jpg"
                )?)
            );
            assert_eq!(ep_meta.fansub, "LoliHouse");
            assert_eq!(ep_meta.mikan_fansub_id, "370");
            assert_eq!(ep_meta.mikan_bangumi_id, "3141");

            assert_matches!(ep_meta.origin_poster_src, Some(..));

            let poster = parse_mikan_bangumi_poster_from_origin_poster_src(
                None,
                ep_meta.origin_poster_src.unwrap(),
            )
            .await?;
            let poster_bytes = poster.poster_data.expect("should have poster data");
            let decoded = Image::read(poster_bytes.to_vec(), Default::default());
            assert!(
                decoded.is_ok_and(|img| img
                    .metadata()
                    .get_image_format()
                    .is_some_and(|fmt| matches!(fmt, ImageFormat::JPEG))),
                "should start with valid jpeg data magic number"
            );

            Ok(())
        }

        check_episode().await.expect("test parse mikan failed");
    }

    /// Scrapes a live bangumi page (network required) and checks the selected
    /// fansub and the candidate list.
    #[tokio::test]
    async fn test_parse_mikan_bangumi() {
        async fn check_bangumi() -> eyre::Result<()> {
            let bangumi_url = Url::parse("https://mikanani.me/Home/Bangumi/3416#370")?;

            let bgm_meta =
                parse_mikan_bangumi_meta_from_mikan_homepage(None, bangumi_url.clone()).await?;

            assert_eq!(bgm_meta.homepage, bangumi_url);
            assert_eq!(bgm_meta.bangumi_title, "叹气的亡灵想隐退");
            assert_eq!(
                bgm_meta.origin_poster_src,
                Some(Url::parse(
                    "https://mikanani.me/images/Bangumi/202410/480ef127.jpg"
                )?)
            );
            assert_eq!(bgm_meta.fansub, Some(String::from("LoliHouse")));
            assert_eq!(bgm_meta.mikan_fansub_id, Some(String::from("370")));
            assert_eq!(bgm_meta.mikan_bangumi_id, "3416");

            assert_eq!(
                bgm_meta.homepage.as_str(),
                "https://mikanani.me/Home/Bangumi/3416#370"
            );

            assert_eq!(bgm_meta.mikan_fansub_candidates.len(), 6);

            Ok(())
        }

        check_bangumi().await.expect("test parse mikan failed");
    }
}
|
||||
Reference in New Issue
Block a user