refactor: continue

2025-05-13 01:23:59 +08:00
parent 760cb2344e
commit bf270e4e87
34 changed files with 1210 additions and 1427 deletions

View File

@@ -2,7 +2,6 @@ mod client;
mod config;
mod constants;
mod credential;
mod rss;
mod subscription;
mod web;
@@ -14,23 +13,22 @@ pub use constants::{
MIKAN_SEASON_FLOW_PAGE_PATH, MIKAN_UNKNOWN_FANSUB_ID, MIKAN_UNKNOWN_FANSUB_NAME,
};
pub use credential::MikanCredentialForm;
pub use rss::{
MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel, MikanRssItem,
MikanSubscriberRssChannel, MikanSubscriberSubscriptionRssUrlMeta,
build_mikan_bangumi_subscription_rss_url, build_mikan_subscriber_subscription_rss_url,
};
pub use subscription::{
MikanBangumiSubscription, MikanSeasonSubscription, MikanSubscriberSubscription,
};
pub use web::{
MikanBangumiHash, MikanBangumiIndexHash, MikanBangumiIndexMeta, MikanBangumiMeta,
MikanBangumiPosterMeta, MikanEpisodeHash, MikanEpisodeMeta, MikanSeasonFlowUrlMeta,
MikanSeasonStr, build_mikan_bangumi_expand_subscribed_url, build_mikan_bangumi_homepage_url,
build_mikan_episode_homepage_url, build_mikan_season_flow_url,
MikanBangumiPosterMeta, MikanEpisodeHash, MikanEpisodeMeta, MikanRssItem,
MikanSeasonFlowUrlMeta, MikanSeasonStr, MikanSubscriberSubscriptionRssUrlMeta,
build_mikan_bangumi_expand_subscribed_url, build_mikan_bangumi_homepage_url,
build_mikan_bangumi_subscription_rss_url, build_mikan_episode_homepage_url,
build_mikan_season_flow_url, build_mikan_subscriber_subscription_rss_url,
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
extract_mikan_bangumi_meta_from_expand_subscribed_fragment,
extract_mikan_episode_meta_from_episode_homepage_html,
scrape_mikan_bangumi_meta_from_bangumi_homepage_url,
scrape_mikan_bangumi_meta_list_from_season_flow_url,
scrape_mikan_bangumi_meta_stream_from_season_flow_url,
scrape_mikan_episode_meta_from_episode_homepage_url, scrape_mikan_poster_data_from_image_url,
scrape_mikan_poster_meta_from_image_url,
};

View File

@@ -1,204 +0,0 @@
use std::borrow::Cow;
use chrono::DateTime;
use downloader::bittorrent::defs::BITTORRENT_MIME_TYPE;
use serde::{Deserialize, Serialize};
use url::Url;
use crate::{errors::app_error::RecorderError, extract::mikan::MikanEpisodeHash};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanRssItem {
pub title: String,
pub homepage: Url,
pub url: Url,
pub content_length: Option<u64>,
pub mime: String,
pub pub_date: Option<i64>,
pub mikan_episode_id: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanBangumiRssChannel {
pub name: String,
pub url: Url,
pub mikan_bangumi_id: String,
pub mikan_fansub_id: String,
pub items: Vec<MikanRssItem>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanSubscriberRssChannel {
pub mikan_subscription_token: String,
pub url: Url,
pub items: Vec<MikanRssItem>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MikanRssChannel {
Bangumi(MikanBangumiRssChannel),
Subscriber(MikanSubscriberRssChannel),
}
impl MikanRssChannel {
pub fn items(&self) -> &[MikanRssItem] {
match &self {
Self::Bangumi(MikanBangumiRssChannel { items, .. })
| Self::Subscriber(MikanSubscriberRssChannel { items, .. }) => items,
}
}
pub fn into_items(self) -> Vec<MikanRssItem> {
match self {
Self::Bangumi(MikanBangumiRssChannel { items, .. })
| Self::Subscriber(MikanSubscriberRssChannel { items, .. }) => items,
}
}
pub fn name(&self) -> Option<&str> {
match &self {
Self::Bangumi(MikanBangumiRssChannel { name, .. }) => Some(name.as_str()),
Self::Subscriber(MikanSubscriberRssChannel { .. }) => None,
}
}
pub fn url(&self) -> &Url {
match &self {
Self::Bangumi(MikanBangumiRssChannel { url, .. })
| Self::Subscriber(MikanSubscriberRssChannel { url, .. }) => url,
}
}
}
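// Illustrative sketch (not part of this commit): the accessors above let
// callers handle both channel kinds uniformly, e.g. for logging.
fn summarize_channel(channel: &MikanRssChannel) -> String {
    format!(
        "{} ({} items) <{}>",
        channel.name().unwrap_or("<subscriber feed>"),
        channel.items().len(),
        channel.url()
    )
}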
impl TryFrom<rss::Item> for MikanRssItem {
type Error = RecorderError;
fn try_from(item: rss::Item) -> Result<Self, Self::Error> {
let enclosure = item.enclosure.ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("enclosure"))
})?;
let mime_type = enclosure.mime_type;
if mime_type != BITTORRENT_MIME_TYPE {
return Err(RecorderError::MimeError {
expected: String::from(BITTORRENT_MIME_TYPE),
found: mime_type.to_string(),
desc: String::from("MikanRssItem"),
});
}
let title = item.title.ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("title:title"))
})?;
let enclosure_url = Url::parse(&enclosure.url).map_err(|err| {
RecorderError::from_mikan_rss_invalid_field_and_source(
"enclosure_url:enclosure.link".into(),
err,
)
})?;
let homepage = item
.link
.and_then(|link| Url::parse(&link).ok())
.ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("homepage:link"))
})?;
let MikanEpisodeHash {
mikan_episode_token: mikan_episode_id,
..
} = MikanEpisodeHash::from_homepage_url(&homepage).ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("mikan_episode_id"))
})?;
Ok(MikanRssItem {
title,
homepage,
url: enclosure_url,
content_length: enclosure.length.parse().ok(),
mime: mime_type,
pub_date: item
.pub_date
.and_then(|s| DateTime::parse_from_rfc2822(&s).ok())
.map(|s| s.timestamp_millis()),
mikan_episode_id,
})
}
}
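// A minimal usage sketch (illustrative, not part of this commit): decoding a
// fetched feed through the `TryFrom<rss::Item>` impl above. Assumes the usual
// `From<rss::Error> for RecorderError` conversion used elsewhere in this crate;
// items whose enclosure mime type is not BITTORRENT_MIME_TYPE are rejected
// with `RecorderError::MimeError`.
fn collect_mikan_rss_items(feed_bytes: &[u8]) -> Result<Vec<MikanRssItem>, RecorderError> {
    let channel = rss::Channel::read_from(feed_bytes)?;
    channel
        .items
        .into_iter()
        .map(MikanRssItem::try_from)
        .collect()
}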
#[derive(Debug, Clone)]
pub struct MikanBangumiRssUrlMeta {
pub mikan_bangumi_id: String,
pub mikan_fansub_id: String,
}
impl MikanBangumiRssUrlMeta {
pub fn from_url(url: &Url) -> Option<Self> {
if url.path() == "/RSS/Bangumi" {
if let (Some(mikan_fansub_id), Some(mikan_bangumi_id)) = (
url.query_pairs()
.find(|(k, _)| k == "subgroupid")
.map(|(_, v)| v.to_string()),
url.query_pairs()
.find(|(k, _)| k == "bangumiId")
.map(|(_, v)| v.to_string()),
) {
Some(MikanBangumiRssUrlMeta {
mikan_bangumi_id,
mikan_fansub_id,
})
} else {
None
}
} else {
None
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanSubscriberSubscriptionRssUrlMeta {
pub mikan_subscription_token: String,
}
impl MikanSubscriberSubscriptionRssUrlMeta {
pub fn from_url(url: &Url) -> Option<Self> {
if url.path() == "/RSS/MyBangumi" {
url.query_pairs().find(|(k, _)| k == "token").map(|(_, v)| {
MikanSubscriberSubscriptionRssUrlMeta {
mikan_subscription_token: v.to_string(),
}
})
} else {
None
}
}
}
pub fn build_mikan_bangumi_subscription_rss_url(
mikan_base_url: Url,
mikan_bangumi_id: &str,
mikan_fansub_id: Option<&str>,
) -> Url {
let mut url = mikan_base_url;
url.set_path("/RSS/Bangumi");
url.query_pairs_mut()
.append_pair("bangumiId", mikan_bangumi_id);
if let Some(mikan_fansub_id) = mikan_fansub_id {
url.query_pairs_mut()
.append_pair("subgroupid", mikan_fansub_id);
};
url
}
pub fn build_mikan_subscriber_subscription_rss_url(
mikan_base_url: Url,
mikan_subscription_token: &str,
) -> Url {
let mut url = mikan_base_url;
url.set_path("/RSS/MyBangumi");
url.query_pairs_mut()
.append_pair("token", mikan_subscription_token);
url
}
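// A minimal usage sketch (illustrative, not part of this commit): both builders
// take the base URL by value, set the path, and append query pairs, so their
// output round-trips through the `from_url` parsers above. The base URL matches
// the canonical https://mikanani.me/ host used in the tests.
#[cfg(test)]
mod rss_url_sketch {
    use url::Url;

    use super::{
        MikanBangumiRssUrlMeta, build_mikan_bangumi_subscription_rss_url,
        build_mikan_subscriber_subscription_rss_url,
    };

    #[test]
    fn bangumi_and_subscriber_rss_urls_round_trip() {
        let base = Url::parse("https://mikanani.me/").unwrap();

        let bangumi_rss =
            build_mikan_bangumi_subscription_rss_url(base.clone(), "3141", Some("370"));
        assert_eq!(
            bangumi_rss.as_str(),
            "https://mikanani.me/RSS/Bangumi?bangumiId=3141&subgroupid=370"
        );
        // `from_url` recovers both ids from the query pairs.
        let meta = MikanBangumiRssUrlMeta::from_url(&bangumi_rss).unwrap();
        assert_eq!(meta.mikan_bangumi_id, "3141");
        assert_eq!(meta.mikan_fansub_id, "370");

        let subscriber_rss = build_mikan_subscriber_subscription_rss_url(base, "token123");
        assert_eq!(
            subscriber_rss.as_str(),
            "https://mikanani.me/RSS/MyBangumi?token=token123"
        );
    }
}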

View File

@@ -1,146 +1,206 @@
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
sync::Arc,
};
use async_graphql::{InputObject, SimpleObject};
use async_stream::try_stream;
use fetch::{fetch_bytes, fetch_html};
use futures::Stream;
use fetch::fetch_bytes;
use futures::try_join;
use itertools::Itertools;
use maplit::hashmap;
use scraper::Html;
use sea_orm::{
ActiveModelTrait, ActiveValue, ColumnTrait, EntityTrait, IntoSimpleExpr, QueryFilter,
QuerySelect, prelude::Expr, sea_query::OnConflict,
ActiveValue::Set, ColumnTrait, Condition, EntityTrait, JoinType, QueryFilter, QuerySelect,
RelationTrait,
};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use snafu::OptionExt;
use url::Url;
use super::scrape_mikan_bangumi_meta_list_from_season_flow_url;
use crate::{
app::AppContextTrait,
errors::{RecorderError, RecorderResult},
extract::mikan::{
MikanBangumiHash, MikanBangumiMeta, MikanBangumiRssUrlMeta, MikanEpisodeHash,
MikanEpisodeMeta, MikanRssItem, MikanSeasonFlowUrlMeta, MikanSeasonStr,
MikanSubscriberSubscriptionRssUrlMeta, build_mikan_bangumi_expand_subscribed_url,
MikanBangumiHash, MikanBangumiMeta, MikanEpisodeHash, MikanEpisodeMeta, MikanRssItem,
MikanSeasonFlowUrlMeta, MikanSeasonStr, MikanSubscriberSubscriptionRssUrlMeta,
build_mikan_bangumi_subscription_rss_url, build_mikan_season_flow_url,
build_mikan_subscriber_subscription_rss_url,
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
extract_mikan_bangumi_meta_from_expand_subscribed_fragment,
scrape_mikan_episode_meta_from_episode_homepage_url,
},
migrations::defs::Bangumi,
models::{bangumi, episodes, subscription_bangumi, subscription_episode, subscriptions},
models::{
bangumi, episodes, subscription_bangumi, subscription_episode,
subscriptions::{self, SubscriptionTrait},
},
};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, InputObject, SimpleObject)]
#[tracing::instrument(err, skip(ctx, rss_item_list))]
async fn sync_mikan_feeds_from_rss_item_list(
ctx: &dyn AppContextTrait,
rss_item_list: Vec<MikanRssItem>,
subscriber_id: i32,
subscription_id: i32,
) -> RecorderResult<()> {
let (new_episode_meta_list, existed_episode_hash2id_map) = {
let existed_episode_hash2id_map = episodes::Model::get_existed_mikan_episode_list(
ctx,
rss_item_list.iter().map(|s| MikanEpisodeHash {
mikan_episode_id: s.mikan_episode_id.clone(),
}),
subscriber_id,
subscription_id,
)
.await?
.map(|(episode_id, hash, bangumi_id)| (hash.mikan_episode_id, (episode_id, bangumi_id)))
.collect::<HashMap<_, _>>();
let mut new_episode_meta_list: Vec<MikanEpisodeMeta> = vec![];
let mikan_client = ctx.mikan();
for to_insert_rss_item in rss_item_list.into_iter().filter(|rss_item| {
!existed_episode_hash2id_map.contains_key(&rss_item.mikan_episode_id)
}) {
let episode_meta = scrape_mikan_episode_meta_from_episode_homepage_url(
mikan_client,
to_insert_rss_item.homepage,
)
.await?;
new_episode_meta_list.push(episode_meta);
}
(new_episode_meta_list, existed_episode_hash2id_map)
};
// Subscribe episodes and bangumi that already exist but are not yet linked to this subscription.
let (existed_episode_id_list, existed_episode_bangumi_id_set): (Vec<i32>, HashSet<i32>) =
existed_episode_hash2id_map.into_values().unzip();
try_join!(
subscription_episode::Model::add_episodes_for_subscription(
ctx,
existed_episode_id_list.into_iter(),
subscriber_id,
subscription_id,
),
subscription_bangumi::Model::add_bangumis_for_subscription(
ctx,
existed_episode_bangumi_id_set.into_iter(),
subscriber_id,
subscription_id,
),
)?;
let new_episode_meta_list_group_by_bangumi_hash: HashMap<
MikanBangumiHash,
Vec<MikanEpisodeMeta>,
> = {
let mut m = hashmap! {};
for episode_meta in new_episode_meta_list {
let bangumi_hash = episode_meta.bangumi_hash();
m.entry(bangumi_hash)
.or_insert_with(Vec::new)
.push(episode_meta);
}
m
};
for (group_bangumi_hash, group_episode_meta_list) in new_episode_meta_list_group_by_bangumi_hash
{
let first_episode_meta = group_episode_meta_list.first().unwrap();
let group_bangumi_model = bangumi::Model::get_or_insert_from_mikan(
ctx,
group_bangumi_hash,
subscriber_id,
subscription_id,
async || {
let bangumi_meta: MikanBangumiMeta = first_episode_meta.clone().into();
let bangumi_am = bangumi::ActiveModel::from_mikan_bangumi_meta(
ctx,
bangumi_meta,
subscriber_id,
subscription_id,
)
.await?;
Ok(bangumi_am)
},
)
.await?;
let group_episode_creation_list = group_episode_meta_list
.into_iter()
.map(|episode_meta| (&group_bangumi_model, episode_meta));
episodes::Model::add_mikan_episodes_for_subscription(
ctx,
group_episode_creation_list,
subscriber_id,
subscription_id,
)
.await?;
}
Ok(())
}
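// A standalone sketch of the grouping step above (illustrative, not part of
// this commit): new episode metas are bucketed by bangumi hash so that each
// bangumi row is created or fetched once and its episodes inserted as a batch.
fn group_by_hash<H, M>(metas: Vec<M>, hash_of: impl Fn(&M) -> H) -> HashMap<H, Vec<M>>
where
    H: std::hash::Hash + Eq,
{
    let mut groups: HashMap<H, Vec<M>> = HashMap::new();
    for meta in metas {
        // `entry` + `or_default` mirrors the `or_insert_with(Vec::new)` call above.
        groups.entry(hash_of(&meta)).or_default().push(meta);
    }
    groups
}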
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanSubscriberSubscription {
pub id: i32,
pub mikan_subscription_token: String,
pub subscriber_id: i32,
}
impl MikanSubscriberSubscription {
#[tracing::instrument(skip(ctx))]
pub async fn pull_subscription(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> RecorderResult<Vec<MikanBangumiMeta>> {
let mikan_client = ctx.mikan();
let db = ctx.db();
let to_insert_episode_meta_list: Vec<MikanEpisodeMeta> = {
let rss_item_list = self.pull_rss_items(ctx.clone()).await?;
let existed_episode_token_list = episodes::Model::get_existed_mikan_episode_list(
ctx.as_ref(),
rss_item_list.iter().map(|s| MikanEpisodeHash {
mikan_episode_token: s.mikan_episode_id.clone(),
}),
self.subscriber_id,
self.id,
)
.await?
.into_iter()
.map(|(id, hash)| (hash.mikan_episode_token, id))
.collect::<HashMap<_, _>>();
let mut to_insert_episode_meta_list = vec![];
for to_insert_rss_item in rss_item_list.into_iter().filter(|rss_item| {
!existed_episode_token_list.contains_key(&rss_item.mikan_episode_id)
}) {
let episode_meta = scrape_mikan_episode_meta_from_episode_homepage_url(
mikan_client,
to_insert_rss_item.homepage,
)
.await?;
to_insert_episode_meta_list.push(episode_meta);
}
subscription_episode::Model::add_episodes_for_subscription(
ctx.as_ref(),
existed_episode_token_list.into_values(),
self.subscriber_id,
self.id,
)
.await?;
to_insert_episode_meta_list
};
let new_episode_meta_bangumi_map = {
let bangumi_hash_map = to_insert_episode_meta_list
.iter()
.map(|episode_meta| (episode_meta.bangumi_hash(), episode_meta))
.collect::<HashMap<_, _>>();
let existed_bangumi_set = bangumi::Model::get_existed_mikan_bangumi_list(
ctx.as_ref(),
bangumi_hash_map.keys().cloned(),
self.subscriber_id,
self.id,
)
.await?
.map(|(_, bangumi_hash)| bangumi_hash)
.collect::<HashSet<_>>();
let mut to_insert_bangumi_list = vec![];
for (bangumi_hash, episode_meta) in bangumi_hash_map.iter() {
if !existed_bangumi_set.contains(bangumi_hash) {
let bangumi_meta: MikanBangumiMeta = (*episode_meta).clone().into();
let bangumi_active_model = bangumi::ActiveModel::from_mikan_bangumi_meta(
ctx.as_ref(),
bangumi_meta,
self.subscriber_id,
self.id,
)
.await?;
to_insert_bangumi_list.push(bangumi_active_model);
}
}
bangumi::Entity::insert_many(to_insert_bangumi_list)
.on_conflict_do_nothing()
.exec(db)
.await?;
let mut new_episode_meta_bangumi_map: HashMap<MikanBangumiHash, bangumi::Model> =
hashmap! {};
};
todo!()
#[async_trait::async_trait]
impl SubscriptionTrait for MikanSubscriberSubscription {
fn get_subscriber_id(&self) -> i32 {
self.subscriber_id
}
#[tracing::instrument(skip(ctx))]
pub async fn pull_rss_items(
fn get_subscription_id(&self) -> i32 {
self.id
}
async fn sync_feeds(&self, ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
let rss_item_list = self.get_rss_item_list(ctx.as_ref()).await?;
sync_mikan_feeds_from_rss_item_list(
ctx.as_ref(),
rss_item_list,
self.get_subscriber_id(),
self.get_subscription_id(),
)
.await?;
Ok(())
}
async fn sync_sources(&self, _ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
Ok(())
}
fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let meta = MikanSubscriberSubscriptionRssUrlMeta::from_rss_url(&source_url)
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanSubscriberSubscription should extract mikan_subscription_token from \
source_url = {}, subscription_id = {}",
source_url, model.id
)
})?;
Ok(Self {
id: model.id,
mikan_subscription_token: meta.mikan_subscription_token,
subscriber_id: model.subscriber_id,
})
}
}
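// Illustrative sketch (not part of this commit): with `SubscriptionTrait` in
// scope, a caller can hydrate the concrete subscription from its database row
// and run both sync phases; `model` is a hypothetical row loaded elsewhere.
async fn sync_subscriber_subscription_example(
    ctx: Arc<dyn AppContextTrait>,
    model: &subscriptions::Model,
) -> RecorderResult<()> {
    let subscription = MikanSubscriberSubscription::try_from_model(model)?;
    // `sync_sources` is a no-op for subscriber subscriptions; `sync_feeds`
    // pulls the RSS feed and persists new episodes and bangumi.
    subscription.sync_sources(ctx.clone()).await?;
    subscription.sync_feeds(ctx).await
}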
impl MikanSubscriberSubscription {
#[tracing::instrument(err, skip(ctx))]
async fn get_rss_item_list(
&self,
ctx: Arc<dyn AppContextTrait>,
ctx: &dyn AppContextTrait,
) -> RecorderResult<Vec<MikanRssItem>> {
let mikan_base_url = ctx.mikan().base_url().clone();
let rss_url = build_mikan_subscriber_subscription_rss_url(
@@ -160,25 +220,6 @@ impl MikanSubscriberSubscription {
}
Ok(result)
}
pub fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let meta = MikanSubscriberSubscriptionRssUrlMeta::from_url(&source_url)
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanSubscriberSubscription should extract mikan_subscription_token from \
source_url = {}, subscription_id = {}",
source_url, model.id
)
})?;
Ok(Self {
id: model.id,
mikan_subscription_token: meta.mikan_subscription_token,
subscriber_id: model.subscriber_id,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, InputObject, SimpleObject)]
@@ -190,77 +231,60 @@ pub struct MikanSeasonSubscription {
pub subscriber_id: i32,
}
impl MikanSeasonSubscription {
#[tracing::instrument]
pub fn pull_bangumi_meta_stream(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> impl Stream<Item = RecorderResult<MikanBangumiMeta>> {
let credential_id = self.credential_id;
let year = self.year;
let season_str = self.season_str.clone();
try_stream! {
let mikan_base_url = ctx.mikan().base_url().clone();
let mikan_client = ctx.mikan()
.fork_with_credential(ctx.clone(), credential_id)
.await?;
let mikan_season_flow_url = build_mikan_season_flow_url(mikan_base_url.clone(), year, season_str);
let content = fetch_html(&mikan_client, mikan_season_flow_url.clone()).await?;
let mut bangumi_indices_meta = {
let html = Html::parse_document(&content);
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url)
};
if bangumi_indices_meta.is_empty() && !mikan_client.has_login().await? {
mikan_client.login().await?;
let content = fetch_html(&mikan_client, mikan_season_flow_url).await?;
let html = Html::parse_document(&content);
bangumi_indices_meta =
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url);
}
mikan_client
.sync_credential_cookies(ctx.clone(), credential_id)
.await?;
for bangumi_index in bangumi_indices_meta {
let bangumi_title = bangumi_index.bangumi_title.clone();
let bangumi_expand_subscribed_fragment_url = build_mikan_bangumi_expand_subscribed_url(
mikan_base_url.clone(),
&bangumi_index.mikan_bangumi_id,
);
let bangumi_expand_subscribed_fragment =
fetch_html(&mikan_client, bangumi_expand_subscribed_fragment_url).await?;
let bangumi_meta = {
let html = Html::parse_document(&bangumi_expand_subscribed_fragment);
extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
&html,
bangumi_index,
mikan_base_url.clone(),
)
.with_whatever_context::<_, String, RecorderError>(|| {
format!("failed to extract mikan bangumi fansub of title = {bangumi_title}")
})
}?;
yield bangumi_meta;
}
mikan_client
.sync_credential_cookies(ctx, credential_id)
.await?;
}
#[async_trait::async_trait]
impl SubscriptionTrait for MikanSeasonSubscription {
fn get_subscriber_id(&self) -> i32 {
self.subscriber_id
}
pub fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
fn get_subscription_id(&self) -> i32 {
self.id
}
async fn sync_feeds(&self, ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
let rss_item_list = self.get_rss_item_list(ctx.as_ref()).await?;
sync_mikan_feeds_from_rss_item_list(
ctx.as_ref(),
rss_item_list,
self.get_subscriber_id(),
self.get_subscription_id(),
)
.await?;
Ok(())
}
async fn sync_sources(&self, ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
let bangumi_meta_list = self.get_bangumi_meta_list(ctx.clone()).await?;
let mikan_base_url = ctx.mikan().base_url();
let rss_link_list = bangumi_meta_list
.into_iter()
.map(|bangumi_meta| {
build_mikan_bangumi_subscription_rss_url(
mikan_base_url.clone(),
&bangumi_meta.mikan_bangumi_id,
Some(&bangumi_meta.mikan_fansub_id),
)
.to_string()
})
.collect_vec();
subscriptions::Entity::update_many()
.set(subscriptions::ActiveModel {
source_urls: Set(Some(rss_link_list)),
..Default::default()
})
.filter(subscriptions::Column::Id.eq(self.id))
.exec(ctx.db())
.await?;
Ok(())
}
fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let source_url_meta = MikanSeasonFlowUrlMeta::from_url(&source_url)
@@ -291,6 +315,68 @@ impl MikanSeasonSubscription {
}
}
impl MikanSeasonSubscription {
#[tracing::instrument(err, skip(ctx))]
async fn get_bangumi_meta_list(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> RecorderResult<Vec<MikanBangumiMeta>> {
let credential_id = self.credential_id;
let year = self.year;
let season_str = self.season_str;
let mikan_base_url = ctx.mikan().base_url().clone();
let mikan_season_flow_url = build_mikan_season_flow_url(mikan_base_url, year, season_str);
scrape_mikan_bangumi_meta_list_from_season_flow_url(
ctx,
mikan_season_flow_url,
credential_id,
)
.await
}
#[tracing::instrument(err, skip(ctx))]
async fn get_rss_item_list(
&self,
ctx: &dyn AppContextTrait,
) -> RecorderResult<Vec<MikanRssItem>> {
let db = ctx.db();
let subscribed_bangumi_list = bangumi::Entity::find()
.filter(Condition::all().add(subscription_bangumi::Column::SubscriptionId.eq(self.id)))
.join_rev(
JoinType::InnerJoin,
subscription_bangumi::Relation::Bangumi.def(),
)
.all(db)
.await?;
let mut rss_item_list = vec![];
for subscribed_bangumi in subscribed_bangumi_list {
let rss_url = subscribed_bangumi
.rss_link
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanSeasonSubscription rss_link is required, subscription_id = {}",
self.id
)
})?;
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
let channel = rss::Channel::read_from(&bytes[..])?;
for (idx, item) in channel.items.into_iter().enumerate() {
let item = MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)?;
rss_item_list.push(item);
}
}
Ok(rss_item_list)
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, InputObject, SimpleObject)]
pub struct MikanBangumiSubscription {
pub id: i32,
@@ -299,35 +385,38 @@ pub struct MikanBangumiSubscription {
pub subscriber_id: i32,
}
impl MikanBangumiSubscription {
#[tracing::instrument]
pub fn pull_rss_items(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> impl Stream<Item = RecorderResult<MikanRssItem>> {
let mikan_bangumi_id = self.mikan_bangumi_id.clone();
let mikan_fansub_id = self.mikan_fansub_id.clone();
try_stream! {
let mikan_base_url = ctx.mikan().base_url().clone();
let rss_url = build_mikan_bangumi_subscription_rss_url(mikan_base_url.clone(), &mikan_bangumi_id, Some(&mikan_fansub_id));
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
let channel = rss::Channel::read_from(&bytes[..])?;
for (idx, item) in channel.items.into_iter().enumerate() {
let item = MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)?;
yield item
}
}
#[async_trait::async_trait]
impl SubscriptionTrait for MikanBangumiSubscription {
fn get_subscriber_id(&self) -> i32 {
self.subscriber_id
}
pub fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
fn get_subscription_id(&self) -> i32 {
self.id
}
async fn sync_feeds(&self, ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
let rss_item_list = self.get_rss_item_list(ctx.as_ref()).await?;
sync_mikan_feeds_from_rss_item_list(
ctx.as_ref(),
rss_item_list,
<Self as SubscriptionTrait>::get_subscriber_id(self),
<Self as SubscriptionTrait>::get_subscription_id(self),
)
.await?;
Ok(())
}
async fn sync_sources(&self, _ctx: Arc<dyn AppContextTrait>) -> RecorderResult<()> {
Ok(())
}
fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let meta = MikanBangumiRssUrlMeta::from_url(&source_url)
let meta = MikanBangumiHash::from_rss_url(&source_url)
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanBangumiSubscription need to extract bangumi id and fansub id from \
@@ -345,96 +434,133 @@ impl MikanBangumiSubscription {
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
impl MikanBangumiSubscription {
#[tracing::instrument(err, skip(ctx))]
async fn get_rss_item_list(
&self,
ctx: &dyn AppContextTrait,
) -> RecorderResult<Vec<MikanRssItem>> {
let mikan_base_url = ctx.mikan().base_url().clone();
let rss_url = build_mikan_bangumi_subscription_rss_url(
mikan_base_url.clone(),
&self.mikan_bangumi_id,
Some(&self.mikan_fansub_id),
);
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
use downloader::bittorrent::BITTORRENT_MIME_TYPE;
use rstest::rstest;
use url::Url;
let channel = rss::Channel::read_from(&bytes[..])?;
use crate::{
errors::RecorderResult,
extract::mikan::{
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanRssChannel,
extract_mikan_rss_channel_from_rss_link,
},
test_utils::mikan::build_testing_mikan_client,
};
#[rstest]
#[tokio::test]
async fn test_parse_mikan_rss_channel_from_rss_link() -> RecorderResult<()> {
let mut mikan_server = mockito::Server::new_async().await;
let mikan_base_url = Url::parse(&mikan_server.url())?;
let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?;
{
let bangumi_rss_url =
mikan_base_url.join("/RSS/Bangumi?bangumiId=3141&subgroupid=370")?;
let bangumi_rss_mock = mikan_server
.mock("GET", bangumi_rss_url.path())
.with_body_from_file("tests/resources/mikan/Bangumi-3141-370.rss")
.match_query(mockito::Matcher::Any)
.create_async()
.await;
let channel = scrape_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
.await
.expect("should get mikan channel from rss url");
assert_matches!(
&channel,
MikanRssChannel::Bangumi(MikanBangumiRssChannel { .. })
);
assert_matches!(&channel.name(), Some("葬送的芙莉莲"));
let items = channel.items();
let first_sub_item = items
.first()
.expect("mikan subscriptions should have at least one subs");
assert_eq!(first_sub_item.mime, BITTORRENT_MIME_TYPE);
assert!(
&first_sub_item
.homepage
.as_str()
.starts_with("https://mikanani.me/Home/Episode")
);
let name = first_sub_item.title.as_str();
assert!(name.contains("葬送的芙莉莲"));
bangumi_rss_mock.expect(1);
let mut result = vec![];
for (idx, item) in channel.items.into_iter().enumerate() {
let item = MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)?;
result.push(item);
}
{
let bangumi_rss_url = mikan_base_url.join("/RSS/Bangumi?bangumiId=3416")?;
let bangumi_rss_mock = mikan_server
.mock("GET", bangumi_rss_url.path())
.match_query(mockito::Matcher::Any)
.with_body_from_file("tests/resources/mikan/Bangumi-3416.rss")
.create_async()
.await;
let channel = scrape_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
.await
.expect("should get mikan channel from rss url");
assert_matches!(
&channel,
MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel { .. })
);
assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));
bangumi_rss_mock.expect(1);
}
Ok(())
Ok(result)
}
}
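// Illustrative sketch (not part of this commit): the channel-decoding loop is
// now repeated in each `get_rss_item_list` implementation above; one way to
// factor it out, keeping the per-item warning, would be a helper like this.
fn decode_mikan_rss_items(bytes: &[u8]) -> RecorderResult<Vec<MikanRssItem>> {
    let channel = rss::Channel::read_from(bytes)?;
    let mut result = vec![];
    for (idx, item) in channel.items.into_iter().enumerate() {
        let item = MikanRssItem::try_from(item).inspect_err(
            |error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
        )?;
        result.push(item);
    }
    Ok(result)
}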
// #[cfg(test)]
// mod tests {
// use std::assert_matches::assert_matches;
// use downloader::bittorrent::BITTORRENT_MIME_TYPE;
// use rstest::rstest;
// use url::Url;
// use crate::{
// errors::RecorderResult,
// extract::mikan::{
// MikanBangumiIndexRssChannel, MikanBangumiRssChannel,
// MikanRssChannel, build_mikan_bangumi_subscription_rss_url,
// extract_mikan_rss_channel_from_rss_link, },
// test_utils::mikan::build_testing_mikan_client,
// };
// #[rstest]
// #[tokio::test]
// async fn test_parse_mikan_rss_channel_from_rss_link() ->
// RecorderResult<()> { let mut mikan_server =
// mockito::Server::new_async().await;
// let mikan_base_url = Url::parse(&mikan_server.url())?;
// let mikan_client =
// build_testing_mikan_client(mikan_base_url.clone()).await?;
// {
// let bangumi_rss_url = build_mikan_bangumi_subscription_rss_url(
// mikan_base_url.clone(),
// "3141",
// Some("370"),
// );
// let bangumi_rss_mock = mikan_server
// .mock("GET", bangumi_rss_url.path())
//
// .with_body_from_file("tests/resources/mikan/Bangumi-3141-370.rss")
// .match_query(mockito::Matcher::Any)
// .create_async()
// .await;
// let channel =
// scrape_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
// .await
// .expect("should get mikan channel from rss url");
// assert_matches!(
// &channel,
// MikanRssChannel::Bangumi(MikanBangumiRssChannel { .. })
// );
// assert_matches!(&channel.name(), Some("葬送的芙莉莲"));
// let items = channel.items();
// let first_sub_item = items
// .first()
// .expect("mikan subscriptions should have at least one subs");
// assert_eq!(first_sub_item.mime, BITTORRENT_MIME_TYPE);
// assert!(
// &first_sub_item
// .homepage
// .as_str()
// .starts_with("https://mikanani.me/Home/Episode")
// );
// let name = first_sub_item.title.as_str();
// assert!(name.contains("葬送的芙莉莲"));
// bangumi_rss_mock.expect(1);
// }
// {
// let bangumi_rss_url =
// mikan_base_url.join("/RSS/Bangumi?bangumiId=3416")?;
// let bangumi_rss_mock = mikan_server
// .mock("GET", bangumi_rss_url.path())
// .match_query(mockito::Matcher::Any)
//
// .with_body_from_file("tests/resources/mikan/Bangumi-3416.rss")
// .create_async()
// .await;
// let channel =
// scrape_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
// .await
// .expect("should get mikan channel from rss url");
// assert_matches!(
// &channel,
// MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel {
// .. }) );
// assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));
// bangumi_rss_mock.expect(1);
// }
// Ok(())
// }
// }

View File

@@ -2,29 +2,132 @@ use std::{borrow::Cow, fmt, str::FromStr, sync::Arc};
use async_stream::try_stream;
use bytes::Bytes;
use chrono::DateTime;
use downloader::bittorrent::defs::BITTORRENT_MIME_TYPE;
use fetch::{html::fetch_html, image::fetch_image};
use futures::{Stream, TryStreamExt, pin_mut};
use html_escape::decode_html_entities;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use snafu::FromString;
use snafu::{FromString, OptionExt};
use tracing::instrument;
use url::Url;
use super::{
MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH, MIKAN_POSTER_BUCKET_KEY,
MIKAN_SEASON_FLOW_PAGE_PATH, MikanBangumiRssUrlMeta, MikanClient,
};
use crate::{
app::AppContextTrait,
errors::app_error::{RecorderError, RecorderResult},
extract::{
html::{extract_background_image_src_from_style_attr, extract_inner_text_from_element_ref},
media::extract_image_src_from_str,
mikan::{
MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH, MIKAN_POSTER_BUCKET_KEY,
MIKAN_SEASON_FLOW_PAGE_PATH, MikanClient,
},
},
storage::{StorageContentCategory, StorageServiceTrait},
};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanRssItem {
pub title: String,
pub homepage: Url,
pub url: Url,
pub content_length: Option<u64>,
pub mime: String,
pub pub_date: Option<i64>,
pub mikan_episode_id: String,
}
impl TryFrom<rss::Item> for MikanRssItem {
type Error = RecorderError;
fn try_from(item: rss::Item) -> Result<Self, Self::Error> {
let enclosure = item.enclosure.ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("enclosure"))
})?;
let mime_type = enclosure.mime_type;
if mime_type != BITTORRENT_MIME_TYPE {
return Err(RecorderError::MimeError {
expected: String::from(BITTORRENT_MIME_TYPE),
found: mime_type.to_string(),
desc: String::from("MikanRssItem"),
});
}
let title = item.title.ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("title:title"))
})?;
let enclosure_url = Url::parse(&enclosure.url).map_err(|err| {
RecorderError::from_mikan_rss_invalid_field_and_source(
"enclosure_url:enclosure.link".into(),
err,
)
})?;
let homepage = item
.link
.and_then(|link| Url::parse(&link).ok())
.ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("homepage:link"))
})?;
let MikanEpisodeHash {
mikan_episode_id, ..
} = MikanEpisodeHash::from_homepage_url(&homepage).ok_or_else(|| {
RecorderError::from_mikan_rss_invalid_field(Cow::Borrowed("mikan_episode_id"))
})?;
Ok(MikanRssItem {
title,
homepage,
url: enclosure_url,
content_length: enclosure.length.parse().ok(),
mime: mime_type,
pub_date: item
.pub_date
.and_then(|s| DateTime::parse_from_rfc2822(&s).ok())
.map(|s| s.timestamp_millis()),
mikan_episode_id,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanSubscriberSubscriptionRssUrlMeta {
pub mikan_subscription_token: String,
}
impl MikanSubscriberSubscriptionRssUrlMeta {
pub fn from_rss_url(url: &Url) -> Option<Self> {
if url.path() == "/RSS/MyBangumi" {
url.query_pairs().find(|(k, _)| k == "token").map(|(_, v)| {
MikanSubscriberSubscriptionRssUrlMeta {
mikan_subscription_token: v.to_string(),
}
})
} else {
None
}
}
pub fn build_rss_url(self, mikan_base_url: Url) -> Url {
build_mikan_subscriber_subscription_rss_url(mikan_base_url, &self.mikan_subscription_token)
}
}
pub fn build_mikan_subscriber_subscription_rss_url(
mikan_base_url: Url,
mikan_subscription_token: &str,
) -> Url {
let mut url = mikan_base_url;
url.set_path("/RSS/MyBangumi");
url.query_pairs_mut()
.append_pair("token", mikan_subscription_token);
url
}
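// Illustrative round-trip sketch (not part of this commit): `from_rss_url` and
// `build_rss_url` are inverses for well-formed subscriber feed URLs; the token
// value here is hypothetical.
#[cfg(test)]
mod subscriber_rss_url_sketch {
    use url::Url;

    use super::MikanSubscriberSubscriptionRssUrlMeta;

    #[test]
    fn subscriber_rss_url_round_trips() {
        let base = Url::parse("https://mikanani.me/").unwrap();
        let rss_url = Url::parse("https://mikanani.me/RSS/MyBangumi?token=abc123").unwrap();
        let meta = MikanSubscriberSubscriptionRssUrlMeta::from_rss_url(&rss_url).unwrap();
        assert_eq!(meta.mikan_subscription_token, "abc123");
        assert_eq!(meta.build_rss_url(base), rss_url);
    }
}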
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Eq)]
pub struct MikanBangumiIndexMeta {
pub homepage: Url,
@@ -147,6 +250,26 @@ impl MikanBangumiIndexHash {
None
}
}
pub fn build_homepage_url(self, mikan_base_url: Url) -> Url {
build_mikan_bangumi_homepage_url(mikan_base_url, &self.mikan_bangumi_id, None)
}
}
pub fn build_mikan_bangumi_subscription_rss_url(
mikan_base_url: Url,
mikan_bangumi_id: &str,
mikan_fansub_id: Option<&str>,
) -> Url {
let mut url = mikan_base_url;
url.set_path("/RSS/Bangumi");
url.query_pairs_mut()
.append_pair("bangumiId", mikan_bangumi_id);
if let Some(mikan_fansub_id) = mikan_fansub_id {
url.query_pairs_mut()
.append_pair("subgroupid", mikan_fansub_id);
};
url
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
@@ -170,24 +293,70 @@ impl MikanBangumiHash {
None
}
}
pub fn from_rss_url(url: &Url) -> Option<Self> {
if url.path() == "/RSS/Bangumi" {
if let (Some(mikan_fansub_id), Some(mikan_bangumi_id)) = (
url.query_pairs()
.find(|(k, _)| k == "subgroupid")
.map(|(_, v)| v.to_string()),
url.query_pairs()
.find(|(k, _)| k == "bangumiId")
.map(|(_, v)| v.to_string()),
) {
Some(Self {
mikan_bangumi_id,
mikan_fansub_id,
})
} else {
None
}
} else {
None
}
}
pub fn build_rss_url(self, mikan_base_url: Url) -> Url {
build_mikan_bangumi_subscription_rss_url(
mikan_base_url,
&self.mikan_bangumi_id,
Some(&self.mikan_fansub_id),
)
}
pub fn build_homepage_url(self, mikan_base_url: Url) -> Url {
build_mikan_bangumi_homepage_url(
mikan_base_url,
&self.mikan_bangumi_id,
Some(&self.mikan_fansub_id),
)
}
}
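// Illustrative sketch (not part of this commit): `MikanBangumiHash` now owns
// both the RSS-url parsing (absorbed from the removed `MikanBangumiRssUrlMeta`)
// and the URL builders, so a hash parsed from a feed link can rebuild it.
#[cfg(test)]
mod bangumi_hash_url_sketch {
    use url::Url;

    use super::MikanBangumiHash;

    #[test]
    fn bangumi_rss_url_round_trips() {
        let base = Url::parse("https://mikanani.me/").unwrap();
        let rss_url =
            Url::parse("https://mikanani.me/RSS/Bangumi?bangumiId=3141&subgroupid=370").unwrap();
        let hash = MikanBangumiHash::from_rss_url(&rss_url).unwrap();
        assert_eq!(hash.mikan_bangumi_id, "3141");
        assert_eq!(hash.mikan_fansub_id, "370");
        assert_eq!(hash.build_rss_url(base), rss_url);
    }
}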
pub fn build_mikan_episode_homepage_url(mikan_base_url: Url, mikan_episode_id: &str) -> Url {
let mut url = mikan_base_url;
url.set_path(&format!("/Home/Episode/{mikan_episode_id}"));
url
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct MikanEpisodeHash {
pub mikan_episode_token: String,
pub mikan_episode_id: String,
}
impl MikanEpisodeHash {
pub fn from_homepage_url(url: &Url) -> Option<Self> {
if url.path().starts_with("/Home/Episode/") {
let mikan_episode_id = url.path().replace("/Home/Episode/", "");
Some(Self {
mikan_episode_token: mikan_episode_id,
})
Some(Self { mikan_episode_id })
} else {
None
}
}
pub fn build_homepage_url(self, mikan_base_url: Url) -> Url {
build_mikan_episode_homepage_url(mikan_base_url, &self.mikan_episode_id)
}
}
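// Illustrative sketch (not part of this commit): the renamed `mikan_episode_id`
// field round-trips through the episode homepage URL; the token value here is
// hypothetical.
#[cfg(test)]
mod episode_hash_url_sketch {
    use url::Url;

    use super::MikanEpisodeHash;

    #[test]
    fn episode_homepage_url_round_trips() {
        let base = Url::parse("https://mikanani.me/").unwrap();
        let homepage = Url::parse("https://mikanani.me/Home/Episode/some-episode-token").unwrap();
        let hash = MikanEpisodeHash::from_homepage_url(&homepage).unwrap();
        assert_eq!(hash.mikan_episode_id, "some-episode-token");
        assert_eq!(hash.build_homepage_url(base), homepage);
    }
}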
#[derive(async_graphql::Enum, Clone, Debug, Copy, Serialize, Deserialize, PartialEq, Eq)]
@@ -227,8 +396,7 @@ impl FromStr for MikanSeasonStr {
"" => Ok(MikanSeasonStr::Autumn),
"" => Ok(MikanSeasonStr::Winter),
_ => Err(RecorderError::without_source(format!(
"MikanSeasonStr must be one of '春', '夏', '秋', '冬', but got '{}'",
s
"MikanSeasonStr must be one of '春', '夏', '秋', '冬', but got '{s}'"
))),
}
}
@@ -284,12 +452,6 @@ pub fn build_mikan_season_flow_url(
url
}
pub fn build_mikan_episode_homepage_url(mikan_base_url: Url, mikan_episode_id: &str) -> Url {
let mut url = mikan_base_url;
url.set_path(&format!("/Home/Episode/{mikan_episode_id}"));
url
}
pub fn build_mikan_bangumi_expand_subscribed_url(
mikan_base_url: Url,
mikan_bangumi_id: &str,
@@ -322,7 +484,7 @@ pub fn extract_mikan_episode_meta_from_episode_homepage_html(
RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("bangumi_title"))
})?;
let MikanBangumiRssUrlMeta {
let MikanBangumiHash {
mikan_bangumi_id,
mikan_fansub_id,
..
@@ -331,7 +493,7 @@ pub fn extract_mikan_episode_meta_from_episode_homepage_html(
.next()
.and_then(|el| el.value().attr("href"))
.and_then(|s| mikan_episode_homepage_url.join(s).ok())
.and_then(|rss_link_url| MikanBangumiRssUrlMeta::from_url(&rss_link_url))
.and_then(|rss_link_url| MikanBangumiHash::from_rss_url(&rss_link_url))
.ok_or_else(|| {
RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_bangumi_id"))
})?;
@@ -345,8 +507,7 @@ pub fn extract_mikan_episode_meta_from_episode_homepage_html(
})?;
let MikanEpisodeHash {
mikan_episode_token,
..
mikan_episode_id, ..
} = MikanEpisodeHash::from_homepage_url(&mikan_episode_homepage_url).ok_or_else(|| {
RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_episode_id"))
})?;
@@ -436,9 +597,9 @@ pub fn extract_mikan_bangumi_index_meta_from_bangumi_homepage_html(
.next()
.and_then(|el| el.value().attr("href"))
.and_then(|s| mikan_bangumi_homepage_url.join(s).ok())
.and_then(|rss_link_url| MikanBangumiRssUrlMeta::from_url(&rss_link_url))
.and_then(|rss_link_url| MikanBangumiHash::from_rss_url(&rss_link_url))
.map(
|MikanBangumiRssUrlMeta {
|MikanBangumiHash {
mikan_bangumi_id, ..
}| mikan_bangumi_id,
)
@@ -734,10 +895,86 @@ pub fn extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
}
}
pub fn scrape_mikan_bangumi_meta_stream_from_season_flow_url(
ctx: Arc<dyn AppContextTrait>,
mikan_season_flow_url: Url,
credential_id: i32,
) -> impl Stream<Item = RecorderResult<MikanBangumiMeta>> {
try_stream! {
let mikan_base_url = ctx.mikan().base_url().clone();
let mikan_client = ctx.mikan().fork_with_credential(ctx.clone(), credential_id).await?;
let content = fetch_html(&mikan_client, mikan_season_flow_url.clone()).await?;
let mut bangumi_indices_meta = {
let html = Html::parse_document(&content);
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url)
};
if bangumi_indices_meta.is_empty() && !mikan_client.has_login().await? {
mikan_client.login().await?;
let content = fetch_html(&mikan_client, mikan_season_flow_url).await?;
let html = Html::parse_document(&content);
bangumi_indices_meta =
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url);
}
mikan_client
.sync_credential_cookies(ctx.clone(), credential_id)
.await?;
for bangumi_index in bangumi_indices_meta {
let bangumi_title = bangumi_index.bangumi_title.clone();
let bangumi_expand_subscribed_fragment_url = build_mikan_bangumi_expand_subscribed_url(
mikan_base_url.clone(),
&bangumi_index.mikan_bangumi_id,
);
let bangumi_expand_subscribed_fragment =
fetch_html(&mikan_client, bangumi_expand_subscribed_fragment_url).await?;
let bangumi_meta = {
let html = Html::parse_document(&bangumi_expand_subscribed_fragment);
extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
&html,
bangumi_index,
mikan_base_url.clone(),
)
.with_whatever_context::<_, String, RecorderError>(|| {
format!("failed to extract mikan bangumi fansub of title = {bangumi_title}")
})
}?;
yield bangumi_meta;
}
mikan_client
.sync_credential_cookies(ctx, credential_id)
.await?;
}
}
pub async fn scrape_mikan_bangumi_meta_list_from_season_flow_url(
ctx: Arc<dyn AppContextTrait>,
mikan_season_flow_url: Url,
credential_id: i32,
) -> RecorderResult<Vec<MikanBangumiMeta>> {
let stream = scrape_mikan_bangumi_meta_stream_from_season_flow_url(
ctx,
mikan_season_flow_url,
credential_id,
);
pin_mut!(stream);
stream.try_collect().await
}
#[cfg(test)]
mod test {
#![allow(unused_variables)]
use std::fs;
use std::{fs, sync::Arc};
use rstest::{fixture, rstest};
use tracing::Level;
@@ -1035,7 +1272,6 @@ mod test {
build_mikan_season_flow_url(mikan_base_url.clone(), 2025, MikanSeasonStr::Spring);
let bangumi_meta_list = scrape_mikan_bangumi_meta_list_from_season_flow_url(
mikan_client,
app_ctx.clone(),
mikan_season_flow_url,
credential.id,