refactor: refactor subscription

2025-05-11 01:41:11 +08:00
parent d4bdc677a9
commit ed2c1038e6
15 changed files with 850 additions and 463 deletions

View File

@@ -3,6 +3,7 @@ mod config;
mod constants;
mod credential;
mod rss;
mod subscription;
mod web;
pub use client::MikanClient;
@@ -14,11 +15,12 @@ pub use constants::{
};
pub use credential::MikanCredentialForm;
pub use rss::{
-MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel,
-MikanRssItem, MikanSubscriberAggregationRssUrlMeta, MikanSubscriberStreamRssChannel,
-build_mikan_bangumi_rss_url, build_mikan_subscriber_aggregation_rss_url,
-extract_mikan_bangumi_id_from_rss_url, extract_mikan_rss_channel_from_rss_link,
-extract_mikan_subscriber_aggregation_id_from_rss_link,
+MikanBangumiRssChannel, MikanBangumiRssUrlMeta, MikanRssChannel, MikanRssItem,
+MikanSubscriberRssChannel, MikanSubscriberSubscriptionRssUrlMeta,
+build_mikan_bangumi_subscription_rss_url, build_mikan_subscriber_subscription_rss_url,
};
pub use subscription::{
MikanBangumiSubscription, MikanSeasonSubscription, MikanSubscriberSubscription,
};
pub use web::{
MikanBangumiHomepageUrlMeta, MikanBangumiIndexHomepageUrlMeta, MikanBangumiIndexMeta,
@@ -26,10 +28,9 @@ pub use web::{
MikanSeasonFlowUrlMeta, MikanSeasonStr, build_mikan_bangumi_expand_subscribed_url,
build_mikan_bangumi_homepage_url, build_mikan_episode_homepage_url,
build_mikan_season_flow_url, extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
extract_mikan_bangumi_meta_from_expand_subscribed_fragment,
extract_mikan_episode_meta_from_episode_homepage_html,
scrape_mikan_bangumi_meta_from_bangumi_homepage_url,
-scrape_mikan_bangumi_meta_list_from_season_flow_url,
-scrape_mikan_bangumi_meta_stream_from_season_flow_url,
scrape_mikan_episode_meta_from_episode_homepage_url, scrape_mikan_poster_data_from_image_url,
scrape_mikan_poster_meta_from_image_url,
};

View File

@@ -1,5 +1,6 @@
use std::borrow::Cow;
use bytes::Bytes;
use chrono::DateTime;
use downloader::bittorrent::defs::BITTORRENT_MIME_TYPE;
use fetch::{FetchError, IntoUrl, bytes::fetch_bytes};
@@ -34,16 +35,8 @@ pub struct MikanBangumiRssChannel {
}
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct MikanBangumiIndexRssChannel {
-pub name: String,
-pub url: Url,
-pub mikan_bangumi_id: String,
-pub items: Vec<MikanRssItem>,
-}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct MikanSubscriberStreamRssChannel {
-pub mikan_aggregation_id: String,
+pub struct MikanSubscriberRssChannel {
+pub mikan_subscription_token: String,
pub url: Url,
pub items: Vec<MikanRssItem>,
}
@@ -51,40 +44,35 @@ pub struct MikanSubscriberStreamRssChannel {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MikanRssChannel {
Bangumi(MikanBangumiRssChannel),
-BangumiIndex(MikanBangumiIndexRssChannel),
-SubscriberStream(MikanSubscriberStreamRssChannel),
+Subscriber(MikanSubscriberRssChannel),
}
impl MikanRssChannel {
pub fn items(&self) -> &[MikanRssItem] {
match &self {
Self::Bangumi(MikanBangumiRssChannel { items, .. })
-| Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
-| Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
+| Self::Subscriber(MikanSubscriberRssChannel { items, .. }) => items,
}
}
pub fn into_items(self) -> Vec<MikanRssItem> {
match self {
Self::Bangumi(MikanBangumiRssChannel { items, .. })
-| Self::BangumiIndex(MikanBangumiIndexRssChannel { items, .. })
-| Self::SubscriberStream(MikanSubscriberStreamRssChannel { items, .. }) => items,
+| Self::Subscriber(MikanSubscriberRssChannel { items, .. }) => items,
}
}
pub fn name(&self) -> Option<&str> {
match &self {
-Self::Bangumi(MikanBangumiRssChannel { name, .. })
-| Self::BangumiIndex(MikanBangumiIndexRssChannel { name, .. }) => Some(name.as_str()),
-Self::SubscriberStream(MikanSubscriberStreamRssChannel { .. }) => None,
+Self::Bangumi(MikanBangumiRssChannel { name, .. }) => Some(name.as_str()),
+Self::Subscriber(MikanSubscriberRssChannel { .. }) => None,
}
}
pub fn url(&self) -> &Url {
match &self {
Self::Bangumi(MikanBangumiRssChannel { url, .. })
-| Self::BangumiIndex(MikanBangumiIndexRssChannel { url, .. })
-| Self::SubscriberStream(MikanSubscriberStreamRssChannel { url, .. }) => url,
+| Self::Subscriber(MikanSubscriberRssChannel { url, .. }) => url,
}
}
}
@@ -148,20 +136,58 @@ impl TryFrom<rss::Item> for MikanRssItem {
#[derive(Debug, Clone)]
pub struct MikanBangumiRssUrlMeta {
pub mikan_bangumi_id: String,
-pub mikan_fansub_id: Option<String>,
+pub mikan_fansub_id: String,
}
-#[derive(Debug, Clone)]
-pub struct MikanSubscriberAggregationRssUrlMeta {
-pub mikan_aggregation_id: String,
impl MikanBangumiRssUrlMeta {
pub fn from_url(url: &Url) -> Option<Self> {
if url.path() == "/RSS/Bangumi" {
if let (Some(mikan_fansub_id), Some(mikan_bangumi_id)) = (
url.query_pairs()
.find(|(k, _)| k == "subgroupid")
.map(|(_, v)| v.to_string()),
url.query_pairs()
.find(|(k, _)| k == "bangumiId")
.map(|(_, v)| v.to_string()),
) {
Some(MikanBangumiRssUrlMeta {
mikan_bangumi_id,
mikan_fansub_id,
})
} else {
None
}
} else {
None
}
}
}
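A minimal, test-style usage sketch for the new from_url constructor (the re-export path is assumed from the mod.rs changes above; the ids reuse the fixture values from the tests further down):

#[test]
fn mikan_bangumi_rss_url_meta_from_url_sketch() {
    use url::Url;

    use crate::extract::mikan::MikanBangumiRssUrlMeta;

    // Both query pairs must be present now that mikan_fansub_id is a
    // plain String instead of Option<String>.
    let url =
        Url::parse("https://mikanani.me/RSS/Bangumi?bangumiId=3141&subgroupid=370").unwrap();
    let meta = MikanBangumiRssUrlMeta::from_url(&url).expect("path and both params match");
    assert_eq!(meta.mikan_bangumi_id, "3141");
    assert_eq!(meta.mikan_fansub_id, "370");

    // A bangumi-index style URL without subgroupid no longer parses.
    let url = Url::parse("https://mikanani.me/RSS/Bangumi?bangumiId=3416").unwrap();
    assert!(MikanBangumiRssUrlMeta::from_url(&url).is_none());
}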
-pub fn build_mikan_bangumi_rss_url(
-mikan_base_url: impl IntoUrl,
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MikanSubscriberSubscriptionRssUrlMeta {
pub mikan_subscription_token: String,
}
impl MikanSubscriberSubscriptionRssUrlMeta {
pub fn from_url(url: &Url) -> Option<Self> {
if url.path() == "/RSS/MyBangumi" {
url.query_pairs().find(|(k, _)| k == "token").map(|(_, v)| {
MikanSubscriberSubscriptionRssUrlMeta {
mikan_subscription_token: v.to_string(),
}
})
} else {
None
}
}
}
+pub fn build_mikan_bangumi_subscription_rss_url(
+mikan_base_url: Url,
mikan_bangumi_id: &str,
mikan_fansub_id: Option<&str>,
-) -> RecorderResult<Url> {
-let mut url = mikan_base_url.into_url().map_err(FetchError::from)?;
+) -> Url {
+let mut url = mikan_base_url;
url.set_path("/RSS/Bangumi");
url.query_pairs_mut()
.append_pair("bangumiId", mikan_bangumi_id);
@@ -169,246 +195,16 @@ pub fn build_mikan_bangumi_rss_url(
url.query_pairs_mut()
.append_pair("subgroupid", mikan_fansub_id);
};
-Ok(url)
+url
}
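For reference, a hedged sketch of the builder's new contract: it now takes an owned base Url and returns Url directly, where the old build_mikan_bangumi_rss_url took impl IntoUrl and returned RecorderResult<Url>:

#[test]
fn build_mikan_bangumi_subscription_rss_url_sketch() {
    use url::Url;

    use crate::extract::mikan::build_mikan_bangumi_subscription_rss_url;

    let base = Url::parse("https://mikanani.me/").unwrap();
    // bangumiId is appended first, then the optional subgroupid.
    let rss_url = build_mikan_bangumi_subscription_rss_url(base, "3141", Some("370"));
    assert_eq!(
        rss_url.as_str(),
        "https://mikanani.me/RSS/Bangumi?bangumiId=3141&subgroupid=370"
    );
}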
-pub fn build_mikan_subscriber_aggregation_rss_url(
-mikan_base_url: &str,
-mikan_aggregation_id: &str,
-) -> RecorderResult<Url> {
-let mut url = Url::parse(mikan_base_url)?;
+pub fn build_mikan_subscriber_subscription_rss_url(
+mikan_base_url: Url,
+mikan_subscription_token: &str,
+) -> Url {
+let mut url = mikan_base_url;
url.set_path("/RSS/MyBangumi");
url.query_pairs_mut()
-.append_pair("token", mikan_aggregation_id);
-Ok(url)
}
pub fn extract_mikan_bangumi_id_from_rss_url(url: &Url) -> Option<MikanBangumiRssUrlMeta> {
if url.path() == "/RSS/Bangumi" {
url.query_pairs()
.find(|(k, _)| k == "bangumiId")
.map(|(_, v)| MikanBangumiRssUrlMeta {
mikan_bangumi_id: v.to_string(),
mikan_fansub_id: url
.query_pairs()
.find(|(k, _)| k == "subgroupid")
.map(|(_, v)| v.to_string()),
})
} else {
None
}
}
pub fn extract_mikan_subscriber_aggregation_id_from_rss_link(
url: &Url,
) -> Option<MikanSubscriberAggregationRssUrlMeta> {
if url.path() == "/RSS/MyBangumi" {
url.query_pairs().find(|(k, _)| k == "token").map(|(_, v)| {
MikanSubscriberAggregationRssUrlMeta {
mikan_aggregation_id: v.to_string(),
}
})
} else {
None
}
}
#[instrument(skip_all, fields(channel_rss_link = channel_rss_link.as_str()))]
pub async fn extract_mikan_rss_channel_from_rss_link(
http_client: &MikanClient,
channel_rss_link: impl IntoUrl,
) -> RecorderResult<MikanRssChannel> {
let bytes = fetch_bytes(http_client, channel_rss_link.as_str()).await?;
let channel = rss::Channel::read_from(&bytes[..])?;
let channel_link = Url::parse(channel.link())?;
if let Some(MikanBangumiRssUrlMeta {
mikan_bangumi_id,
mikan_fansub_id,
}) = extract_mikan_bangumi_id_from_rss_url(&channel_link)
{
tracing::trace!(
mikan_bangumi_id,
mikan_fansub_id,
"MikanBangumiRssLink extracting..."
);
let channel_name = channel.title().replace("Mikan Project - ", "");
let items = channel
.items
.into_iter()
.enumerate()
.flat_map(|(idx, item)| {
MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)
})
.collect_vec();
if let Some(mikan_fansub_id) = mikan_fansub_id {
tracing::trace!(
channel_name,
channel_link = channel_link.as_str(),
mikan_bangumi_id,
mikan_fansub_id,
"MikanBangumiRssChannel extracted"
);
Ok(MikanRssChannel::Bangumi(MikanBangumiRssChannel {
name: channel_name,
mikan_bangumi_id,
mikan_fansub_id,
url: channel_link,
items,
}))
} else {
tracing::trace!(
channel_name,
channel_link = channel_link.as_str(),
mikan_bangumi_id,
"MikanBangumiIndexRssChannel extracted"
);
Ok(MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel {
name: channel_name,
mikan_bangumi_id,
url: channel_link,
items,
}))
}
} else if let Some(MikanSubscriberAggregationRssUrlMeta {
mikan_aggregation_id,
..
}) = extract_mikan_subscriber_aggregation_id_from_rss_link(&channel_link)
{
tracing::trace!(
mikan_aggregation_id,
"MikanSubscriberAggregationRssLink extracting..."
);
let items = channel
.items
.into_iter()
.enumerate()
.flat_map(|(idx, item)| {
MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)
})
.collect_vec();
tracing::trace!(
channel_link = channel_link.as_str(),
mikan_aggregation_id,
"MikanSubscriberAggregationRssChannel extracted"
);
Ok(MikanRssChannel::SubscriberStream(
MikanSubscriberStreamRssChannel {
mikan_aggregation_id,
items,
url: channel_link,
},
))
} else {
Err(RecorderError::MikanRssInvalidFormatError).inspect_err(|error| {
tracing::warn!(error = %error);
})
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use downloader::bittorrent::BITTORRENT_MIME_TYPE;
use rstest::rstest;
use url::Url;
use crate::{
errors::RecorderResult,
extract::mikan::{
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanRssChannel,
extract_mikan_rss_channel_from_rss_link,
},
test_utils::mikan::build_testing_mikan_client,
};
#[rstest]
#[tokio::test]
async fn test_parse_mikan_rss_channel_from_rss_link() -> RecorderResult<()> {
let mut mikan_server = mockito::Server::new_async().await;
let mikan_base_url = Url::parse(&mikan_server.url())?;
let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?;
{
let bangumi_rss_url =
mikan_base_url.join("/RSS/Bangumi?bangumiId=3141&subgroupid=370")?;
let bangumi_rss_mock = mikan_server
.mock("GET", bangumi_rss_url.path())
.with_body_from_file("tests/resources/mikan/Bangumi-3141-370.rss")
.match_query(mockito::Matcher::Any)
.create_async()
.await;
let channel = extract_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
.await
.expect("should get mikan channel from rss url");
assert_matches!(
&channel,
MikanRssChannel::Bangumi(MikanBangumiRssChannel { .. })
);
assert_matches!(&channel.name(), Some("葬送的芙莉莲"));
let items = channel.items();
let first_sub_item = items
.first()
.expect("mikan subscriptions should have at least one subs");
assert_eq!(first_sub_item.mime, BITTORRENT_MIME_TYPE);
assert!(
&first_sub_item
.homepage
.as_str()
.starts_with("https://mikanani.me/Home/Episode")
);
let name = first_sub_item.title.as_str();
assert!(name.contains("葬送的芙莉莲"));
bangumi_rss_mock.expect(1);
}
{
let bangumi_rss_url = mikan_base_url.join("/RSS/Bangumi?bangumiId=3416")?;
let bangumi_rss_mock = mikan_server
.mock("GET", bangumi_rss_url.path())
.match_query(mockito::Matcher::Any)
.with_body_from_file("tests/resources/mikan/Bangumi-3416.rss")
.create_async()
.await;
let channel = extract_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
.await
.expect("should get mikan channel from rss url");
assert_matches!(
&channel,
MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel { .. })
);
assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));
bangumi_rss_mock.expect(1);
}
Ok(())
}
.append_pair("token", mikan_subscription_token);
url
}
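The subscriber-subscription pair is symmetric: build_mikan_subscriber_subscription_rss_url and MikanSubscriberSubscriptionRssUrlMeta::from_url invert each other, which is what try_from_model in the new subscription module relies on. A sketch with a placeholder token:

#[test]
fn subscriber_subscription_rss_url_round_trip_sketch() {
    use url::Url;

    use crate::extract::mikan::{
        MikanSubscriberSubscriptionRssUrlMeta, build_mikan_subscriber_subscription_rss_url,
    };

    let base = Url::parse("https://mikanani.me/").unwrap();
    // "some-token" is a placeholder; real tokens come from a user's MyBangumi RSS link.
    let rss_url = build_mikan_subscriber_subscription_rss_url(base, "some-token");
    assert_eq!(
        rss_url.as_str(),
        "https://mikanani.me/RSS/MyBangumi?token=some-token"
    );

    let meta = MikanSubscriberSubscriptionRssUrlMeta::from_url(&rss_url)
        .expect("builder output should parse back");
    assert_eq!(meta.mikan_subscription_token, "some-token");
}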

View File

@@ -0,0 +1,483 @@
use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use async_graphql::{InputObject, SimpleObject};
use async_stream::try_stream;
use fetch::{fetch_bytes, fetch_html};
use futures::Stream;
use itertools::Itertools;
use maplit::hashmap;
use scraper::Html;
use sea_orm::{ColumnTrait, EntityTrait, IntoSimpleExpr, QueryFilter, QuerySelect, prelude::Expr};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use url::Url;
use crate::{
app::AppContextTrait,
errors::{RecorderError, RecorderResult},
extract::mikan::{
MikanBangumiHomepageUrlMeta, MikanBangumiMeta, MikanBangumiRssUrlMeta, MikanEpisodeMeta,
MikanRssItem, MikanSeasonFlowUrlMeta, MikanSeasonStr,
MikanSubscriberSubscriptionRssUrlMeta, build_mikan_bangumi_expand_subscribed_url,
build_mikan_bangumi_subscription_rss_url, build_mikan_season_flow_url,
build_mikan_subscriber_subscription_rss_url,
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment,
extract_mikan_bangumi_meta_from_expand_subscribed_fragment,
scrape_mikan_episode_meta_from_episode_homepage_url,
},
migrations::defs::Bangumi,
models::{bangumi, episodes, subscriptions},
};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, InputObject, SimpleObject)]
pub struct MikanSubscriberSubscription {
pub id: i32,
pub mikan_subscription_token: String,
pub subscriber_id: i32,
}
impl MikanSubscriberSubscription {
#[tracing::instrument(skip(ctx))]
pub async fn pull_subscription(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> RecorderResult<Vec<MikanBangumiMeta>> {
let mikan_client = ctx.mikan();
let new_episode_meta_list: Vec<MikanEpisodeMeta> = {
let rss_item_list = self.pull_rss_items(ctx.clone()).await?;
let existed_item_set = episodes::Entity::find()
.select_only()
.column(episodes::Column::MikanEpisodeId)
.filter(
episodes::Column::SubscriberId.eq(self.subscriber_id).and(
episodes::Column::MikanEpisodeId
.is_in(rss_item_list.iter().map(|s| s.mikan_episode_id.clone())),
),
)
.into_tuple::<(String,)>()
.all(ctx.db())
.await?
.into_iter()
.map(|(value,)| value)
.collect::<HashSet<_>>();
let mut result = vec![];
for rss_item in rss_item_list
.into_iter()
.filter(|rss_item| !existed_item_set.contains(&rss_item.mikan_episode_id))
{
let episode_meta = scrape_mikan_episode_meta_from_episode_homepage_url(
mikan_client,
rss_item.homepage,
)
.await?;
// Keep the scraped meta; it feeds the bangumi resolution below.
result.push(episode_meta);
}
result
};
{
let mut new_bangumi_hash_map = new_episode_meta_list
.iter()
.map(|episode_meta| {
(
MikanBangumiHomepageUrlMeta {
mikan_bangumi_id: episode_meta.mikan_bangumi_id.clone(),
mikan_fansub_id: episode_meta.mikan_fansub_id.clone(),
},
episode_meta,
)
})
.collect::<HashMap<_, _>>();
let mut new_bangumi_meta_map: HashMap<MikanBangumiHomepageUrlMeta, bangumi::Model> =
hashmap! {};
// Active models for bangumi that are new to this subscriber.
let mut new_bangumi_active_models = vec![];
for bangumi_model in bangumi::Entity::find()
.filter({
Expr::tuple([
bangumi::Column::MikanBangumiId.into_simple_expr(),
bangumi::Column::MikanFansubId.into_simple_expr(),
bangumi::Column::SubscriberId.into_simple_expr(),
])
.in_tuples(new_bangumi_hash_map.iter().map(
|(bangumi_meta, _)| {
(
bangumi_meta.mikan_bangumi_id.clone(),
bangumi_meta.mikan_fansub_id.clone(),
self.subscriber_id,
)
},
))
})
.all(ctx.db())
.await?
{
let bangumi_hash = MikanBangumiHomepageUrlMeta {
mikan_bangumi_id: bangumi_model.mikan_bangumi_id.unwrap(),
mikan_fansub_id: bangumi_model.mikan_fansub_id.unwrap(),
};
new_bangumi_hash_map.remove(&bangumi_hash);
new_bangumi_meta_map.insert(bangumi_hash, bangumi_model);
}
for (_bangumi_hash, episode_meta) in new_bangumi_hash_map {
let bangumi_meta: MikanBangumiMeta = episode_meta.clone().into();
let bangumi_active_model = bangumi::ActiveModel::from_mikan_bangumi_meta(
ctx.clone(),
bangumi_meta.clone(),
self.subscriber_id,
)
.with_whatever_context::<_, String, RecorderError>(|_| {
format!(
"failed to create bangumi active model from mikan bangumi meta, \
bangumi_meta = {:?}",
bangumi_meta
)
})?;
new_bangumi_active_models.push(bangumi_active_model);
}
if !new_bangumi_active_models.is_empty() {
bangumi::Entity::insert_many(new_bangumi_active_models)
.exec(ctx.db())
.await?;
}
}
todo!()
}
#[tracing::instrument(skip(ctx))]
pub async fn pull_rss_items(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> RecorderResult<Vec<MikanRssItem>> {
let mikan_base_url = ctx.mikan().base_url().clone();
let rss_url = build_mikan_subscriber_subscription_rss_url(
mikan_base_url.clone(),
&self.mikan_subscription_token,
);
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
let channel = rss::Channel::read_from(&bytes[..])?;
let mut result = vec![];
for (idx, item) in channel.items.into_iter().enumerate() {
let item = MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)?;
result.push(item);
}
Ok(result)
}
pub fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let meta = MikanSubscriberSubscriptionRssUrlMeta::from_url(&source_url)
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanSubscriberSubscription should extract mikan_subscription_token from \
source_url = {}, subscription_id = {}",
source_url, model.id
)
})?;
Ok(Self {
id: model.id,
mikan_subscription_token: meta.mikan_subscription_token,
subscriber_id: model.subscriber_id,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, InputObject, SimpleObject)]
pub struct MikanSeasonSubscription {
pub id: i32,
pub year: i32,
pub season_str: MikanSeasonStr,
pub credential_id: i32,
pub subscriber_id: i32,
}
impl MikanSeasonSubscription {
#[tracing::instrument]
pub fn pull_bangumi_meta_stream(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> impl Stream<Item = RecorderResult<MikanBangumiMeta>> {
let credential_id = self.credential_id;
let year = self.year;
let season_str = self.season_str.clone();
try_stream! {
let mikan_base_url = ctx.mikan().base_url().clone();
let mikan_client = ctx.mikan()
.fork_with_credential(ctx.clone(), credential_id)
.await?;
let mikan_season_flow_url = build_mikan_season_flow_url(mikan_base_url.clone(), year, season_str);
let content = fetch_html(&mikan_client, mikan_season_flow_url.clone()).await?;
let mut bangumi_indices_meta = {
let html = Html::parse_document(&content);
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url)
};
if bangumi_indices_meta.is_empty() && !mikan_client.has_login().await? {
mikan_client.login().await?;
let content = fetch_html(&mikan_client, mikan_season_flow_url).await?;
let html = Html::parse_document(&content);
bangumi_indices_meta =
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, &mikan_base_url);
}
mikan_client
.sync_credential_cookies(ctx.clone(), credential_id)
.await?;
for bangumi_index in bangumi_indices_meta {
let bangumi_title = bangumi_index.bangumi_title.clone();
let bangumi_expand_subscribed_fragment_url = build_mikan_bangumi_expand_subscribed_url(
mikan_base_url.clone(),
&bangumi_index.mikan_bangumi_id,
);
let bangumi_expand_subscribed_fragment =
fetch_html(&mikan_client, bangumi_expand_subscribed_fragment_url).await?;
let bangumi_meta = {
let html = Html::parse_document(&bangumi_expand_subscribed_fragment);
extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
&html,
bangumi_index,
mikan_base_url.clone(),
)
.with_whatever_context::<_, String, RecorderError>(|| {
format!("failed to extract mikan bangumi fansub of title = {bangumi_title}")
})
}?;
yield bangumi_meta;
}
mikan_client
.sync_credential_cookies(ctx, credential_id)
.await?;
}
}
pub fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let source_url_meta = MikanSeasonFlowUrlMeta::from_url(&source_url)
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanSeasonSubscription should extract season_str and year from source_url, \
source_url = {}, subscription_id = {}",
source_url, model.id
)
})?;
let credential_id = model
.credential_id
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanSeasonSubscription credential_id is required, subscription_id = {}",
model.id
)
})?;
Ok(Self {
id: model.id,
year: source_url_meta.year,
season_str: source_url_meta.season_str,
credential_id,
subscriber_id: model.subscriber_id,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, InputObject, SimpleObject)]
pub struct MikanBangumiSubscription {
pub id: i32,
pub mikan_bangumi_id: String,
pub mikan_fansub_id: String,
pub subscriber_id: i32,
}
impl MikanBangumiSubscription {
#[tracing::instrument]
pub fn pull_rss_items(
&self,
ctx: Arc<dyn AppContextTrait>,
) -> impl Stream<Item = RecorderResult<MikanRssItem>> {
let mikan_bangumi_id = self.mikan_bangumi_id.clone();
let mikan_fansub_id = self.mikan_fansub_id.clone();
try_stream! {
let mikan_base_url = ctx.mikan().base_url().clone();
let rss_url = build_mikan_bangumi_subscription_rss_url(mikan_base_url.clone(), &mikan_bangumi_id, Some(&mikan_fansub_id));
let bytes = fetch_bytes(ctx.mikan(), rss_url).await?;
let channel = rss::Channel::read_from(&bytes[..])?;
for (idx, item) in channel.items.into_iter().enumerate() {
let item = MikanRssItem::try_from(item).inspect_err(
|error| tracing::warn!(error = %error, "failed to extract rss item idx = {}", idx),
)?;
yield item
}
}
}
pub fn try_from_model(model: &subscriptions::Model) -> RecorderResult<Self> {
let source_url = Url::parse(&model.source_url)?;
let meta = MikanBangumiRssUrlMeta::from_url(&source_url)
.with_whatever_context::<_, String, RecorderError>(|| {
format!(
"MikanBangumiSubscription need to extract bangumi id and fansub id from \
source_url = {}, subscription_id = {}",
source_url, model.id
)
})?;
Ok(Self {
id: model.id,
mikan_bangumi_id: meta.mikan_bangumi_id,
mikan_fansub_id: meta.mikan_fansub_id,
subscriber_id: model.subscriber_id,
})
}
}
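pull_rss_items returns an unpinned stream, so callers have to pin it before driving it, the same way the removed scrape_mikan_bangumi_meta_list_from_season_flow_url did with pin_mut. A hedged driver sketch (collect_bangumi_rss_items is a hypothetical helper, not part of this commit):

use std::sync::Arc;

use futures::{TryStreamExt, pin_mut};

use crate::{
    app::AppContextTrait,
    errors::RecorderResult,
    extract::mikan::{MikanBangumiSubscription, MikanRssItem},
};

/// Pin the stream on the stack, then collect every yielded item,
/// stopping at the first error.
async fn collect_bangumi_rss_items(
    subscription: &MikanBangumiSubscription,
    ctx: Arc<dyn AppContextTrait>,
) -> RecorderResult<Vec<MikanRssItem>> {
    let stream = subscription.pull_rss_items(ctx);
    pin_mut!(stream);
    stream.try_collect().await
}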
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use downloader::bittorrent::BITTORRENT_MIME_TYPE;
use rstest::rstest;
use url::Url;
use crate::{
errors::RecorderResult,
extract::mikan::{
MikanBangumiIndexRssChannel, MikanBangumiRssChannel, MikanRssChannel,
scrape_mikan_rss_channel_from_rss_link,
},
test_utils::mikan::build_testing_mikan_client,
};
#[rstest]
#[tokio::test]
async fn test_parse_mikan_rss_channel_from_rss_link() -> RecorderResult<()> {
let mut mikan_server = mockito::Server::new_async().await;
let mikan_base_url = Url::parse(&mikan_server.url())?;
let mikan_client = build_testing_mikan_client(mikan_base_url.clone()).await?;
{
let bangumi_rss_url =
mikan_base_url.join("/RSS/Bangumi?bangumiId=3141&subgroupid=370")?;
let bangumi_rss_mock = mikan_server
.mock("GET", bangumi_rss_url.path())
.with_body_from_file("tests/resources/mikan/Bangumi-3141-370.rss")
.match_query(mockito::Matcher::Any)
.create_async()
.await;
let channel = scrape_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
.await
.expect("should get mikan channel from rss url");
assert_matches!(
&channel,
MikanRssChannel::Bangumi(MikanBangumiRssChannel { .. })
);
assert_matches!(&channel.name(), Some("葬送的芙莉莲"));
let items = channel.items();
let first_sub_item = items
.first()
.expect("mikan subscriptions should have at least one subs");
assert_eq!(first_sub_item.mime, BITTORRENT_MIME_TYPE);
assert!(
&first_sub_item
.homepage
.as_str()
.starts_with("https://mikanani.me/Home/Episode")
);
let name = first_sub_item.title.as_str();
assert!(name.contains("葬送的芙莉莲"));
bangumi_rss_mock.expect(1);
}
{
let bangumi_rss_url = mikan_base_url.join("/RSS/Bangumi?bangumiId=3416")?;
let bangumi_rss_mock = mikan_server
.mock("GET", bangumi_rss_url.path())
.match_query(mockito::Matcher::Any)
.with_body_from_file("tests/resources/mikan/Bangumi-3416.rss")
.create_async()
.await;
let channel = scrape_mikan_rss_channel_from_rss_link(&mikan_client, bangumi_rss_url)
.await
.expect("should get mikan channel from rss url");
assert_matches!(
&channel,
MikanRssChannel::BangumiIndex(MikanBangumiIndexRssChannel { .. })
);
assert_matches!(&channel.name(), Some("叹气的亡灵想隐退"));
bangumi_rss_mock.expect(1);
}
Ok(())
}
}

View File

@@ -1,4 +1,4 @@
-use std::{borrow::Cow, fmt, sync::Arc};
+use std::{borrow::Cow, fmt, str::FromStr, sync::Arc};
use async_stream::try_stream;
use bytes::Bytes;
@@ -7,14 +7,13 @@ use futures::{Stream, TryStreamExt, pin_mut};
use html_escape::decode_html_entities;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
-use snafu::OptionExt;
+use snafu::FromString;
use tracing::instrument;
use url::Url;
use super::{
MIKAN_BANGUMI_EXPAND_SUBSCRIBED_PAGE_PATH, MIKAN_POSTER_BUCKET_KEY,
MIKAN_SEASON_FLOW_PAGE_PATH, MikanBangumiRssUrlMeta, MikanClient,
-extract_mikan_bangumi_id_from_rss_url,
};
use crate::{
app::AppContextTrait,
@@ -77,6 +76,19 @@ impl MikanBangumiMeta {
}
}
impl From<MikanEpisodeMeta> for MikanBangumiMeta {
fn from(episode_meta: MikanEpisodeMeta) -> Self {
Self {
homepage: episode_meta.homepage,
origin_poster_src: episode_meta.origin_poster_src,
bangumi_title: episode_meta.bangumi_title,
mikan_bangumi_id: episode_meta.mikan_bangumi_id,
mikan_fansub_id: episode_meta.mikan_fansub_id,
fansub: episode_meta.fansub,
}
}
}
impl MikanBangumiMeta {
pub fn from_bangumi_index_and_fansub_meta(
bangumi_index_meta: MikanBangumiIndexMeta,
@@ -128,7 +140,7 @@ impl MikanBangumiIndexHomepageUrlMeta {
}
}
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct MikanBangumiHomepageUrlMeta {
pub mikan_bangumi_id: String,
pub mikan_fansub_id: String,
@@ -194,12 +206,49 @@ impl fmt::Display for MikanSeasonStr {
}
}
impl FromStr for MikanSeasonStr {
type Err = RecorderError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"" => Ok(MikanSeasonStr::Spring),
"" => Ok(MikanSeasonStr::Summer),
"" => Ok(MikanSeasonStr::Autumn),
"" => Ok(MikanSeasonStr::Winter),
_ => Err(RecorderError::without_source(format!(
"MikanSeasonStr must be one of '春', '夏', '秋', '冬', but got '{}'",
s
))),
}
}
}
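A short usage sketch for the new FromStr impl; the accepted values are the site's Chinese season labels, and anything else becomes a RecorderError:

#[test]
fn mikan_season_str_from_str_sketch() {
    use std::str::FromStr;

    use crate::extract::mikan::MikanSeasonStr;

    assert_eq!(
        MikanSeasonStr::from_str("夏").ok(),
        Some(MikanSeasonStr::Summer)
    );
    // English names are not recognized.
    assert!(MikanSeasonStr::from_str("summer").is_err());
    // FromStr also enables str::parse on the season label.
    assert!("春".parse::<MikanSeasonStr>().is_ok());
}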
#[derive(Clone, Debug, PartialEq)]
pub struct MikanSeasonFlowUrlMeta {
pub year: i32,
pub season_str: MikanSeasonStr,
}
impl MikanSeasonFlowUrlMeta {
pub fn from_url(url: &Url) -> Option<Self> {
if url.path().starts_with(MIKAN_SEASON_FLOW_PAGE_PATH) {
if let (Some(year), Some(season_str)) = (
url.query_pairs()
.find(|(key, _)| key == "year")
.and_then(|(_, value)| value.parse::<i32>().ok()),
url.query_pairs()
.find(|(key, _)| key == "seasonStr")
.and_then(|(_, value)| MikanSeasonStr::from_str(&value).ok()),
) {
Some(Self { year, season_str })
} else {
None
}
} else {
None
}
}
}
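MikanSeasonFlowUrlMeta::from_url is the inverse used by MikanSeasonSubscription::try_from_model. A round-trip sketch that avoids hardcoding MIKAN_SEASON_FLOW_PAGE_PATH, assuming build_mikan_season_flow_url emits the year and seasonStr query pairs that from_url reads back:

#[test]
fn mikan_season_flow_url_round_trip_sketch() {
    use url::Url;

    use crate::extract::mikan::{
        MikanSeasonFlowUrlMeta, MikanSeasonStr, build_mikan_season_flow_url,
    };

    let base = Url::parse("https://mikanani.me/").unwrap();
    let url = build_mikan_season_flow_url(base, 2025, MikanSeasonStr::Summer);

    let meta =
        MikanSeasonFlowUrlMeta::from_url(&url).expect("builder output should parse back");
    assert_eq!(meta.year, 2025);
    assert_eq!(meta.season_str, MikanSeasonStr::Summer);
}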
pub fn build_mikan_bangumi_homepage_url(
mikan_base_url: Url,
mikan_bangumi_id: &str,
@@ -271,15 +320,11 @@ pub fn extract_mikan_episode_meta_from_episode_homepage_html(
.next()
.and_then(|el| el.value().attr("href"))
.and_then(|s| mikan_episode_homepage_url.join(s).ok())
-.and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_url(&rss_link_url))
+.and_then(|rss_link_url| MikanBangumiRssUrlMeta::from_url(&rss_link_url))
.ok_or_else(|| {
RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_bangumi_id"))
})?;
-let mikan_fansub_id = mikan_fansub_id.ok_or_else(|| {
-RecorderError::from_mikan_meta_missing_field(Cow::Borrowed("mikan_fansub_id"))
-})?;
let episode_title = html
.select(&Selector::parse("title").unwrap())
.next()
@@ -379,7 +424,7 @@ pub fn extract_mikan_bangumi_index_meta_from_bangumi_homepage_html(
.next()
.and_then(|el| el.value().attr("href"))
.and_then(|s| mikan_bangumi_homepage_url.join(s).ok())
-.and_then(|rss_link_url| extract_mikan_bangumi_id_from_rss_url(&rss_link_url))
+.and_then(|rss_link_url| MikanBangumiRssUrlMeta::from_url(&rss_link_url))
.map(
|MikanBangumiRssUrlMeta {
mikan_bangumi_id, ..
@@ -677,87 +722,6 @@ pub fn extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
}
}
pub fn scrape_mikan_bangumi_meta_stream_from_season_flow_url(
ctx: Arc<dyn AppContextTrait>,
mikan_season_flow_url: Url,
credential_id: i32,
) -> impl Stream<Item = RecorderResult<MikanBangumiMeta>> {
try_stream! {
let mikan_client = ctx.mikan()
.fork_with_credential(ctx.clone(), credential_id)
.await?;
let mikan_base_url = mikan_client.base_url();
let content = fetch_html(&mikan_client, mikan_season_flow_url.clone()).await?;
let mut bangumi_indices_meta = {
let html = Html::parse_document(&content);
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, mikan_base_url)
};
if bangumi_indices_meta.is_empty() && !mikan_client.has_login().await? {
mikan_client.login().await?;
let content = fetch_html(&mikan_client, mikan_season_flow_url).await?;
let html = Html::parse_document(&content);
bangumi_indices_meta =
extract_mikan_bangumi_index_meta_list_from_season_flow_fragment(&html, mikan_base_url);
}
mikan_client
.sync_credential_cookies(ctx.clone(), credential_id)
.await?;
for bangumi_index in bangumi_indices_meta {
let bangumi_title = bangumi_index.bangumi_title.clone();
let bangumi_expand_subscribed_fragment_url = build_mikan_bangumi_expand_subscribed_url(
mikan_base_url.clone(),
&bangumi_index.mikan_bangumi_id,
);
let bangumi_expand_subscribed_fragment =
fetch_html(&mikan_client, bangumi_expand_subscribed_fragment_url).await?;
let bangumi_meta = {
let html = Html::parse_document(&bangumi_expand_subscribed_fragment);
extract_mikan_bangumi_meta_from_expand_subscribed_fragment(
&html,
bangumi_index,
mikan_base_url.clone(),
)
.with_whatever_context::<_, String, RecorderError>(|| {
format!("failed to extract mikan bangumi fansub of title = {bangumi_title}")
})
}?;
yield bangumi_meta;
}
mikan_client
.sync_credential_cookies(ctx, credential_id)
.await?;
}
}
#[instrument(err, skip_all, fields(mikan_season_flow_url = mikan_season_flow_url.as_str(), credential_id = credential_id))]
pub async fn scrape_mikan_bangumi_meta_list_from_season_flow_url(
_mikan_client: &MikanClient,
ctx: Arc<dyn AppContextTrait>,
mikan_season_flow_url: Url,
credential_id: i32,
) -> RecorderResult<Vec<MikanBangumiMeta>> {
let stream = scrape_mikan_bangumi_meta_stream_from_season_flow_url(
ctx,
mikan_season_flow_url,
credential_id,
);
pin_mut!(stream);
let bangumi_metas = stream.try_collect().await?;
Ok(bangumi_metas)
}
#[cfg(test)]
mod test {
#![allow(unused_variables)]