Normalize links and images based on post's URL

2024-07-22 11:27:15 -07:00
parent b5468bced2
commit 1106377550
7 changed files with 89 additions and 15 deletions
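
For context: the change resolves relative links and image sources in a post's summary against the post's own URL before rendering. Below is a minimal sketch of that kind of resolution using the `url` crate; the URLs are made up for illustration, and the actual rewriting in this repo happens inside `sanitize_html`, whose body is not part of this diff.

```rust
use url::Url;

fn main() -> Result<(), url::ParseError> {
    // The post's own URL (the feed item's `link`).
    let post = Url::parse("https://example.com/blog/2024/post.html")?;

    // Root-relative and directory-relative references resolve against it.
    assert_eq!(
        post.join("/media/diagram.png")?.as_str(),
        "https://example.com/media/diagram.png"
    );
    assert_eq!(
        post.join("../about.html")?.as_str(),
        "https://example.com/blog/about.html"
    );

    // Already-absolute URLs pass through unchanged.
    assert_eq!(
        post.join("https://other.example/x")?.as_str(),
        "https://other.example/x"
    );
    Ok(())
}
```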


@@ -7,6 +7,7 @@ use std::{
use async_graphql::connection::{self, Connection, Edge};
use log::info;
use sqlx::postgres::PgPool;
use url::Url;
const TAG_PREFIX: &'static str = "News/";
const THREAD_PREFIX: &'static str = "news:";
@@ -14,6 +15,7 @@ const THREAD_PREFIX: &'static str = "news:";
use crate::{
    error::ServerError,
    graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
    sanitize_html,
};
pub fn is_newsreader_search(query: &str) -> bool {
@@ -174,14 +176,54 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
    } else {
        vec!["unread".to_string(), site.clone()]
    };
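    // Parse the feed's homepage and the post's link, falling back to a
    // placeholder URL when either is missing or empty.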
    let default_homepage = "http://no-homepage";
    let homepage = Url::parse(
        &r.homepage
            .map(|h| {
                if h.is_empty() {
                    default_homepage.to_string()
                } else {
                    h
                }
            })
            .unwrap_or(default_homepage.to_string()),
    )?;
    let link = Url::parse(
        &r.link
            .as_ref()
            .map(|h| {
                if h.is_empty() {
                    default_homepage.to_string()
                } else {
                    h.to_string()
                }
            })
            .unwrap_or(default_homepage.to_string()),
    )?;
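    // Email addresses are kept as-is; anything else is resolved against the
    // homepage so relative post links become absolute.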
    let addr = r.link.as_ref().map(|link| {
        if link.contains('@') {
            link.clone()
        } else {
            if let Ok(url) = homepage.join(&link) {
                url.to_string()
            } else {
                link.clone()
            }
        }
    });
    let html = r.summary.unwrap_or("NO SUMMARY".to_string());
    // TODO: add site-specific cleanups. For example:
    // * Grafana does <div class="image-wrapp"><img class="lazyload"><img src="/media/..."></img></div>
    // * Some sites appear to be HTML-encoded; decode them (e.g. imperialviolet)
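    // Sanitize the summary, normalizing relative links and images against the
    // post's URL.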
    let html = sanitize_html(&html, "", &link)?;
    let body = Body::Html(Html {
        html: r.summary.unwrap_or("NO SUMMARY".to_string()),
        html,
        content_tree: "".to_string(),
    });
    let title = r.title.unwrap_or("NO TITLE".to_string());
    let from = Some(Email {
        name: r.name,
        addr: r.link,
        addr: addr.map(|a| a.to_string()),
    });
    Ok(Thread {
        thread_id,