Normalize links and images based on post's URL

This commit is contained in:
2024-07-22 11:27:15 -07:00
parent b5468bced2
commit 1106377550
7 changed files with 89 additions and 15 deletions

View File

@@ -9,6 +9,7 @@ use log::error;
use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings};
use maplit::{hashmap, hashset};
use thiserror::Error;
use url::Url;
#[derive(Error, Debug)]
pub enum SanitizeError {
@@ -46,7 +47,11 @@ pub fn linkify_html(text: &str) -> String {
// html contains the content to be cleaned, cid_prefix is used to resolve mixed part image
// references, and base_url is the URL that relative link hrefs and image srcs are joined against
pub fn sanitize_html(html: &str, cid_prefix: &str) -> Result<String, SanitizeError> {
pub fn sanitize_html(
html: &str,
cid_prefix: &str,
base_url: &Url,
) -> Result<String, SanitizeError> {
let element_content_handlers = vec![
// Open links in new tab
element!("a[href]", |el| {
@@ -54,6 +59,22 @@ pub fn sanitize_html(html: &str, cid_prefix: &str) -> Result<String, SanitizeErr
Ok(())
}),
// Make links with relative URLs absolute
element!("a[href]", |el| {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
el.set_attribute("href", &href.as_str()).unwrap();
}
Ok(())
}),
// Make images with relative srcs absolute
element!("img[src]", |el| {
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
el.set_attribute("src", &src.as_str()).unwrap();
}
Ok(())
}),
// Replace mixed part CID images with URL
element!("img[src]", |el| {
let src = el