server: strip style & script tags, also handle some retryable errors on slurp
This commit is contained in:
@@ -258,27 +258,28 @@ pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<b
|
||||
async fn update_search_summary(
|
||||
pool: &PgPool,
|
||||
cacher: &FilesystemCacher,
|
||||
link: Url,
|
||||
link: String,
|
||||
body: String,
|
||||
id: i32,
|
||||
) -> Result<(), ServerError> {
|
||||
let body_transformers: Vec<Box<dyn Transformer>> = vec![
|
||||
Box::new(SlurpContents {
|
||||
cacher,
|
||||
inline_css: true,
|
||||
site_selectors: slurp_contents_selectors(),
|
||||
}),
|
||||
Box::new(StripHtml),
|
||||
];
|
||||
let slurp_contents = SlurpContents {
|
||||
cacher,
|
||||
inline_css: true,
|
||||
site_selectors: slurp_contents_selectors(),
|
||||
};
|
||||
let strip_html = StripHtml;
|
||||
|
||||
info!("adding {link} to search index");
|
||||
let mut body = body;
|
||||
let link = Some(link);
|
||||
for t in body_transformers.iter() {
|
||||
if t.should_run(&link, &body) {
|
||||
body = t.transform(&link, &body).await?;
|
||||
if let Ok(link) = Url::parse(&link) {
|
||||
let link = Some(link);
|
||||
if slurp_contents.should_run(&link, &body) {
|
||||
body = slurp_contents.transform(&link, &body).await?;
|
||||
}
|
||||
} else {
|
||||
error!("failed to parse link: {}", link);
|
||||
}
|
||||
body = strip_html.transform(&None, &body).await?;
|
||||
sqlx::query!(
|
||||
"UPDATE post SET search_summary = $1 WHERE id = $2",
|
||||
body,
|
||||
@@ -294,16 +295,12 @@ pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<b
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|r| {
|
||||
let Ok(link) = Url::parse(&r.link) else {
|
||||
error!("failed to parse link: {}", r.link);
|
||||
return None;
|
||||
};
|
||||
let Some(body) = r.clean_summary else {
|
||||
error!("clean_summary missing for {}", r.link);
|
||||
return None;
|
||||
};
|
||||
let id = r.id;
|
||||
Some(update_search_summary(pool, cacher, link, body, id))
|
||||
Some(update_search_summary(pool, cacher, r.link, body, id))
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user