server: filter out bad urls when indexing search summary

Bill Thiede 2025-01-29 16:53:38 -08:00
parent 2008457911
commit eb4f2d8b5d


@@ -2,7 +2,7 @@ use std::collections::HashMap;
 use cacher::FilesystemCacher;
 use futures::{stream::FuturesUnordered, StreamExt};
-use log::info;
+use log::{error, info};
 use maplit::hashmap;
 use scraper::Selector;
 use shared::compute_color;
@@ -293,11 +293,17 @@ pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<b
         .fetch_all(pool)
         .await?
         .into_iter()
-        .map(|r| {
-            let link = Url::parse(&r.link).expect("failed to parse link");
-            let body = r.clean_summary.unwrap_or("NO SUMMARY".to_string());
+        .filter_map(|r| {
+            let Ok(link) = Url::parse(&r.link) else {
+                error!("failed to parse link: {}", r.link);
+                return None;
+            };
+            let Some(body) = r.clean_summary else {
+                error!("clean_summary missing for {}", r.link);
+                return None;
+            };
             let id = r.id;
-            update_search_summary(pool, cacher, link, body, id)
+            Some(update_search_summary(pool, cacher, link, body, id))
         })
         .collect();
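
The core of the change is swapping map (which panicked on a bad link via expect and indexed a "NO SUMMARY" placeholder) for filter_map with let-else guards, so rows with an unparseable link or a missing clean_summary are logged and dropped from indexing. Below is a minimal, self-contained sketch of that pattern; the Row struct, the tuple return type, and eprintln! standing in for the log crate's error! are hypothetical stand-ins for the real query result and the update_search_summary call, and it assumes the url crate's Url as suggested by Url::parse in the diff.

use url::Url;

// Hypothetical stand-in for the row type returned by the query.
struct Row {
    id: i64,
    link: String,
    clean_summary: Option<String>,
}

// Keep only rows that have a parseable link and a cleaned summary.
fn collect_indexable(rows: Vec<Row>) -> Vec<(i64, Url, String)> {
    rows.into_iter()
        .filter_map(|r| {
            // Skip (and log) rows whose link is not a valid URL instead of panicking.
            let Ok(link) = Url::parse(&r.link) else {
                eprintln!("failed to parse link: {}", r.link);
                return None;
            };
            // Skip (and log) rows with no cleaned summary instead of indexing a placeholder.
            let Some(body) = r.clean_summary else {
                eprintln!("clean_summary missing for {}", r.link);
                return None;
            };
            Some((r.id, link, body))
        })
        .collect()
}

fn main() {
    let rows = vec![
        Row { id: 1, link: "https://example.com/a".into(), clean_summary: Some("ok".into()) },
        Row { id: 2, link: "not a url".into(), clean_summary: Some("dropped".into()) },
        Row { id: 3, link: "https://example.com/c".into(), clean_summary: None },
    ];
    // Only the first row survives the filtering.
    assert_eq!(collect_indexable(rows).len(), 1);
}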