server: use fetched contents of news for search index
@@ -19,6 +19,7 @@ use lol_html::{
     RewriteStrSettings,
 };
 use maplit::{hashmap, hashset};
+use regex::Regex;
 use scraper::{Html, Selector};
 use sqlx::types::time::PrimitiveDateTime;
 use thiserror::Error;
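
For context on the scraper types in these imports: Selector powers the site_selectors table further down, and Html parses a fetched page so its text can be indexed. A minimal, self-contained sketch of that pattern; the HTML body and the "article" selector are made-up examples, not taken from this commit:

    use scraper::{Html, Selector};

    fn main() {
        // Hypothetical fetched news page; in this commit the body would come
        // from the cacher or a fresh fetch inside SlurpContents.
        let html = "<html><body><article><p>Breaking news.</p></article></body></html>";
        let document = Html::parse_document(html);
        // Selector::parse only errors on invalid CSS selector syntax.
        let selector = Selector::parse("article").expect("invalid selector");
        for element in document.select(&selector) {
            // Join the element's text nodes into one indexable string.
            let text: String = element.text().collect::<Vec<_>>().join(" ");
            println!("{text}");
        }
    }
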
@@ -105,6 +106,8 @@ impl Transformer for StripHtml {
                 ..RewriteStrSettings::default()
             },
         )?;
+        let re = Regex::new(r"\s+").expect("failed to parse regex");
+        let text = re.replace_all(&text, " ").to_string();

         Ok(text)
     }
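
The two lines added here collapse whatever runs of spaces and newlines are left behind after tag stripping into single spaces. A standalone illustration of that behavior, using only the regex crate:

    use regex::Regex;

    fn main() {
        // Any run of whitespace (spaces, tabs, newlines) becomes one space,
        // mirroring the two lines added to StripHtml above.
        let re = Regex::new(r"\s+").expect("failed to parse regex");
        let text = re.replace_all("News   title\n\twith stray\n\nwhitespace", " ").to_string();
        assert_eq!(text, "News title with stray whitespace");
    }
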
@@ -250,13 +253,13 @@ impl Transformer for AddOutlink {
     }
 }

-struct SlurpContents {
-    cacher: Arc<Mutex<FilesystemCacher>>,
+struct SlurpContents<'c> {
+    cacher: &'c FilesystemCacher,
     inline_css: bool,
     site_selectors: HashMap<String, Vec<Selector>>,
 }

-impl SlurpContents {
+impl<'c> SlurpContents<'c> {
     fn get_selectors(&self, link: &Url) -> Option<&[Selector]> {
         for (host, selector) in self.site_selectors.iter() {
             if link.host_str().map(|h| h.contains(host)).unwrap_or(false) {
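
The change above swaps shared ownership (Arc<Mutex<FilesystemCacher>>) for a plain borrow with lifetime 'c, so no lock is taken per transform and the borrow checker guarantees the cacher outlives every SlurpContents that holds it. A minimal sketch of the shape; the FilesystemCacher stand-in and its get signature are only inferred from cacher.get(link.as_str()) in the last hunk, not the real type:

    // Stand-in for the crate's FilesystemCacher; assumed API, not the real type.
    struct FilesystemCacher;

    impl FilesystemCacher {
        // Signature inferred from `cacher.get(link.as_str())` returning bytes.
        fn get(&self, _key: &str) -> Option<Vec<u8>> {
            None
        }
    }

    // The struct borrows the cacher for lifetime 'c instead of owning an Arc<Mutex<..>>.
    struct SlurpContents<'c> {
        cacher: &'c FilesystemCacher,
    }

    fn main() {
        let cacher = FilesystemCacher;
        let slurper = SlurpContents { cacher: &cacher };
        let _ = slurper.cacher.get("https://example.com/article");
    }
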
@@ -268,7 +271,7 @@ impl SlurpContents {
 }

 #[async_trait]
-impl Transformer for SlurpContents {
+impl<'c> Transformer for SlurpContents<'c> {
     fn should_run(&self, link: &Option<Url>, html: &str) -> bool {
         let mut will_slurp = false;
         if let Some(link) = link {
@@ -294,7 +297,7 @@ impl Transformer for SlurpContents {
         let Some(selectors) = self.get_selectors(&link) else {
             return Ok(html.to_string());
         };
-        let cacher = self.cacher.lock().await;
+        let cacher = self.cacher;
         let body = if let Some(body) = cacher.get(link.as_str()) {
             String::from_utf8_lossy(&body).to_string()
         } else {
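
One behavioral note on the cache-hit path: String::from_utf8_lossy decodes the cached bytes leniently, replacing invalid UTF-8 with U+FFFD rather than failing. A self-contained sketch of the lookup pattern; the else branch (fetching the page) is truncated in the hunk above, so a placeholder stands in for it:

    fn main() {
        // Stand-in for cacher.get(link.as_str()): cached page bytes, if present.
        let cached: Option<Vec<u8>> = Some(b"<html>cached body</html>".to_vec());
        let body = if let Some(body) = cached {
            // Lossy decode: invalid UTF-8 sequences become U+FFFD instead of erroring.
            String::from_utf8_lossy(&body).to_string()
        } else {
            // Placeholder only; the real branch fetches the page (cut off above).
            String::new()
        };
        assert_eq!(body, "<html>cached body</html>");
    }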