server: strip html from search index of summaries

This commit is contained in:
Bill Thiede 2025-01-19 17:20:18 -08:00
parent f90ff72316
commit c693d4e78a
2 changed files with 16 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
-- Down migration: rebuild post_summary_idx as a GIN full-text index over the
-- raw summary column, i.e. without any HTML-tag stripping.
-- IF EXISTS keeps the script re-runnable when the index is already gone.
DROP INDEX IF EXISTS post_summary_idx;
-- NOTE(review): PostgreSQL only uses an expression index when the query
-- contains the same expression -- confirm search queries use
-- to_tsvector('english', summary) once this migration has been applied.
CREATE INDEX post_summary_idx
    ON post
    USING gin (to_tsvector('english', summary));

View File

@@ -0,0 +1,11 @@
-- Rebuild post_summary_idx so the full-text index is computed from summary
-- with its HTML tags removed:
--   1. inner regexp_replace: every '<...>' tag is replaced by a single space;
--   2. outer regexp_replace: each run of whitespace is collapsed to one space.
-- IF EXISTS keeps the script re-runnable when the index is already gone.
DROP INDEX IF EXISTS post_summary_idx;
-- NOTE(review): '\s+' is read as the regex \s+ only when
-- standard_conforming_strings is on (the PostgreSQL default) -- confirm.
-- NOTE(review): PostgreSQL only uses an expression index when the query
-- contains the same expression -- confirm search queries apply the identical
-- regexp_replace(...) wrapping around summary.
CREATE INDEX post_summary_idx
    ON post
    USING gin (
        to_tsvector(
            'english',
            regexp_replace(
                regexp_replace(summary, '<[^>]+>', ' ', 'g'),
                '\s+',
                ' ',
                'g'
            )
        )
    );