From c693d4e78ae27e4f29bb1e5d93f778de1cb3b97a Mon Sep 17 00:00:00 2001
From: Bill Thiede
Date: Sun, 19 Jan 2025 17:20:18 -0800
Subject: [PATCH] server: strip html from search index of summaries

---
NOTE(review): the migration bodies below were edited after this patch was
generated; the abbreviated blob ids on the "index" lines predate those edits.

 .../20250117225159_strip-html-index.down.sql          |  6 ++++++
 .../migrations/20250117225159_strip-html-index.up.sql | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 server/migrations/20250117225159_strip-html-index.down.sql
 create mode 100644 server/migrations/20250117225159_strip-html-index.up.sql

diff --git a/server/migrations/20250117225159_strip-html-index.down.sql b/server/migrations/20250117225159_strip-html-index.down.sql
new file mode 100644
index 0000000..4b01805
--- /dev/null
+++ b/server/migrations/20250117225159_strip-html-index.down.sql
@@ -0,0 +1,6 @@
+-- Roll back strip-html-index: rebuild post_summary_idx over the raw summary
+-- text. NOTE(review): presumably the pre-migration definition -- confirm.
+-- IF EXISTS keeps the rollback from aborting when the index is already gone.
+DROP INDEX IF EXISTS post_summary_idx;
+CREATE INDEX post_summary_idx ON post USING gin (
+    to_tsvector('english', summary)
+);
diff --git a/server/migrations/20250117225159_strip-html-index.up.sql b/server/migrations/20250117225159_strip-html-index.up.sql
new file mode 100644
index 0000000..4e99e01
--- /dev/null
+++ b/server/migrations/20250117225159_strip-html-index.up.sql
@@ -0,0 +1,18 @@
+-- Rebuild the full-text index on post.summary so that HTML markup is not
+-- indexed: every <...> tag is replaced by a space, then each run of
+-- whitespace is collapsed to one space before to_tsvector sees the text.
+-- NOTE(review): an expression index is only considered for queries that use
+-- the same expression; confirm the search query matches this one.
+-- IF EXISTS keeps the migration from aborting when the index is absent.
+DROP INDEX IF EXISTS post_summary_idx;
+CREATE INDEX post_summary_idx ON post USING gin (
+    to_tsvector(
+        'english',
+        regexp_replace(
+            regexp_replace(summary, '<[^>]+>', ' ', 'g'),
+            '\s+',
+            ' ',
+            'g'
+        )
+    )
+);