server: add ability to slurp contents from site

This commit is contained in:
Bill Thiede 2024-08-25 19:37:53 -07:00
parent d98d429b5c
commit 71de3ef8ae
4 changed files with 661 additions and 82 deletions

543
Cargo.lock generated
View File

@ -72,7 +72,7 @@ version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e6d1c7838db705c9b756557ee27c384ce695a1c51a6fe528784cb1c6840170" checksum = "64e6d1c7838db705c9b756557ee27c384ce695a1c51a6fe528784cb1c6840170"
dependencies = [ dependencies = [
"html5ever", "html5ever 0.26.0",
"maplit", "maplit",
"once_cell", "once_cell",
"tendril", "tendril",
@ -272,6 +272,12 @@ dependencies = [
"bytemuck", "bytemuck",
] ]
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]] [[package]]
name = "atomic_hooks" name = "atomic_hooks"
version = "0.1.6" version = "0.1.6"
@ -328,6 +334,12 @@ version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]] [[package]]
name = "base64ct" name = "base64ct"
version = "1.6.0" version = "1.6.0"
@ -586,11 +598,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0250ac93bbccb4f0a892507a4580178edddef5e8267650e294b4fe00597b0da8" checksum = "0250ac93bbccb4f0a892507a4580178edddef5e8267650e294b4fe00597b0da8"
dependencies = [ dependencies = [
"cssparser 0.31.2", "cssparser 0.31.2",
"html5ever", "html5ever 0.26.0",
"indexmap 2.2.6", "indexmap 2.2.6",
"pico-args", "pico-args",
"rayon", "rayon",
"reqwest", "reqwest 0.11.27",
"rustc-hash", "rustc-hash",
"selectors 0.25.0", "selectors 0.25.0",
"smallvec", "smallvec",
@ -838,6 +850,12 @@ dependencies = [
"paste", "paste",
] ]
[[package]]
name = "ego-tree"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"
[[package]] [[package]]
name = "either" name = "either"
version = "1.13.0" version = "1.13.0"
@ -941,6 +959,21 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foreign-types"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
dependencies = [
"foreign-types-shared",
]
[[package]]
name = "foreign-types-shared"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.2.1" version = "1.2.1"
@ -1102,6 +1135,15 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "getopts"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
dependencies = [
"unicode-width",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.16" version = "0.1.16"
@ -1316,6 +1358,25 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "h2"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http 1.1.0",
"indexmap 2.2.6",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "handlebars" name = "handlebars"
version = "4.5.0" version = "4.5.0"
@ -1444,12 +1505,26 @@ checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
dependencies = [ dependencies = [
"log", "log",
"mac", "mac",
"markup5ever", "markup5ever 0.11.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "html5ever"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
"markup5ever 0.12.1",
"proc-macro2",
"quote",
"syn 2.0.69",
]
[[package]] [[package]]
name = "http" name = "http"
version = "0.2.12" version = "0.2.12"
@ -1483,6 +1558,29 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "http-body"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http 1.1.0",
]
[[package]]
name = "http-body-util"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.1",
"pin-project-lite",
]
[[package]] [[package]]
name = "httparse" name = "httparse"
version = "1.9.4" version = "1.9.4"
@ -1511,9 +1609,9 @@ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2", "h2 0.3.26",
"http 0.2.12", "http 0.2.12",
"http-body", "http-body 0.4.6",
"httparse", "httparse",
"httpdate", "httpdate",
"itoa 1.0.11", "itoa 1.0.11",
@ -1525,6 +1623,26 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2 0.4.6",
"http 1.1.0",
"http-body 1.0.1",
"httparse",
"itoa 1.0.11",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]] [[package]]
name = "hyper-rustls" name = "hyper-rustls"
version = "0.24.2" version = "0.24.2"
@ -1533,10 +1651,63 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
dependencies = [ dependencies = [
"futures-util", "futures-util",
"http 0.2.12", "http 0.2.12",
"hyper", "hyper 0.14.29",
"rustls", "rustls 0.21.12",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls 0.24.1",
]
[[package]]
name = "hyper-rustls"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155"
dependencies = [
"futures-util",
"http 1.1.0",
"hyper 1.4.1",
"hyper-util",
"rustls 0.23.12",
"rustls-pki-types",
"tokio",
"tokio-rustls 0.26.0",
"tower-service",
]
[[package]]
name = "hyper-tls"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
"bytes",
"http-body-util",
"hyper 1.4.1",
"hyper-util",
"native-tls",
"tokio",
"tokio-native-tls",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http 1.1.0",
"http-body 1.0.1",
"hyper 1.4.1",
"pin-project-lite",
"socket2",
"tokio",
"tower",
"tower-service",
"tracing",
] ]
[[package]] [[package]]
@ -1855,6 +2026,20 @@ dependencies = [
"tendril", "tendril",
] ]
[[package]]
name = "markup5ever"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf 0.11.2",
"phf_codegen 0.11.2",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]] [[package]]
name = "matchers" name = "matchers"
version = "0.1.0" version = "0.1.0"
@ -1974,6 +2159,23 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "native-tls"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466"
dependencies = [
"libc",
"log",
"openssl",
"openssl-probe",
"openssl-sys",
"schannel",
"security-framework",
"security-framework-sys",
"tempfile",
]
[[package]] [[package]]
name = "new_debug_unreachable" name = "new_debug_unreachable"
version = "1.0.6" version = "1.0.6"
@ -2100,6 +2302,50 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "openssl"
version = "0.10.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1"
dependencies = [
"bitflags 2.6.0",
"cfg-if",
"foreign-types",
"libc",
"once_cell",
"openssl-macros",
"openssl-sys",
]
[[package]]
name = "openssl-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.69",
]
[[package]]
name = "openssl-probe"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]] [[package]]
name = "owning_ref" name = "owning_ref"
version = "0.4.1" version = "0.4.1"
@ -2296,6 +2542,16 @@ dependencies = [
"phf_shared 0.10.0", "phf_shared 0.10.0",
] ]
[[package]]
name = "phf_codegen"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator 0.11.2",
"phf_shared 0.11.2",
]
[[package]] [[package]]
name = "phf_generator" name = "phf_generator"
version = "0.8.0" version = "0.8.0"
@ -2760,11 +3016,11 @@ dependencies = [
"encoding_rs", "encoding_rs",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2", "h2 0.3.26",
"http 0.2.12", "http 0.2.12",
"http-body", "http-body 0.4.6",
"hyper", "hyper 0.14.29",
"hyper-rustls", "hyper-rustls 0.24.2",
"ipnet", "ipnet",
"js-sys", "js-sys",
"log", "log",
@ -2772,15 +3028,15 @@ dependencies = [
"once_cell", "once_cell",
"percent-encoding", "percent-encoding",
"pin-project-lite", "pin-project-lite",
"rustls", "rustls 0.21.12",
"rustls-pemfile", "rustls-pemfile 1.0.4",
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
"sync_wrapper", "sync_wrapper 0.1.2",
"system-configuration", "system-configuration 0.5.1",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls 0.24.1",
"tower-service", "tower-service",
"url", "url",
"wasm-bindgen", "wasm-bindgen",
@ -2790,6 +3046,50 @@ dependencies = [
"winreg", "winreg",
] ]
[[package]]
name = "reqwest"
version = "0.12.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63"
dependencies = [
"base64 0.22.1",
"bytes",
"encoding_rs",
"futures-channel",
"futures-core",
"futures-util",
"h2 0.4.6",
"http 1.1.0",
"http-body 1.0.1",
"http-body-util",
"hyper 1.4.1",
"hyper-rustls 0.27.2",
"hyper-tls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"native-tls",
"once_cell",
"percent-encoding",
"pin-project-lite",
"rustls-pemfile 2.1.3",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper 1.0.1",
"system-configuration 0.6.1",
"tokio",
"tokio-native-tls",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"windows-registry",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.8" version = "0.17.8"
@ -2887,7 +3187,7 @@ dependencies = [
"either", "either",
"futures", "futures",
"http 0.2.12", "http 0.2.12",
"hyper", "hyper 0.14.29",
"indexmap 2.2.6", "indexmap 2.2.6",
"log", "log",
"memchr", "memchr",
@ -2966,10 +3266,23 @@ checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
dependencies = [ dependencies = [
"log", "log",
"ring", "ring",
"rustls-webpki", "rustls-webpki 0.101.7",
"sct", "sct",
] ]
[[package]]
name = "rustls"
version = "0.23.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044"
dependencies = [
"once_cell",
"rustls-pki-types",
"rustls-webpki 0.102.6",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "rustls-pemfile" name = "rustls-pemfile"
version = "1.0.4" version = "1.0.4"
@ -2979,6 +3292,22 @@ dependencies = [
"base64 0.21.7", "base64 0.21.7",
] ]
[[package]]
name = "rustls-pemfile"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425"
dependencies = [
"base64 0.22.1",
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0"
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
version = "0.101.7" version = "0.101.7"
@ -2989,6 +3318,17 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "rustls-webpki"
version = "0.102.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]] [[package]]
name = "rustversion" name = "rustversion"
version = "1.0.17" version = "1.0.17"
@ -3001,6 +3341,15 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "schannel"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
dependencies = [
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "scoped-tls" name = "scoped-tls"
version = "1.0.1" version = "1.0.1"
@ -3013,6 +3362,22 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
dependencies = [
"ahash 0.8.11",
"cssparser 0.31.2",
"ego-tree",
"getopts",
"html5ever 0.27.0",
"once_cell",
"selectors 0.25.0",
"tendril",
]
[[package]] [[package]]
name = "sct" name = "sct"
version = "0.7.1" version = "0.7.1"
@ -3023,6 +3388,29 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "security-framework"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.6.0",
"core-foundation",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]] [[package]]
name = "seed" name = "seed"
version = "0.10.0" version = "0.10.0"
@ -3175,6 +3563,7 @@ dependencies = [
"anyhow", "anyhow",
"async-graphql", "async-graphql",
"async-graphql-rocket", "async-graphql-rocket",
"async-trait",
"css-inline", "css-inline",
"glog", "glog",
"html-escape", "html-escape",
@ -3185,8 +3574,10 @@ dependencies = [
"maplit", "maplit",
"memmap", "memmap",
"notmuch", "notmuch",
"reqwest 0.12.7",
"rocket", "rocket",
"rocket_cors", "rocket_cors",
"scraper",
"serde", "serde",
"serde_json", "serde_json",
"shared", "shared",
@ -3676,6 +4067,15 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]]
name = "sync_wrapper"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
dependencies = [
"futures-core",
]
[[package]] [[package]]
name = "system-configuration" name = "system-configuration"
version = "0.5.1" version = "0.5.1"
@ -3684,7 +4084,18 @@ checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
dependencies = [ dependencies = [
"bitflags 1.2.1", "bitflags 1.2.1",
"core-foundation", "core-foundation",
"system-configuration-sys", "system-configuration-sys 0.5.0",
]
[[package]]
name = "system-configuration"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
"bitflags 2.6.0",
"core-foundation",
"system-configuration-sys 0.6.0",
] ]
[[package]] [[package]]
@ -3697,6 +4108,16 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "system-configuration-sys"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.10.1" version = "3.10.1"
@ -3839,13 +4260,34 @@ dependencies = [
"syn 2.0.69", "syn 2.0.69",
] ]
[[package]]
name = "tokio-native-tls"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
dependencies = [
"native-tls",
"tokio",
]
[[package]] [[package]]
name = "tokio-rustls" name = "tokio-rustls"
version = "0.24.1" version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [ dependencies = [
"rustls", "rustls 0.21.12",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
dependencies = [
"rustls 0.23.12",
"rustls-pki-types",
"tokio", "tokio",
] ]
@ -3942,6 +4384,27 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"pin-project",
"pin-project-lite",
"tokio",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
[[package]] [[package]]
name = "tower-service" name = "tower-service"
version = "0.3.2" version = "0.3.2"
@ -4099,6 +4562,12 @@ version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode-width"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
[[package]] [[package]]
name = "unicode-xid" name = "unicode-xid"
version = "0.2.4" version = "0.2.4"
@ -4393,6 +4862,36 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"

View File

@ -11,6 +11,7 @@ ammonia = "3.3.0"
anyhow = "1.0.79" anyhow = "1.0.79"
async-graphql = { version = "6.0.11", features = ["log"] } async-graphql = { version = "6.0.11", features = ["log"] }
async-graphql-rocket = "6.0.11" async-graphql-rocket = "6.0.11"
async-trait = "0.1.81"
css-inline = "0.13.0" css-inline = "0.13.0"
glog = "0.1.0" glog = "0.1.0"
html-escape = "0.2.13" html-escape = "0.2.13"
@ -21,8 +22,10 @@ mailparse = "0.15.0"
maplit = "1.0.2" maplit = "1.0.2"
memmap = "0.7.0" memmap = "0.7.0"
notmuch = { path = "../notmuch" } notmuch = { path = "../notmuch" }
reqwest = { version = "0.12.7", features = ["blocking"] }
rocket = { version = "0.5.0-rc.2", features = [ "json" ] } rocket = { version = "0.5.0-rc.2", features = [ "json" ] }
rocket_cors = "0.6.0" rocket_cors = "0.6.0"
scraper = "0.20.0"
serde = { version = "1.0.147", features = ["derive"] } serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.87" serde_json = "1.0.87"
shared = { path = "../shared" } shared = { path = "../shared" }

View File

@ -3,13 +3,15 @@ pub mod graphql;
pub mod newsreader; pub mod newsreader;
pub mod nm; pub mod nm;
use std::{convert::Infallible, str::FromStr}; use std::{collections::HashMap, convert::Infallible, str::FromStr};
use async_trait::async_trait;
use css_inline::{CSSInliner, InlineError, InlineOptions}; use css_inline::{CSSInliner, InlineError, InlineOptions};
use linkify::{LinkFinder, LinkKind}; use linkify::{LinkFinder, LinkKind};
use log::{error, info}; use log::{error, info, warn};
use lol_html::{element, errors::RewritingError, rewrite_str, text, RewriteStrSettings}; use lol_html::{element, errors::RewritingError, rewrite_str, text, RewriteStrSettings};
use maplit::{hashmap, hashset}; use maplit::{hashmap, hashset};
use scraper::{error::SelectorErrorKind, Html, Selector};
use thiserror::Error; use thiserror::Error;
use url::Url; use url::Url;
@ -19,23 +21,28 @@ use crate::newsreader::{
const NON_EXISTENT_SITE_NAME: &'static str = "NO-SUCH-SITE"; const NON_EXISTENT_SITE_NAME: &'static str = "NO-SUCH-SITE";
// TODO: figure out how to use Cow // TODO: figure out how to use Cow
trait Transformer { #[async_trait]
fn should_run(&self, _html: &str) -> bool { trait Transformer: Send + Sync {
fn should_run(&self, addr: &Option<Url>, _html: &str) -> bool {
true true
} }
// TODO: should html be something like `html_escape` uses: // TODO: should html be something like `html_escape` uses:
// <S: ?Sized + AsRef<str>>(text: &S) -> Cow<str> // <S: ?Sized + AsRef<str>>(text: &S) -> Cow<str>
fn transform(&self, html: &str) -> Result<String, TransformError>; async fn transform(&self, addr: &Option<Url>, html: &str) -> Result<String, TransformError>;
} }
// TODO: how would we make this more generic to allow good implementations of Transformer outside // TODO: how would we make this more generic to allow good implementations of Transformer outside
// of this module? // of this module?
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum TransformError { pub enum TransformError {
#[error("lol-html rewrite error")] #[error("lol-html rewrite error: {0}")]
RewritingError(#[from] RewritingError), RewritingError(#[from] RewritingError),
#[error("css inline error")] #[error("css inline error: {0}")]
InlineError(#[from] InlineError), InlineError(#[from] InlineError),
#[error("failed to fetch url error: {0}")]
ReqwestError(#[from] reqwest::Error),
#[error("failed to parse HTML: {0}")]
HtmlParsingError(String),
} }
struct SanitizeHtml<'a> { struct SanitizeHtml<'a> {
@ -43,31 +50,34 @@ struct SanitizeHtml<'a> {
base_url: &'a Option<Url>, base_url: &'a Option<Url>,
} }
#[async_trait]
impl<'a> Transformer for SanitizeHtml<'a> { impl<'a> Transformer for SanitizeHtml<'a> {
fn transform(&self, html: &str) -> Result<String, TransformError> { async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
Ok(sanitize_html(html, self.cid_prefix, self.base_url)?) Ok(sanitize_html(html, self.cid_prefix, self.base_url)?)
} }
} }
struct EscapeHtml; struct EscapeHtml;
#[async_trait]
impl Transformer for EscapeHtml { impl Transformer for EscapeHtml {
fn should_run(&self, html: &str) -> bool { fn should_run(&self, _: &Option<Url>, html: &str) -> bool {
html.contains("&") html.contains("&")
} }
fn transform(&self, html: &str) -> Result<String, TransformError> { async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
Ok(html_escape::decode_html_entities(html).to_string()) Ok(html_escape::decode_html_entities(html).to_string())
} }
} }
struct StripHtml; struct StripHtml;
#[async_trait]
impl Transformer for StripHtml { impl Transformer for StripHtml {
fn should_run(&self, html: &str) -> bool { fn should_run(&self, _: &Option<Url>, html: &str) -> bool {
// Lame test // Lame test
html.contains("<") html.contains("<")
} }
fn transform(&self, html: &str) -> Result<String, TransformError> { async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
let mut text = String::new(); let mut text = String::new();
let element_content_handlers = vec![text!("*", |t| { let element_content_handlers = vec![text!("*", |t| {
text += t.as_str(); text += t.as_str();
@ -87,8 +97,9 @@ impl Transformer for StripHtml {
struct InlineStyle; struct InlineStyle;
#[async_trait]
impl Transformer for InlineStyle { impl Transformer for InlineStyle {
fn transform(&self, html: &str) -> Result<String, TransformError> { async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
let css = concat!( let css = concat!(
"/* chrome-default.css */\n", "/* chrome-default.css */\n",
include_str!("chrome-default.css"), include_str!("chrome-default.css"),
@ -118,29 +129,78 @@ impl Transformer for InlineStyle {
} }
} }
struct AddOutlink(Option<url::Url>); struct AddOutlink;
#[async_trait]
impl Transformer for AddOutlink { impl Transformer for AddOutlink {
fn should_run(&self, html: &str) -> bool { fn should_run(&self, link: &Option<Url>, html: &str) -> bool {
if let Some(link) = &self.0 { if let Some(link) = link {
return link.scheme().starts_with("http") && !html.contains(link.as_str()); link.scheme().starts_with("http") && !html.contains(link.as_str())
} else {
false
} }
false
} }
fn transform(&self, html: &str) -> Result<String, TransformError> { async fn transform(&self, link: &Option<Url>, html: &str) -> Result<String, TransformError> {
if let Some(url) = &self.0 { if let Some(link) = link {
Ok(format!( Ok(format!(
r#" r#"
{html} {html}
<div><a href="{}">View on site</a></div> <div><a href="{}">View on site</a></div>
"#, "#,
url link
)) ))
} else { } else {
Ok(html.to_string()) Ok(html.to_string())
} }
} }
} }
/// Transformer that replaces a post's body with content scraped from the page the post links to.
struct SlurpContents {
/// Host name (e.g. `hackaday.com`) mapped to the CSS selectors to pull out of that site's pages.
site_selectors: HashMap<String, Vec<Selector>>,
}
impl SlurpContents {
    /// Returns the selectors configured for the site that `link` points at, if any.
    ///
    /// A configured host matches when it is equal to the link's host or is a parent domain of
    /// it (`slashdot.org` matches `tech.slashdot.org`). A plain substring test is deliberately
    /// not used: it would also match unrelated hosts such as `notslashdot.org` or
    /// `slashdot.org.example.net`.
    ///
    /// Returns `None` when the link has no host or no configured site matches.
    fn get_selectors(&self, link: &Url) -> Option<&[Selector]> {
        let host = link.host_str()?;
        self.site_selectors
            .iter()
            .find(|(site, _)| {
                host == site.as_str()
                    || host
                        .strip_suffix(site.as_str())
                        // Require a label boundary so only real subdomains match.
                        .is_some_and(|prefix| prefix.ends_with('.'))
            })
            .map(|(_, selectors)| selectors.as_slice())
    }
}
#[async_trait]
impl Transformer for SlurpContents {
    /// Runs only when a link is present and its site has selectors configured.
    fn should_run(&self, link: &Option<Url>, _html: &str) -> bool {
        link.as_ref()
            .is_some_and(|link| self.get_selectors(link).is_some())
    }

    /// Fetches the page at `link` and returns the first element matching each configured
    /// selector, joined with `<br><br>`.
    ///
    /// The input `html` is returned unchanged when there is no link, the site has no
    /// selectors, the server answers with a non-success status, or any selector fails to
    /// match the fetched page.
    ///
    /// # Errors
    ///
    /// Returns [`TransformError::ReqwestError`] if the request or reading its body fails.
    async fn transform(&self, link: &Option<Url>, html: &str) -> Result<String, TransformError> {
        let Some(link) = link else {
            return Ok(html.to_string());
        };
        let Some(selectors) = self.get_selectors(link) else {
            return Ok(html.to_string());
        };

        let response = reqwest::get(link.as_str()).await?;
        // An error page (404, 500, ...) is not the article; keep what the feed gave us
        // rather than scraping the error document.
        if !response.status().is_success() {
            warn!("fetching {} returned status {}", link, response.status());
            return Ok(html.to_string());
        }
        let body = response.text().await?;

        // Parse only after the last `.await` so the document is never held across one.
        let doc = Html::parse_document(&body);
        let mut results = Vec::with_capacity(selectors.len());
        for selector in selectors {
            let Some(frag) = doc.select(selector).next() else {
                warn!("couldn't find '{:?}' in {}", selector, link);
                return Ok(html.to_string());
            };
            results.push(frag.html());
        }
        Ok(results.join("<br><br>"))
    }
}
pub fn linkify_html(text: &str) -> String { pub fn linkify_html(text: &str) -> String {
let mut finder = LinkFinder::new(); let mut finder = LinkFinder::new();
let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]); let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]);

View File

@ -1,6 +1,8 @@
use std::hash::{DefaultHasher, Hash, Hasher}; use std::hash::{DefaultHasher, Hash, Hasher};
use log::info; use log::info;
use maplit::hashmap;
use scraper::Selector;
use sqlx::postgres::PgPool; use sqlx::postgres::PgPool;
use url::Url; use url::Url;
@ -13,7 +15,7 @@ use crate::{
compute_offset_limit, compute_offset_limit,
error::ServerError, error::ServerError,
graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary}, graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
AddOutlink, EscapeHtml, InlineStyle, SanitizeHtml, StripHtml, Transformer, AddOutlink, EscapeHtml, InlineStyle, SanitizeHtml, SlurpContents, StripHtml, Transformer,
}; };
pub fn is_newsreader_search(query: &str) -> bool { pub fn is_newsreader_search(query: &str) -> bool {
@ -89,36 +91,34 @@ pub async fn search(
.fetch_all(pool) .fetch_all(pool)
.await?; .await?;
Ok(rows let mut res = Vec::new();
.into_iter() for (i, r) in rows.into_iter().enumerate() {
.enumerate() let site = r.site.unwrap_or("UNKOWN TAG".to_string());
.map(|(i, r)| { let mut tags = vec![format!("{TAG_PREFIX}{site}")];
let site = r.site.unwrap_or("UNKOWN TAG".to_string()); if !r.is_read.unwrap_or(true) {
let mut tags = vec![format!("{TAG_PREFIX}{site}")]; tags.push("unread".to_string());
if !r.is_read.unwrap_or(true) { };
tags.push("unread".to_string()); let mut title = r.title.unwrap_or("NO TITLE".to_string());
}; title = clean_title(&title).await.expect("failed to clean title");
let mut title = r.title.unwrap_or("NO TITLE".to_string()); res.push((
title = clean_title(&title).expect("failed to clean title"); i as i32 + offset,
( ThreadSummary {
i as i32 + offset, thread: format!("{THREAD_PREFIX}{}", r.uid),
ThreadSummary { timestamp: r
thread: format!("{THREAD_PREFIX}{}", r.uid), .date
timestamp: r .expect("post missing date")
.date .assume_utc()
.expect("post missing date") .unix_timestamp() as isize,
.assume_utc() date_relative: "TODO date_relative".to_string(),
.unix_timestamp() as isize, matched: 0,
date_relative: "TODO date_relative".to_string(), total: 1,
matched: 0, authors: r.name.unwrap_or_else(|| site.clone()),
total: 1, subject: title,
authors: r.name.unwrap_or_else(|| site.clone()), tags,
subject: title, },
tags, ));
}, }
) Ok(res)
})
.collect())
} }
pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> { pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
@ -197,8 +197,25 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
// TODO: add site specific cleanups. For example: // TODO: add site specific cleanups. For example:
// * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div> // * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
// * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
let mut body_tranformers: Vec<Box<dyn Transformer>> = vec![ let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(AddOutlink(link.clone())), // TODO: add a map of urls and selectors
Box::new(SlurpContents {
site_selectors: hashmap![
"hackaday.com".to_string() => vec![
Selector::parse("div.entry-featured-image").unwrap(),
Selector::parse("div.entry-content").unwrap()
],
"mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
"natwelch.com".to_string() => vec![
Selector::parse("article div.prose").unwrap(),
],
"slashdot.org".to_string() => vec![
Selector::parse("span.story-byline").unwrap(),
Selector::parse("div.p").unwrap(),
],
],
}),
Box::new(AddOutlink),
Box::new(EscapeHtml), Box::new(EscapeHtml),
Box::new(InlineStyle), Box::new(InlineStyle),
Box::new(SanitizeHtml { Box::new(SanitizeHtml {
@ -207,15 +224,15 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
}), }),
]; ];
for t in body_tranformers.iter() { for t in body_tranformers.iter() {
if t.should_run(&body) { if t.should_run(&link, &body) {
body = t.transform(&body)?; body = t.transform(&link, &body).await?;
} }
} }
let body = Body::Html(Html { let body = Body::Html(Html {
html: body, html: body,
content_tree: "".to_string(), content_tree: "".to_string(),
}); });
let title = clean_title(&r.title.unwrap_or("NO TITLE".to_string()))?; let title = clean_title(&r.title.unwrap_or("NO TITLE".to_string())).await?;
let from = Some(Email { let from = Some(Email {
name: r.name, name: r.name,
addr: addr.map(|a| a.to_string()), addr: addr.map(|a| a.to_string()),
@ -254,7 +271,7 @@ pub async fn set_read_status<'ctx>(
.await?; .await?;
Ok(true) Ok(true)
} }
fn clean_title(title: &str) -> Result<String, ServerError> { async fn clean_title(title: &str) -> Result<String, ServerError> {
// Make title HTML so html parsers work // Make title HTML so html parsers work
let mut title = format!("<html>{title}</html>"); let mut title = format!("<html>{title}</html>");
let title_tranformers: Vec<Box<dyn Transformer>> = let title_tranformers: Vec<Box<dyn Transformer>> =
@ -262,8 +279,8 @@ fn clean_title(title: &str) -> Result<String, ServerError> {
// Make title HTML so html parsers work // Make title HTML so html parsers work
title = format!("<html>{title}</html>"); title = format!("<html>{title}</html>");
for t in title_tranformers.iter() { for t in title_tranformers.iter() {
if t.should_run(&title) { if t.should_run(&None, &title) {
title = t.transform(&title)?; title = t.transform(&None, &title).await?;
} }
} }
Ok(title) Ok(title)