diff --git a/Cargo.lock b/Cargo.lock index 6ddaa30..7364fd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,7 +72,7 @@ version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64e6d1c7838db705c9b756557ee27c384ce695a1c51a6fe528784cb1c6840170" dependencies = [ - "html5ever", + "html5ever 0.26.0", "maplit", "once_cell", "tendril", @@ -272,6 +272,12 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "atomic_hooks" version = "0.1.6" @@ -328,6 +334,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.6.0" @@ -586,11 +598,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0250ac93bbccb4f0a892507a4580178edddef5e8267650e294b4fe00597b0da8" dependencies = [ "cssparser 0.31.2", - "html5ever", + "html5ever 0.26.0", "indexmap 2.2.6", "pico-args", "rayon", - "reqwest", + "reqwest 0.11.27", "rustc-hash", "selectors 0.25.0", "smallvec", @@ -838,6 +850,12 @@ dependencies = [ "paste", ] +[[package]] +name = "ego-tree" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" + [[package]] name = "either" version = "1.13.0" @@ -941,6 +959,21 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1102,6 +1135,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -1316,6 +1358,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", + "indexmap 2.2.6", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "handlebars" version = "4.5.0" @@ -1444,12 +1505,26 @@ checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" dependencies = [ "log", "mac", - "markup5ever", + "markup5ever 0.11.0", "proc-macro2", "quote", "syn 1.0.109", ] +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.69", +] + [[package]] name = "http" version = "0.2.12" @@ -1483,6 +1558,29 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "pin-project-lite", +] + [[package]] name = "httparse" version = "1.9.4" @@ -1511,9 +1609,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", - "http-body", + "http-body 0.4.6", "httparse", "httpdate", "itoa 1.0.11", @@ -1525,6 +1623,26 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "httparse", + "itoa 1.0.11", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -1533,10 +1651,63 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper", - "rustls", + "hyper 0.14.29", + "rustls 0.21.12", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.4.1", + "hyper-util", + "rustls 0.23.12", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.4.1", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.4.1", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -1855,6 +2026,20 @@ dependencies = [ "tendril", ] +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.2", + "phf_codegen 0.11.2", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matchers" version = "0.1.0" @@ -1974,6 +2159,23 @@ dependencies = [ "version_check", ] +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -2100,6 +2302,50 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "openssl" +version = "0.10.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.69", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "owning_ref" version = "0.4.1" @@ -2296,6 +2542,16 @@ dependencies = [ "phf_shared 0.10.0", ] +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + [[package]] name = "phf_generator" version = "0.8.0" @@ -2760,11 +3016,11 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", - "http-body", - "hyper", - "hyper-rustls", + "http-body 0.4.6", + "hyper 0.14.29", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2772,15 +3028,15 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", - "rustls-pemfile", + "rustls 0.21.12", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", - "system-configuration", + "sync_wrapper 0.1.2", + "system-configuration 0.5.1", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tower-service", "url", "wasm-bindgen", @@ -2790,6 +3046,50 @@ dependencies = [ "winreg", ] +[[package]] +name = "reqwest" +version = "0.12.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" +dependencies = [ + "base64 0.22.1", + "bytes", + "encoding_rs", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.4.1", + "hyper-rustls 0.27.2", + "hyper-tls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile 2.1.3", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 1.0.1", + "system-configuration 0.6.1", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "windows-registry", +] + [[package]] name = "ring" version = "0.17.8" @@ -2887,7 +3187,7 @@ dependencies = [ "either", "futures", "http 0.2.12", - "hyper", + "hyper 0.14.29", "indexmap 2.2.6", "log", "memchr", @@ -2966,10 +3266,23 @@ checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", "ring", - "rustls-webpki", + "rustls-webpki 0.101.7", "sct", ] +[[package]] +name = "rustls" +version = "0.23.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki 0.102.6", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -2979,6 +3292,22 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pemfile" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425" +dependencies = [ + "base64 0.22.1", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -2989,6 +3318,17 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustls-webpki" +version = "0.102.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.17" @@ -3001,6 +3341,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -3013,6 +3362,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0" +dependencies = [ + "ahash 0.8.11", + "cssparser 0.31.2", + "ego-tree", + "getopts", + "html5ever 0.27.0", + "once_cell", + "selectors 0.25.0", + "tendril", +] + [[package]] name = "sct" version = "0.7.1" @@ -3023,6 +3388,29 @@ dependencies = [ "untrusted", ] +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "seed" version = "0.10.0" @@ -3175,6 +3563,7 @@ dependencies = [ "anyhow", "async-graphql", "async-graphql-rocket", + "async-trait", "css-inline", "glog", "html-escape", @@ -3185,8 +3574,10 @@ dependencies = [ "maplit", "memmap", "notmuch", + "reqwest 0.12.7", "rocket", "rocket_cors", + "scraper", "serde", "serde_json", "shared", @@ -3676,6 +4067,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -3684,7 +4084,18 @@ checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" dependencies = [ "bitflags 1.2.1", "core-foundation", - "system-configuration-sys", + "system-configuration-sys 0.5.0", +] + +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "system-configuration-sys 0.6.0", ] [[package]] @@ -3697,6 +4108,16 @@ dependencies = [ "libc", ] +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tempfile" version = "3.10.1" @@ -3839,13 +4260,34 @@ dependencies = [ "syn 2.0.69", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls 0.23.12", + "rustls-pki-types", "tokio", ] @@ -3942,6 +4384,27 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = "tower-service" version = "0.3.2" @@ -4099,6 +4562,12 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + [[package]] name = "unicode-xid" version = "0.2.4" @@ -4393,6 +4862,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/server/Cargo.toml b/server/Cargo.toml index e76de5d..eebf166 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -11,6 +11,7 @@ ammonia = "3.3.0" anyhow = "1.0.79" async-graphql = { version = "6.0.11", features = ["log"] } async-graphql-rocket = "6.0.11" +async-trait = "0.1.81" css-inline = "0.13.0" glog = "0.1.0" html-escape = "0.2.13" @@ -21,8 +22,10 @@ mailparse = "0.15.0" maplit = "1.0.2" memmap = "0.7.0" notmuch = { path = "../notmuch" } +reqwest = { version = "0.12.7", features = ["blocking"] } rocket = { version = "0.5.0-rc.2", features = [ "json" ] } rocket_cors = "0.6.0" +scraper = "0.20.0" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.87" shared = { path = "../shared" } diff --git a/server/src/lib.rs b/server/src/lib.rs index 9e84d05..6c339f8 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -3,13 +3,15 @@ pub mod graphql; pub mod newsreader; pub mod nm; -use std::{convert::Infallible, str::FromStr}; +use std::{collections::HashMap, convert::Infallible, str::FromStr}; +use async_trait::async_trait; use css_inline::{CSSInliner, InlineError, InlineOptions}; use linkify::{LinkFinder, LinkKind}; -use log::{error, info}; +use log::{error, info, warn}; use lol_html::{element, errors::RewritingError, rewrite_str, text, RewriteStrSettings}; use maplit::{hashmap, hashset}; +use scraper::{error::SelectorErrorKind, Html, Selector}; use thiserror::Error; use url::Url; @@ -19,23 +21,28 @@ use crate::newsreader::{ const NON_EXISTENT_SITE_NAME: &'static str = "NO-SUCH-SITE"; // TODO: figure out how to use Cow -trait Transformer { - fn should_run(&self, _html: &str) -> bool { +#[async_trait] +trait Transformer: Send + Sync { + fn should_run(&self, addr: &Option, _html: &str) -> bool { true } // TODO: should html be something like `html_escape` uses: // >(text: &S) -> Cow - fn transform(&self, html: &str) -> Result; + async fn transform(&self, addr: &Option, html: &str) -> Result; } // TODO: how would we make this more generic to allow good implementations of Transformer outside // of this module? #[derive(Error, Debug)] pub enum TransformError { - #[error("lol-html rewrite error")] + #[error("lol-html rewrite error: {0}")] RewritingError(#[from] RewritingError), - #[error("css inline error")] + #[error("css inline error: {0}")] InlineError(#[from] InlineError), + #[error("failed to fetch url error: {0}")] + ReqwestError(#[from] reqwest::Error), + #[error("failed to parse HTML: {0}")] + HtmlParsingError(String), } struct SanitizeHtml<'a> { @@ -43,31 +50,34 @@ struct SanitizeHtml<'a> { base_url: &'a Option, } +#[async_trait] impl<'a> Transformer for SanitizeHtml<'a> { - fn transform(&self, html: &str) -> Result { + async fn transform(&self, _: &Option, html: &str) -> Result { Ok(sanitize_html(html, self.cid_prefix, self.base_url)?) } } struct EscapeHtml; +#[async_trait] impl Transformer for EscapeHtml { - fn should_run(&self, html: &str) -> bool { + fn should_run(&self, _: &Option, html: &str) -> bool { html.contains("&") } - fn transform(&self, html: &str) -> Result { + async fn transform(&self, _: &Option, html: &str) -> Result { Ok(html_escape::decode_html_entities(html).to_string()) } } struct StripHtml; +#[async_trait] impl Transformer for StripHtml { - fn should_run(&self, html: &str) -> bool { + fn should_run(&self, _: &Option, html: &str) -> bool { // Lame test html.contains("<") } - fn transform(&self, html: &str) -> Result { + async fn transform(&self, _: &Option, html: &str) -> Result { let mut text = String::new(); let element_content_handlers = vec![text!("*", |t| { text += t.as_str(); @@ -87,8 +97,9 @@ impl Transformer for StripHtml { struct InlineStyle; +#[async_trait] impl Transformer for InlineStyle { - fn transform(&self, html: &str) -> Result { + async fn transform(&self, _: &Option, html: &str) -> Result { let css = concat!( "/* chrome-default.css */\n", include_str!("chrome-default.css"), @@ -118,29 +129,78 @@ impl Transformer for InlineStyle { } } -struct AddOutlink(Option); +struct AddOutlink; +#[async_trait] impl Transformer for AddOutlink { - fn should_run(&self, html: &str) -> bool { - if let Some(link) = &self.0 { - return link.scheme().starts_with("http") && !html.contains(link.as_str()); + fn should_run(&self, link: &Option, html: &str) -> bool { + if let Some(link) = link { + link.scheme().starts_with("http") && !html.contains(link.as_str()) + } else { + false } - false } - fn transform(&self, html: &str) -> Result { - if let Some(url) = &self.0 { + async fn transform(&self, link: &Option, html: &str) -> Result { + if let Some(link) = link { Ok(format!( r#" {html} "#, - url + link )) } else { Ok(html.to_string()) } } } + +struct SlurpContents { + site_selectors: HashMap>, +} + +impl SlurpContents { + fn get_selectors(&self, link: &Url) -> Option<&[Selector]> { + for (host, selector) in self.site_selectors.iter() { + if link.host_str().map(|h| h.contains(host)).unwrap_or(false) { + return Some(&selector); + } + } + None + } +} + +#[async_trait] +impl Transformer for SlurpContents { + fn should_run(&self, link: &Option, html: &str) -> bool { + if let Some(link) = link { + return self.get_selectors(link).is_some(); + } + false + } + async fn transform(&self, link: &Option, html: &str) -> Result { + let Some(link) = link else { + return Ok(html.to_string()); + }; + let Some(selectors) = self.get_selectors(&link) else { + return Ok(html.to_string()); + }; + let body = reqwest::get(link.as_str()).await?.text().await?; + let doc = Html::parse_document(&body); + + let mut results = Vec::new(); + for selector in selectors { + if let Some(frag) = doc.select(&selector).next() { + results.push(frag.html()) + } else { + warn!("couldn't find '{:?}' in {}", selector, link); + return Ok(html.to_string()); + } + } + Ok(results.join("

")) + } +} + pub fn linkify_html(text: &str) -> String { let mut finder = LinkFinder::new(); let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]); diff --git a/server/src/newsreader.rs b/server/src/newsreader.rs index 404c60a..082d380 100644 --- a/server/src/newsreader.rs +++ b/server/src/newsreader.rs @@ -1,6 +1,8 @@ use std::hash::{DefaultHasher, Hash, Hasher}; use log::info; +use maplit::hashmap; +use scraper::Selector; use sqlx::postgres::PgPool; use url::Url; @@ -13,7 +15,7 @@ use crate::{ compute_offset_limit, error::ServerError, graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary}, - AddOutlink, EscapeHtml, InlineStyle, SanitizeHtml, StripHtml, Transformer, + AddOutlink, EscapeHtml, InlineStyle, SanitizeHtml, SlurpContents, StripHtml, Transformer, }; pub fn is_newsreader_search(query: &str) -> bool { @@ -89,36 +91,34 @@ pub async fn search( .fetch_all(pool) .await?; - Ok(rows - .into_iter() - .enumerate() - .map(|(i, r)| { - let site = r.site.unwrap_or("UNKOWN TAG".to_string()); - let mut tags = vec![format!("{TAG_PREFIX}{site}")]; - if !r.is_read.unwrap_or(true) { - tags.push("unread".to_string()); - }; - let mut title = r.title.unwrap_or("NO TITLE".to_string()); - title = clean_title(&title).expect("failed to clean title"); - ( - i as i32 + offset, - ThreadSummary { - thread: format!("{THREAD_PREFIX}{}", r.uid), - timestamp: r - .date - .expect("post missing date") - .assume_utc() - .unix_timestamp() as isize, - date_relative: "TODO date_relative".to_string(), - matched: 0, - total: 1, - authors: r.name.unwrap_or_else(|| site.clone()), - subject: title, - tags, - }, - ) - }) - .collect()) + let mut res = Vec::new(); + for (i, r) in rows.into_iter().enumerate() { + let site = r.site.unwrap_or("UNKOWN TAG".to_string()); + let mut tags = vec![format!("{TAG_PREFIX}{site}")]; + if !r.is_read.unwrap_or(true) { + tags.push("unread".to_string()); + }; + let mut title = r.title.unwrap_or("NO TITLE".to_string()); + title = clean_title(&title).await.expect("failed to clean title"); + res.push(( + i as i32 + offset, + ThreadSummary { + thread: format!("{THREAD_PREFIX}{}", r.uid), + timestamp: r + .date + .expect("post missing date") + .assume_utc() + .unix_timestamp() as isize, + date_relative: "TODO date_relative".to_string(), + matched: 0, + total: 1, + authors: r.name.unwrap_or_else(|| site.clone()), + subject: title, + tags, + }, + )); + } + Ok(res) } pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result, ServerError> { @@ -197,8 +197,25 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent - let mut body_tranformers: Vec> = vec![ - Box::new(AddOutlink(link.clone())), + let body_tranformers: Vec> = vec![ + // TODO: add a map of urls and selectors + Box::new(SlurpContents { + site_selectors: hashmap![ + "hackaday.com".to_string() => vec![ + Selector::parse("div.entry-featured-image").unwrap(), + Selector::parse("div.entry-content").unwrap() + ], + "mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()], + "natwelch.com".to_string() => vec![ + Selector::parse("article div.prose").unwrap(), + ], + "slashdot.org".to_string() => vec![ + Selector::parse("span.story-byline").unwrap(), + Selector::parse("div.p").unwrap(), + ], + ], + }), + Box::new(AddOutlink), Box::new(EscapeHtml), Box::new(InlineStyle), Box::new(SanitizeHtml { @@ -207,15 +224,15 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result( .await?; Ok(true) } -fn clean_title(title: &str) -> Result { +async fn clean_title(title: &str) -> Result { // Make title HTML so html parsers work let mut title = format!("{title}"); let title_tranformers: Vec> = @@ -262,8 +279,8 @@ fn clean_title(title: &str) -> Result { // Make title HTML so html parsers work title = format!("{title}"); for t in title_tranformers.iter() { - if t.should_run(&title) { - title = t.transform(&title)?; + if t.should_run(&None, &title) { + title = t.transform(&None, &title).await?; } } Ok(title)