From eb0ee6545d058aa5600530f1f36b9b78d548090d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matou=C5=A1=20Dzivjak?= Date: Tue, 31 Dec 2024 11:46:35 +0100 Subject: [PATCH] chore: cleanup and refactor Use xtask for scraping job, cleanup dependencies. --- .cargo/config.toml | 2 + Cargo.lock | 123 ++++++++++------------------ Cargo.toml | 23 ++++-- mwp-scraper/Cargo.toml | 17 ++-- mwp-scraper/src/{main.rs => lib.rs} | 3 +- mwp-search/Cargo.toml | 7 +- mwp-web/Cargo.toml | 10 ++- xtask/Cargo.toml | 16 ++++ xtask/src/main.rs | 25 ++++++ 9 files changed, 117 insertions(+), 109 deletions(-) create mode 100644 .cargo/config.toml rename mwp-scraper/src/{main.rs => lib.rs} (98%) create mode 100644 xtask/Cargo.toml create mode 100644 xtask/src/main.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..35049cb --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] +xtask = "run --package xtask --" diff --git a/Cargo.lock b/Cargo.lock index ca2216c..c644fbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -294,9 +294,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" @@ -488,9 +488,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.13" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", "clap_derive", @@ -498,21 +498,22 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", + "terminal_size", ] [[package]] name = "clap_derive" -version = "4.5.13" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" dependencies = [ "heck", "proc-macro2", @@ -522,9 +523,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "codemap" @@ -856,16 +857,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "fs4" version = "0.8.2" @@ -1757,18 +1748,12 @@ dependencies = [ "lol_html", "mwp-content", "mwp-search", - "pulldown-cmark", "regex", "reqwest", "rusqlite", - "serde", - "serde_json", - "sled", "tantivy", "time", - "tokio", "url", - "walkdir", ] [[package]] @@ -1954,17 +1939,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -1972,21 +1946,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.9", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -1997,7 +1957,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "smallvec", "windows-targets 0.48.5", ] @@ -2358,15 +2318,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -2782,22 +2733,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot 0.11.2", -] - [[package]] name = "smallvec" version = "1.13.2" @@ -3061,11 +2996,21 @@ checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.4.1", + "redox_syscall", "rustix", "windows-sys 0.52.0", ] +[[package]] +name = "terminal_size" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9" +dependencies = [ + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = 
"thin-slice" version = "0.1.1" @@ -3153,7 +3098,7 @@ dependencies = [ "bytes", "libc", "mio 1.0.1", - "parking_lot 0.12.1", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -3607,6 +3552,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -3740,6 +3694,15 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xtask" +version = "0.0.1" +dependencies = [ + "clap", + "mwp-scraper", + "tokio", +] + [[package]] name = "yoke" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index f0666ba..f686213 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,11 @@ +[workspace.package] +version = "0.1.0" +edition = "2021" +authors = ["Matouš Dzivjak "] +categories = ["wiki", "knowledge-bage"] +repository = "https://github.com/matoous/mwp" +homepage = "https://github.com/matoous/mwp" + [workspace] resolver = "2" members = [ @@ -5,6 +13,7 @@ members = [ "mwp-scraper", "mwp-content", "mwp-search", + "xtask", ] default-members = [ @@ -14,12 +23,8 @@ default-members = [ [profile.release] lto = "thin" -[workspace.package] -name = "mwp" -version = "0.1.0" -edition = "2021" -authors = ["Matouš Dzivjak "] -categories = ["wiki", "knowledge-bage"] -repository = "https://github.com/matoous/mwp" -homepage = "https://github.com/matoous/mwp" -rust-version = "1.70" +[workspace.dependencies] +tantivy = { version = "0.22.0", features = ["mmap"] } +url = { version = "2.5.4", features = ["serde"] } +time = "0.3.37" +rusqlite = { version = "0.31.0", features = ["time", "url", "bundled"]} diff --git a/mwp-scraper/Cargo.toml 
b/mwp-scraper/Cargo.toml index 5d10b83..9cad275 100644 --- a/mwp-scraper/Cargo.toml +++ b/mwp-scraper/Cargo.toml @@ -8,20 +8,15 @@ repository.workspace = true [dependencies] html-escape = "0.2.13" -lazy_static = "1.5.0" lol_html = "1.2.1" -pulldown-cmark = "0.12.2" +lazy_static = "1.5.0" regex = "1.11.1" reqwest = "0.12.9" -serde = "1.0.217" -serde_json = "1.0.134" -sled = "0.34.7" -tantivy = { version = "0.22.0", features = ["mmap"] } -time = "0.3.37" -tokio = { version = "1.42.0", features= ["full"]} -url = { version = "2.5.4", features = ["serde"] } -walkdir = "2.5.0" -rusqlite = { version = "0.31.0", features = ["time", "url", "bundled"]} + +rusqlite = { workspace = true } +tantivy = { workspace = true } +url = { workspace = true } +time = { workspace = true } mwp-content = { path="../mwp-content" } mwp-search = { path="../mwp-search" } diff --git a/mwp-scraper/src/main.rs b/mwp-scraper/src/lib.rs similarity index 98% rename from mwp-scraper/src/main.rs rename to mwp-scraper/src/lib.rs index 0b7ae25..0b23622 100644 --- a/mwp-scraper/src/main.rs +++ b/mwp-scraper/src/lib.rs @@ -19,8 +19,7 @@ pub async fn scrape(link: &Url) -> Result Result<(), Box> { +pub async fn scrape_all() -> Result<(), Box> { let conn = Connection::open("./db.db3")?; conn.execute( r#" diff --git a/mwp-search/Cargo.toml b/mwp-search/Cargo.toml index 85bbec9..f8f2a39 100644 --- a/mwp-search/Cargo.toml +++ b/mwp-search/Cargo.toml @@ -6,7 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -tantivy = { version = "0.22.0", features = ["mmap"] } +tantivy = { workspace = true } +url = { workspace = true } +time = { workspace = true } + mwp-content = { path="../mwp-content" } -time = "0.3.37" -url = "2.5.4" diff --git a/mwp-web/Cargo.toml b/mwp-web/Cargo.toml index 19693ba..70d2ab1 100644 --- a/mwp-web/Cargo.toml +++ b/mwp-web/Cargo.toml @@ -13,15 +13,17 @@ path = "src/main.rs" [dependencies] actix-files = "0.6.6" 
actix-web = "4.9.0" +static-files = "0.2.4" +actix-web-static-files = "4.0.1" + env_logger = "0.11.6" maud = { version = "0.26.0", features = ["actix-web"] } serde = "1.0.217" serde_json = "1.0.134" -tantivy = "0.22.0" -rusqlite = { version = "0.31.0", features = ["time", "url", "bundled"]} clap = { version = "4.5.13", features = ["derive"]} -static-files = "0.2.4" -actix-web-static-files = "4.0.1" + +rusqlite = { workspace = true } +tantivy = { workspace = true } mwp-content = { path="../mwp-content" } mwp-search = { path="../mwp-search" } diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml new file mode 100644 index 0000000..ffd93a1 --- /dev/null +++ b/xtask/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "xtask" +version = "0.0.1" +edition = "2021" +publish = false + +[dependencies] +clap = { version = "4.5.23", features = [ + "derive", + "string", + "env", + "wrap_help", +] } +tokio = { version = "1.42.0", features= ["full"]} + +mwp-scraper = { path="../mwp-scraper" } diff --git a/xtask/src/main.rs b/xtask/src/main.rs new file mode 100644 index 0000000..8254b64 --- /dev/null +++ b/xtask/src/main.rs @@ -0,0 +1,25 @@ +#![forbid(unsafe_code)] + +use clap::Parser; + +#[derive(Parser)] +#[command(name = "xtask")] +#[command(about = "Helper commands for wiki management")] +enum Xtask { + #[command(about = "Scrape latest content for full-text search")] + Scrape, +} + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + let xtask = Xtask::parse(); + + match xtask { + Xtask::Scrape => scrape().await, + } +} + +async fn scrape() -> Result<(), Box<dyn std::error::Error>> { + mwp_scraper::scrape_all().await?; + Ok(()) +}