diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 2a37bd6..296942d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -71,3 +71,31 @@ jobs:
         run: cargo doc --no-deps --workspace --document-private-items
         env:
           RUSTDOCFLAGS: -D warnings
+
+  build:
+    name: Build and push
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Cargo.lock b/Cargo.lock
index c5bac67..dffee99 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1492,6 +1492,8 @@ dependencies = [
  "grass",
  "maud",
  "mwp-content",
+ "mwp-search",
+ "rusqlite",
  "serde",
  "serde_json",
  "tantivy",
diff --git a/mwp-content/src/lib.rs b/mwp-content/src/lib.rs
index c0550aa..be1e63c 100644
--- a/mwp-content/src/lib.rs
+++ b/mwp-content/src/lib.rs
@@ -73,7 +73,6 @@ impl Page {
         let mut link_title = String::new();
         let mut links: Vec<Link> = Vec::new();
         let mut open_tags: Vec<Tag> = Vec::new();
-        let mut got_resources: bool = false;
 
         #[allow(clippy::unnecessary_filter_map)]
         let parser = Parser::new_ext(&content, options).filter_map(|event| match event.clone() {
@@ -93,25 +92,10 @@
                     link_title = text.to_string();
                 }
 
-                // once we detect "Resources" section of the markdown we ignore the rest of the document
-                if text.as_ref() == "Resources" {
-                    got_resources = true;
-                    return Some(event);
-                }
-
-                if got_resources {
-                    return None;
-                }
-
                 Some(event)
             }
             Event::Start(tag) => {
                 open_tags.push(tag);
-
-                if got_resources {
-                    return None;
-                }
-
                 Some(event)
             }
             Event::End(tag) => {
@@ -137,10 +121,7 @@
                 Some(event)
             }
-            event => match got_resources {
-                true => None,
-                false => Some(event),
-            },
+            event => Some(event),
         });
 
         let mut html_output = String::new();
 
diff --git a/mwp-search/src/lib.rs b/mwp-search/src/lib.rs
index 47577f3..dbbde9a 100644
--- a/mwp-search/src/lib.rs
+++ b/mwp-search/src/lib.rs
@@ -32,25 +32,21 @@ impl Doc {
 }
 
 impl SearchIndex {
-    pub fn new(dir: &str) -> Result<Self, Box<dyn Error>> {
-        let index = if let Ok(index) = Index::open_in_dir(dir) {
-            index
-        } else {
-            let mut schema_builder = Schema::builder();
-            schema_builder.add_text_field("url", STRING | STORED);
-            schema_builder.add_text_field("title", TEXT | STORED);
-            schema_builder.add_text_field("body", TEXT);
-            schema_builder.add_text_field("domain", STRING | STORED);
-            schema_builder.add_text_field("tags", STRING | STORED | FAST);
-            let schema = schema_builder.build();
-            Index::builder()
-                .schema(schema)
-                .settings(tantivy::IndexSettings {
-                    docstore_blocksize: 32_000_000,
-                    ..tantivy::IndexSettings::default()
-                })
-                .create_in_dir(dir)?
-        };
+    pub fn new() -> Result<Self, Box<dyn Error>> {
+        let mut schema_builder = Schema::builder();
+        schema_builder.add_text_field("url", STRING | STORED);
+        schema_builder.add_text_field("title", TEXT | STORED);
+        schema_builder.add_text_field("body", TEXT);
+        schema_builder.add_text_field("domain", STRING | STORED);
+        schema_builder.add_text_field("tags", STRING | STORED | FAST);
+        let schema = schema_builder.build();
+        let index = Index::builder()
+            .schema(schema)
+            .settings(tantivy::IndexSettings {
+                docstore_blocksize: 32_000_000,
+                ..tantivy::IndexSettings::default()
+            })
+            .create_in_ram()?;
 
         Ok(SearchIndex { index })
     }
diff --git a/mwp-web/Cargo.toml b/mwp-web/Cargo.toml
index dd34060..8eef757 100644
--- a/mwp-web/Cargo.toml
+++ b/mwp-web/Cargo.toml
@@ -14,8 +14,10 @@ maud = { version = "0.26.0", features = ["actix-web"] }
 serde = "1.0.196"
 serde_json = "1.0.113"
 tantivy = "0.21.1"
+rusqlite = { version = "0.30.0", features = ["time", "url"]}
 
 mwp-content = { path="../mwp-content" }
+mwp-search = { path="../mwp-search" }
 
 [build-dependencies]
 grass = "0.13.2"
diff --git a/mwp-web/src/main.rs b/mwp-web/src/main.rs
index 2452043..16b7084 100644
--- a/mwp-web/src/main.rs
+++ b/mwp-web/src/main.rs
@@ -6,6 +6,8 @@ use actix_web::{
 };
 use maud::{html, Markup, PreEscaped};
 use mwp_content::Content;
+use mwp_search::{Doc, SearchIndex};
+use rusqlite::Connection;
 use serde::Deserialize;
 use tantivy::{
     query::{AllQuery, QueryParser, TermQuery},
@@ -161,9 +163,6 @@ async fn content_page(
                 }
             }
             article { (PreEscaped(html)) }
-            .links {
-                (listing(searcher, schema, result.docs))
-            }
         }
     ))
 }
@@ -185,13 +184,38 @@ impl Guard for ContentGuard {
 async fn main() -> std::io::Result<()> {
     env_logger::init_from_env(env_logger::Env::new().default_filter_or("info"));
-    let index_path = "./index";
-    let index = Index::open_in_dir(index_path).unwrap();
+    let index = SearchIndex::new().unwrap();
+
+    let conn = Connection::open("./my_db.db3").unwrap();
+    let mut stmt = conn
+        .prepare("SELECT title, url, domain, body, tags, created_at, scraped_at FROM links")
+        .unwrap();
+    let docs_iter = stmt
+        .query_map([], |row| {
+            Ok(Doc {
+                title: row.get(0)?,
+                url: row.get(1)?,
+                domain: row.get(2)?,
+                body: row.get(3)?,
+                tags: row.get::<usize, Option<String>>(4).map(|res| {
+                    res.map(|s| s.split(';').map(|s| s.into()).collect::<Vec<String>>())
+                })?,
+                created_at: row.get(5)?,
+                scraped_at: row.get(6)?,
+            })
+        })
+        .unwrap();
+
+    let mut builder = index.builder();
+    for doc in docs_iter {
+        builder.add(doc.unwrap()).unwrap();
+    }
+    builder.commit();
 
     let content = Content::from_dir("../wiki").await;
 
     HttpServer::new(move || {
         App::new()
-            .app_data(web::Data::new(index.clone()))
+            .app_data(web::Data::new(index.index.clone()))
             .app_data(web::Data::new(content.clone()))
             .service(tag_page)
             .service(search_page)
diff --git a/src/main.rs b/src/main.rs
index 71e2b9e..6436b5b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,5 @@
 use mwp_content::Link;
-use mwp_search::{Doc, SearchIndex};
+use mwp_search::Doc;
 use rusqlite::Connection;
 use time::OffsetDateTime;
 
@@ -21,34 +21,7 @@
         (), // empty list of parameters.
     )?;
 
-    let index = SearchIndex::new("./index")?;
-    let content = mwp_content::Content::from_dir("../wiki").await;
-    index.add_content(content.values())?;
-
-    let mut stmt =
-        conn.prepare("SELECT title, url, domain, body, tags, created_at, scraped_at FROM links")?;
-    let docs_iter = stmt.query_map([], |row| {
-        Ok(Doc {
-            title: row.get(0)?,
-            url: row.get(1)?,
-            domain: row.get(2)?,
-            body: row.get(3)?,
-            tags: row
-                .get::<usize, Option<String>>(4)
-                .map(|res| res.map(|s| s.split(';').map(|s| s.into()).collect::<Vec<String>>()))?,
-            created_at: row.get(5)?,
-            scraped_at: row.get(6)?,
-        })
-    })?;
-
-    let mut builder = index.builder();
-    for doc in docs_iter {
-        builder.add(doc.unwrap())?;
-    }
-    builder.commit();
-
-    return Ok(());
 
     // only needed before links from content are migrated to bookmarking system
    let links = content