diff --git a/src/report/sqlite/models.rs b/src/report/sqlite/models.rs index bff17d2..81bd7e0 100644 --- a/src/report/sqlite/models.rs +++ b/src/report/sqlite/models.rs @@ -34,34 +34,40 @@ use crate::{error::Result, parsers::json::JsonVal}; /// path: String, /// } /// -/// impl Insertable<2> for File { +/// impl Insertable for File { /// const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO file (id, path) VALUES "; /// const INSERT_PLACEHOLDER: &'static str = "(?, ?)"; /// -/// fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 2] { -/// [ +/// fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { +/// params.extend(&[ /// &self.id as &dyn rusqlite::ToSql, /// &self.path as &dyn rusqlite::ToSql, -/// ] +/// ]) /// } /// } /// ``` /// /// IDs are not assigned automatically; assign your own to models before you /// insert them. -pub trait Insertable { +pub trait Insertable { + /// The first part of the insert query, such as: + /// `INSERT INTO X (Y, Z) VALUES` const INSERT_QUERY_PRELUDE: &'static str; - + /// The placeholders portion, such as `(?, ?)` const INSERT_PLACEHOLDER: &'static str; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; FIELD_COUNT]; + /// This method is supposed to extend the input `params` with the parameters + /// matching the `INSERT_PLACEHOLDER`. + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>); - fn insert(model: &Self, conn: &rusqlite::Connection) -> Result<()> { + fn insert(&self, conn: &rusqlite::Connection) -> Result<()> { let mut stmt = conn.prepare_cached( // Maybe turn this in to a lazily-initialized static format!("{}{}", Self::INSERT_QUERY_PRELUDE, Self::INSERT_PLACEHOLDER).as_str(), )?; - stmt.execute(rusqlite::params_from_iter(model.param_bindings()))?; + let mut params = vec![]; + self.extend_params(&mut params); + stmt.execute(params.as_slice())?; Ok(()) } @@ -71,42 +77,43 @@ pub trait Insertable { I: Iterator + ExactSizeIterator, Self: 'a, { - let var_limit = conn.limit(rusqlite::limits::Limit::SQLITE_LIMIT_VARIABLE_NUMBER) as usize; - // If each model takes up `FIELD_COUNT` variables, we can fit `var_limit / - // FIELD_COUNT` complete models in each "page" of our query - let page_size = var_limit / FIELD_COUNT; - - // Integer division tells us how many full pages there are. If there is a - // non-zero remainder, there is one final incomplete page. - let model_count = models.len(); - let page_count = match (model_count / page_size, model_count % page_size) { - (page_count, 0) => page_count, - (page_count, _) => page_count + 1, - }; - - let (mut query, mut previous_page_size) = (String::new(), 0); - for _ in 0..page_count { - // If there are fewer than `page_size` pages left, the iterator will just take - // everything. - let page_iter = models.by_ref().take(page_size); - - // We can reuse our query string if the current page is the same size as the - // last one. If not, we have to rebuild the query string. - let current_page_size = page_iter.len(); - if current_page_size != previous_page_size { - query = format!(" {},", Self::INSERT_PLACEHOLDER).repeat(current_page_size); - query.insert_str(0, Self::INSERT_QUERY_PRELUDE); - // Remove trailing comma - query.pop(); - previous_page_size = current_page_size; + let mut params = vec![]; + // first: insert chunks using a single prepared query + { + // The max number of bindings defaults to 32K in recent sqlite versions. + // That is plenty to use a chunk size of 50: + const CHUNK_SIZE: usize = 50; + let mut chunked_query = String::from(Self::INSERT_QUERY_PRELUDE); + for i in 0..CHUNK_SIZE { + if i > 0 { + chunked_query.push_str(", "); + } + chunked_query.push_str(Self::INSERT_PLACEHOLDER); + } + chunked_query.push(';'); + let mut chunked_stmt = conn.prepare_cached(&chunked_query)?; + + while models.len() >= CHUNK_SIZE { + for row in models.by_ref().take(CHUNK_SIZE) { + row.extend_params(&mut params); + } + chunked_stmt.execute(params.as_slice())?; + params.clear(); } - - let mut stmt = conn.prepare_cached(query.as_str())?; - let params = page_iter.flat_map(|model| model.param_bindings()); - stmt.execute(rusqlite::params_from_iter(params))?; } - conn.flush_prepared_statement_cache(); + // then: insert the remainder + { + let remainder_query = + format!("{}{}", Self::INSERT_QUERY_PRELUDE, Self::INSERT_PLACEHOLDER); + let mut remainder_stmt = conn.prepare_cached(&remainder_query)?; + + for row in models { + row.extend_params(&mut params); + remainder_stmt.execute(params.as_slice())?; + params.clear(); + } + } Ok(()) } @@ -190,15 +197,15 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for SourceFile { } } -impl Insertable<2> for SourceFile { +impl Insertable for SourceFile { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO source_file (id, path) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 2] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.id as &dyn rusqlite::ToSql, &self.path as &dyn rusqlite::ToSql, - ] + ]) } } @@ -219,12 +226,12 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for CoverageSample { } } -impl Insertable<8> for CoverageSample { +impl Insertable for CoverageSample { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO coverage_sample (raw_upload_id, local_sample_id, source_file_id, line_no, coverage_type, hits, hit_branches, total_branches) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?, ?, ?, ?, ?, ?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 8] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.raw_upload_id as &dyn rusqlite::ToSql, &self.local_sample_id as &dyn rusqlite::ToSql, &self.source_file_id as &dyn rusqlite::ToSql, @@ -233,7 +240,7 @@ impl Insertable<8> for CoverageSample { &self.hits as &dyn rusqlite::ToSql, &self.hit_branches as &dyn rusqlite::ToSql, &self.total_branches as &dyn rusqlite::ToSql, - ] + ]) } } @@ -253,12 +260,12 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for BranchesData { } } -impl Insertable<7> for BranchesData { +impl Insertable for BranchesData { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO branches_data (raw_upload_id, local_branch_id, source_file_id, local_sample_id, hits, branch_format, branch) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?, ?, ?, ?, ?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 7] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.raw_upload_id as &dyn rusqlite::ToSql, &self.local_branch_id as &dyn rusqlite::ToSql, &self.source_file_id as &dyn rusqlite::ToSql, @@ -266,7 +273,7 @@ impl Insertable<7> for BranchesData { &self.hits as &dyn rusqlite::ToSql, &self.branch_format as &dyn rusqlite::ToSql, &self.branch as &dyn rusqlite::ToSql, - ] + ]) } } @@ -288,12 +295,12 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for MethodData { } } -impl Insertable<9> for MethodData { +impl Insertable for MethodData { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO method_data (raw_upload_id, local_method_id, source_file_id, local_sample_id, line_no, hit_branches, total_branches, hit_complexity_paths, total_complexity) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?, ?, ?, ?, ?, ?, ?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 9] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.raw_upload_id as &dyn rusqlite::ToSql, &self.local_method_id as &dyn rusqlite::ToSql, &self.source_file_id as &dyn rusqlite::ToSql, @@ -303,7 +310,7 @@ impl Insertable<9> for MethodData { &self.total_branches as &dyn rusqlite::ToSql, &self.hit_complexity_paths as &dyn rusqlite::ToSql, &self.total_complexity as &dyn rusqlite::ToSql, - ] + ]) } } @@ -325,12 +332,12 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for SpanData { } } -impl Insertable<9> for SpanData { +impl Insertable for SpanData { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO span_data (raw_upload_id, local_span_id, source_file_id, local_sample_id, hits, start_line, start_col, end_line, end_col) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?, ?, ?, ?, ?, ?, ?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 9] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.raw_upload_id as &dyn rusqlite::ToSql, &self.local_span_id as &dyn rusqlite::ToSql, &self.source_file_id as &dyn rusqlite::ToSql, @@ -340,7 +347,7 @@ impl Insertable<9> for SpanData { &self.start_col as &dyn rusqlite::ToSql, &self.end_line as &dyn rusqlite::ToSql, &self.end_col as &dyn rusqlite::ToSql, - ] + ]) } } @@ -357,18 +364,18 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for ContextAssoc { } } -impl Insertable<4> for ContextAssoc { +impl Insertable for ContextAssoc { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO context_assoc (context_id, raw_upload_id, local_sample_id, local_span_id) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?, ?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 4] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.context_id as &dyn rusqlite::ToSql, &self.raw_upload_id as &dyn rusqlite::ToSql, &self.local_sample_id as &dyn rusqlite::ToSql, &self.local_span_id as &dyn rusqlite::ToSql, - ] + ]) } } @@ -384,17 +391,17 @@ impl<'a> std::convert::TryFrom<&'a rusqlite::Row<'a>> for Context { } } -impl Insertable<3> for Context { +impl Insertable for Context { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO context (id, context_type, name) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 3] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.id as &dyn rusqlite::ToSql, &self.context_type as &dyn rusqlite::ToSql, &self.name as &dyn rusqlite::ToSql, - ] + ]) } } @@ -481,15 +488,15 @@ mod tests { data: String, } - impl Insertable<2> for TestModel { + impl Insertable for TestModel { const INSERT_QUERY_PRELUDE: &'static str = "INSERT INTO test (id, data) VALUES "; const INSERT_PLACEHOLDER: &'static str = "(?, ?)"; - fn param_bindings(&self) -> [&dyn rusqlite::ToSql; 2] { - [ + fn extend_params<'a>(&'a self, params: &mut Vec<&'a dyn rusqlite::ToSql>) { + params.extend(&[ &self.id as &dyn rusqlite::ToSql, &self.data as &dyn rusqlite::ToSql, - ] + ]) } } @@ -548,8 +555,8 @@ mod tests { data: "foo".to_string(), }; - >::insert(&model, &ctx.report.conn).unwrap(); - let duplicate_result = >::insert(&model, &ctx.report.conn); + model.insert(&ctx.report.conn).unwrap(); + let duplicate_result = model.insert(&ctx.report.conn); let test_models = list_test_models(&ctx.report); assert_eq!(test_models, vec![model]); @@ -565,39 +572,17 @@ mod tests { fn test_test_model_multi_insert() { let ctx = setup(); - // We lower the limit to force the multi_insert pagination logic to kick in. - // We'll use 5 models, each with 2 variables, so we need 10 variables total. - // Setting the limit to 4 should wind up using multiple pages. - let _ = ctx - .report - .conn - .set_limit(rusqlite::limits::Limit::SQLITE_LIMIT_VARIABLE_NUMBER, 4); - - let models_to_insert = vec![ - TestModel { - id: 1, - data: "foo".to_string(), - }, - TestModel { - id: 2, - data: "bar".to_string(), - }, - TestModel { - id: 3, - data: "baz".to_string(), - }, - TestModel { - id: 4, - data: "abc".to_string(), - }, - TestModel { - id: 5, - data: "def".to_string(), - }, - ]; - - >::multi_insert(models_to_insert.iter(), &ctx.report.conn) - .unwrap(); + // Our chunk-size is set to 50, so inserting more than twice that will use + // multiple chunks, as well as using single inserts for the remainder. + + let models_to_insert: Vec<_> = (0..111) + .map(|id| TestModel { + id, + data: format!("Test {id}"), + }) + .collect(); + + TestModel::multi_insert(models_to_insert.iter(), &ctx.report.conn).unwrap(); let test_models = list_test_models(&ctx.report); assert_eq!(test_models, models_to_insert); @@ -612,8 +597,8 @@ mod tests { path: "src/report/report.rs".to_string(), }; - >::insert(&model, &ctx.report.conn).unwrap(); - let duplicate_result = >::insert(&model, &ctx.report.conn); + model.insert(&ctx.report.conn).unwrap(); + let duplicate_result = model.insert(&ctx.report.conn); let files = ctx.report.list_files().unwrap(); assert_eq!(files, vec![model]); @@ -635,8 +620,8 @@ mod tests { name: "test_upload".to_string(), }; - >::insert(&model, &ctx.report.conn).unwrap(); - let duplicate_result = >::insert(&model, &ctx.report.conn); + model.insert(&ctx.report.conn).unwrap(); + let duplicate_result = model.insert(&ctx.report.conn); let contexts = ctx.report.list_contexts().unwrap(); assert_eq!(contexts, vec![model]); @@ -671,7 +656,7 @@ mod tests { ..Default::default() }; - >::insert(&model, &report.conn).unwrap(); + model.insert(&report.conn).unwrap(); let assoc: ContextAssoc = report .conn .query_row( @@ -712,8 +697,8 @@ mod tests { ..Default::default() }; - >::insert(&model, &report.conn).unwrap(); - let duplicate_result = >::insert(&model, &report.conn); + model.insert(&report.conn).unwrap(); + let duplicate_result = model.insert(&report.conn); let samples = report.list_coverage_samples().unwrap(); assert_eq!(samples, vec![model]); @@ -739,15 +724,13 @@ mod tests { let report = report_builder.build().unwrap(); let local_sample_id = rand::random(); - >::insert( - &CoverageSample { - raw_upload_id: raw_upload.id, - local_sample_id, - source_file_id: source_file.id, - ..Default::default() - }, - &report.conn, - ) + CoverageSample { + raw_upload_id: raw_upload.id, + local_sample_id, + source_file_id: source_file.id, + ..Default::default() + } + .insert(&report.conn) .unwrap(); let model = BranchesData { @@ -758,8 +741,8 @@ mod tests { ..Default::default() }; - >::insert(&model, &report.conn).unwrap(); - let duplicate_result = >::insert(&model, &report.conn); + model.insert(&report.conn).unwrap(); + let duplicate_result = model.insert(&report.conn); let branch: BranchesData = report .conn @@ -805,8 +788,8 @@ mod tests { ..Default::default() }; - >::insert(&model, &report.conn).unwrap(); - let duplicate_result = >::insert(&model, &report.conn); + model.insert(&report.conn).unwrap(); + let duplicate_result = model.insert(&report.conn); let method: MethodData = report .conn @@ -844,8 +827,8 @@ mod tests { ..Default::default() }; - >::insert(&model, &report.conn).unwrap(); - let duplicate_result = >::insert(&model, &report.conn); + model.insert(&report.conn).unwrap(); + let duplicate_result = model.insert(&report.conn); let branch: SpanData = report .conn diff --git a/src/report/sqlite/report_builder.rs b/src/report/sqlite/report_builder.rs index 3889cdd..49095ff 100644 --- a/src/report/sqlite/report_builder.rs +++ b/src/report/sqlite/report_builder.rs @@ -235,7 +235,7 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { id: seahash::hash(path.as_bytes()) as i64, path, }; - >::insert(&model, &self.conn)?; + model.insert(&self.conn)?; Ok(model) } @@ -249,7 +249,7 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { context_type, name: name.to_string(), }; - >::insert(&model, &self.conn)?; + model.insert(&self.conn)?; Ok(model) } @@ -259,7 +259,7 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { ) -> Result { // TODO handle error sample.local_sample_id = self.id_sequence.next().unwrap(); - >::insert(&sample, &self.conn)?; + sample.insert(&self.conn)?; Ok(sample) } @@ -270,10 +270,7 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { for sample in &mut samples { sample.local_sample_id = self.id_sequence.next().unwrap(); } - >::multi_insert( - samples.iter().map(|v| &**v), - &self.conn, - )?; + models::CoverageSample::multi_insert(samples.iter().map(|v| &**v), &self.conn)?; Ok(()) } @@ -283,7 +280,7 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { ) -> Result { // TODO handle error branch.local_branch_id = self.id_sequence.next().unwrap(); - >::insert(&branch, &self.conn)?; + branch.insert(&self.conn)?; Ok(branch) } @@ -294,17 +291,14 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { for branch in &mut branches { branch.local_branch_id = self.id_sequence.next().unwrap(); } - >::multi_insert( - branches.iter().map(|v| &**v), - &self.conn, - )?; + models::BranchesData::multi_insert(branches.iter().map(|v| &**v), &self.conn)?; Ok(()) } fn insert_method_data(&mut self, mut method: models::MethodData) -> Result { // TODO handle error method.local_method_id = self.id_sequence.next().unwrap(); - >::insert(&method, &self.conn)?; + method.insert(&self.conn)?; Ok(method) } @@ -315,17 +309,14 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { for method in &mut methods { method.local_method_id = self.id_sequence.next().unwrap(); } - >::multi_insert( - methods.iter().map(|v| &**v), - &self.conn, - )?; + models::MethodData::multi_insert(methods.iter().map(|v| &**v), &self.conn)?; Ok(()) } fn insert_span_data(&mut self, mut span: models::SpanData) -> Result { // TODO handle error span.local_span_id = self.id_sequence.next().unwrap(); - >::insert(&span, &self.conn)?; + span.insert(&self.conn)?; Ok(span) } @@ -333,20 +324,17 @@ impl<'a> ReportBuilder for SqliteReportBuilderTx<'a> { for span in &mut spans { span.local_span_id = self.id_sequence.next().unwrap(); } - >::multi_insert(spans.iter().map(|v| &**v), &self.conn)?; + models::SpanData::multi_insert(spans.iter().map(|v| &**v), &self.conn)?; Ok(()) } fn associate_context(&mut self, assoc: models::ContextAssoc) -> Result { - >::insert(&assoc, &self.conn)?; + assoc.insert(&self.conn)?; Ok(assoc) } fn multi_associate_context(&mut self, assocs: Vec<&mut models::ContextAssoc>) -> Result<()> { - >::multi_insert( - assocs.iter().map(|v| &**v), - &self.conn, - )?; + models::ContextAssoc::multi_insert(assocs.iter().map(|v| &**v), &self.conn)?; Ok(()) }