diff --git a/hipcheck/src/analysis/analysis.rs b/hipcheck/src/analysis/analysis.rs deleted file mode 100644 index cb493631..00000000 --- a/hipcheck/src/analysis/analysis.rs +++ /dev/null @@ -1,403 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -use crate::analysis::result::*; -use crate::config::AttacksConfigQuery; -use crate::config::CommitConfigQuery; -use crate::config::FuzzConfigQuery; -use crate::config::PracticesConfigQuery; -use crate::data::git::GitProvider; -use crate::error::Error; -use crate::error::Result; -use crate::metric::affiliation::AffiliatedType; -use crate::metric::MetricProvider; -use crate::report::Concern; -use crate::F64; -use std::collections::HashMap; -use std::collections::HashSet; -use std::default::Default; -use std::fmt; -use std::fmt::Display; -use std::fmt::Formatter; -use std::ops::Not; -use std::sync::Arc; - -/// Queries about analyses -#[salsa::query_group(AnalysisProviderStorage)] -pub trait AnalysisProvider: - AttacksConfigQuery - + CommitConfigQuery - + GitProvider - + MetricProvider - + FuzzConfigQuery - + PracticesConfigQuery -{ - /// Returns result of activity analysis - fn activity_analysis(&self) -> Arc; - - /// Returns result of affiliation analysis - fn affiliation_analysis(&self) -> Arc; - - /// Returns result of binary analysis - fn binary_analysis(&self) -> Arc; - - /// Returns result of churn analysis - fn churn_analysis(&self) -> Arc; - - /// Returns result of entropy analysis - fn entropy_analysis(&self) -> Arc; - - /// Returns result of identity analysis - fn identity_analysis(&self) -> Arc; - - /// Returns result of fuzz analysis - fn fuzz_analysis(&self) -> Arc; - - /// Returns result of review analysis - fn review_analysis(&self) -> Arc; - - /// Returns result of typo analysis - fn typo_analysis(&self) -> Arc; -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum AnalysisReport { - /// Affiliation analysis result. - Affiliation { - value: u64, - threshold: u64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Binary file analysis result. - Binary { - value: u64, - threshold: u64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Churn analysis result. - Churn { - value: F64, - threshold: F64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Entropy analysis result. - Entropy { - value: F64, - threshold: F64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Identity analysis result. - Identity { - value: F64, - threshold: F64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Fuzz repo analysis result. - Fuzz { - value: bool, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Review analysis result. - Review { - value: F64, - threshold: F64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// Typo analysis result. 
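The `AnalysisProvider` trait deleted above is recreated verbatim in `analysis/mod.rs` below; the whole change is a move, not a rewrite. For readers unfamiliar with the salsa pattern it uses: a query-group trait declares queries, each derived query dispatches (by default) to a free function of the same name in the same module, and results are memoized against the database. A minimal, self-contained sketch, assuming the pre-2022 (0.16-era) salsa API this crate uses; the names `Greeting` and `Db` are hypothetical:

```rust
// A derived query with no #[salsa::invoke] attribute dispatches to the free
// function of the same name in the same module -- the convention
// `AnalysisProvider` relies on for `activity_analysis` and its siblings.
#[salsa::query_group(GreetingStorage)]
trait Greeting {
	#[salsa::input]
	fn name(&self) -> String;

	fn greeting(&self) -> String;
}

fn greeting(db: &dyn Greeting) -> String {
	// Memoized: re-runs only when the `name` input changes.
	format!("Hello, {}!", db.name())
}

#[salsa::database(GreetingStorage)]
#[derive(Default)]
struct Db {
	storage: salsa::Storage<Db>,
}

impl salsa::Database for Db {}

fn main() {
	let mut db = Db::default();
	db.set_name("world".to_string()); // setter generated for the input query
	assert_eq!(db.greeting(), "Hello, world!");
}
```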
- Typo { - value: u64, - threshold: u64, - outcome: AnalysisOutcome, - concerns: Vec, - }, - /// "Result" for a skipped or errored analysis - None { outcome: AnalysisOutcome }, -} - -impl Default for AnalysisReport { - fn default() -> AnalysisReport { - AnalysisReport::None { - outcome: AnalysisOutcome::Skipped, - } - } -} - -#[derive(Debug, Clone, Eq, PartialEq, Default)] -pub enum AnalysisOutcome { - #[default] - Skipped, - Error(Error), - Pass(String), - Fail(String), -} - -impl Display for AnalysisOutcome { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - AnalysisOutcome::Skipped => write!(f, "SKIPPED"), - AnalysisOutcome::Error(msg) => write!(f, "ERROR {}", msg), - AnalysisOutcome::Pass(msg) => write!(f, "PASS {}", msg), - AnalysisOutcome::Fail(msg) => write!(f, "FAIL {}", msg), - } - } -} - -pub fn activity_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.activity_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let value = results.time_since_last_commit.num_weeks() as u64; - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns: vec![], - }) -} - -pub fn affiliation_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.affiliation_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - - let affiliated_iter = results - .affiliations - .iter() - .filter(|a| a.affiliated_type.is_affiliated()); - - let value = affiliated_iter.clone().count() as u64; - - let mut contributor_freq_map = HashMap::new(); - - for affiliation in affiliated_iter { - let commit_view = match db.contributors_for_commit(Arc::clone(&affiliation.commit)) { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(cv) => cv, - }; - - let contributor = match affiliation.affiliated_type { - AffiliatedType::Author => String::from(&commit_view.author.name), - AffiliatedType::Committer => String::from(&commit_view.committer.name), - AffiliatedType::Neither => String::from("Neither"), - AffiliatedType::Both => String::from("Both"), - }; - - let count_commits_for = |contributor| { - db.commits_for_contributor(Arc::clone(contributor)) - .into_iter() - .count() as i64 - }; - - let author_commits = count_commits_for(&commit_view.author); - let committer_commits = count_commits_for(&commit_view.committer); - - let commit_count = match affiliation.affiliated_type { - AffiliatedType::Neither => 0, - AffiliatedType::Both => author_commits + committer_commits, - AffiliatedType::Author => author_commits, - AffiliatedType::Committer => committer_commits, - }; - - // Add string representation of affiliated contributor with count of associated commits - contributor_freq_map.insert(contributor, commit_count); - } - - let concerns = contributor_freq_map - .into_iter() - .map(|(contributor, count)| Concern::Affiliation { contributor, count }) - .collect(); - - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns, - }) -} - -pub fn binary_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.binary_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let value = results.binary_files.len() as u64; - let concerns = results - .binary_files - .clone() - .into_iter() - .map(|binary_file| Concern::Binary { - file_path: 
binary_file.as_ref().to_string(), - }) - .collect(); - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns, - }) -} - -pub fn churn_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.churn_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let value_threshold = *db.churn_value_threshold(); - let num_flagged = results - .commit_churn_freqs - .iter() - .filter(|c| c.churn.into_inner() > value_threshold) - .count() as u64; - let percent_flagged = num_flagged as f64 / results.commit_churn_freqs.len() as f64; - let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); - let concerns = results - .commit_churn_freqs - .iter() - .filter(|c| c.churn.into_inner() > value_threshold) - .map(|cf| Concern::Churn { - commit_hash: cf.commit.hash.clone(), - score: cf.churn.into_inner(), - threshold: value_threshold, - }) - .collect::>(); - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns, - }) -} - -pub fn entropy_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.entropy_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let value_threshold = *db.entropy_value_threshold(); - let num_flagged = results - .commit_entropies - .iter() - .filter(|c| c.entropy.into_inner() > value_threshold) - .count() as u64; - let percent_flagged = num_flagged as f64 / results.commit_entropies.len() as f64; - - let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); - let res_concerns = results - .commit_entropies - .iter() - .filter(|c| c.entropy.into_inner() > value_threshold) - .map(|cf| { - db.get_short_hash(Arc::new(cf.commit.hash.clone())) - .map(|commit_hash| Concern::Entropy { - commit_hash: commit_hash.trim().to_owned(), - score: cf.entropy.into_inner(), - threshold: value_threshold, - }) - }) - .collect::>>(); - let concerns = match res_concerns { - Ok(c) => c, - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - }; - - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns, - }) -} - -pub fn identity_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.identity_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let num_flagged = results - .matches - .iter() - .filter(|m| m.identities_match) - .count() as u64; - let percent_flagged = num_flagged as f64 / results.matches.len() as f64; - let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); - - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns: vec![], - }) -} - -pub fn fuzz_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.fuzz_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let exists = results.fuzz_result.exists; - - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(exists.into())), - concerns: vec![], - }) -} - -pub fn review_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.review_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - 
Ok(results) => results, - }; - let num_flagged = results - .pull_reviews - .iter() - .filter(|p| p.has_review.not()) - .count() as u64; - - let percent_flagged = match (num_flagged, results.pull_reviews.len()) { - (flagged, total) if flagged != 0 && total != 0 => { - num_flagged as f64 / results.pull_reviews.len() as f64 - } - _ => 0.0, - }; - let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); - - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), - concerns: vec![], - }) -} - -pub fn typo_analysis(db: &dyn AnalysisProvider) -> Arc { - let results = match db.typo_metric() { - Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), - Ok(results) => results, - }; - let num_flagged = results.typos.len() as u64; - - let concerns: Vec<_> = results - .typos - .iter() - .map(|typodep| Concern::Typo { - dependency_name: typodep.dependency.to_string(), - }) - .collect::>() - .into_iter() - .collect(); - - Arc::new(HCAnalysisReport { - outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(num_flagged.into())), - concerns, - }) -} - -fn score_by_threshold(value: T, threshold: T) -> i64 { - if value > threshold { - 1 - } else { - 0 - } -} - -fn score_by_threshold_reversed(value: T, threshold: T) -> i64 { - if value >= threshold { - 0 - } else { - 1 - } -} diff --git a/hipcheck/src/analysis/mod.rs b/hipcheck/src/analysis/mod.rs index a8dcf9c9..a494ec18 100644 --- a/hipcheck/src/analysis/mod.rs +++ b/hipcheck/src/analysis/mod.rs @@ -1,10 +1,407 @@ // SPDX-License-Identifier: Apache-2.0 -#[allow(clippy::module_inception)] -pub mod analysis; pub mod report_builder; pub mod result; pub mod score; -pub use analysis::AnalysisProvider; -pub use analysis::AnalysisProviderStorage; +use crate::analysis::result::*; +use crate::config::AttacksConfigQuery; +use crate::config::CommitConfigQuery; +use crate::config::FuzzConfigQuery; +use crate::config::PracticesConfigQuery; +use crate::data::git::GitProvider; +use crate::error::Error; +use crate::error::Result; +use crate::metric::affiliation::AffiliatedType; +use crate::metric::MetricProvider; +use crate::report::Concern; +use crate::F64; +use std::collections::HashMap; +use std::collections::HashSet; +use std::default::Default; +use std::fmt; +use std::fmt::Display; +use std::fmt::Formatter; +use std::ops::Not; +use std::sync::Arc; + +/// Queries about analyses +#[salsa::query_group(AnalysisProviderStorage)] +pub trait AnalysisProvider: + AttacksConfigQuery + + CommitConfigQuery + + GitProvider + + MetricProvider + + FuzzConfigQuery + + PracticesConfigQuery +{ + /// Returns result of activity analysis + fn activity_analysis(&self) -> Arc; + + /// Returns result of affiliation analysis + fn affiliation_analysis(&self) -> Arc; + + /// Returns result of binary analysis + fn binary_analysis(&self) -> Arc; + + /// Returns result of churn analysis + fn churn_analysis(&self) -> Arc; + + /// Returns result of entropy analysis + fn entropy_analysis(&self) -> Arc; + + /// Returns result of identity analysis + fn identity_analysis(&self) -> Arc; + + /// Returns result of fuzz analysis + fn fuzz_analysis(&self) -> Arc; + + /// Returns result of review analysis + fn review_analysis(&self) -> Arc; + + /// Returns result of typo analysis + fn typo_analysis(&self) -> Arc; +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum AnalysisReport { + /// Affiliation analysis result. 
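A note on the percentage computations that recur later in this file (churn, entropy, identity, review): each divides a flagged count by a total and then asserts the result is not NaN via `F64::new(..).expect(..)`. Only the review analysis guards the zero-total case; if `commit_churn_freqs` or `commit_entropies` were ever empty, the division would yield NaN and the `expect` would panic. A shared helper along these lines (hypothetical, not part of this change) would make the guard uniform:

```rust
/// Fraction of flagged items, defined as 0.0 when there are no items at all.
/// With `total > 0` the result is a finite value in [0.0, 1.0], so the
/// downstream `F64::new(..)` conversion can never see NaN.
fn percent_flagged(num_flagged: u64, total: usize) -> f64 {
	if total == 0 {
		0.0
	} else {
		num_flagged as f64 / total as f64
	}
}
```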
+ Affiliation { + value: u64, + threshold: u64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Binary file analysis result. + Binary { + value: u64, + threshold: u64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Churn analysis result. + Churn { + value: F64, + threshold: F64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Entropy analysis result. + Entropy { + value: F64, + threshold: F64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Identity analysis result. + Identity { + value: F64, + threshold: F64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Fuzz repo analysis result. + Fuzz { + value: bool, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Review analysis result. + Review { + value: F64, + threshold: F64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// Typo analysis result. + Typo { + value: u64, + threshold: u64, + outcome: AnalysisOutcome, + concerns: Vec, + }, + /// "Result" for a skipped or errored analysis + None { outcome: AnalysisOutcome }, +} + +impl Default for AnalysisReport { + fn default() -> AnalysisReport { + AnalysisReport::None { + outcome: AnalysisOutcome::Skipped, + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Default)] +pub enum AnalysisOutcome { + #[default] + Skipped, + Error(Error), + Pass(String), + Fail(String), +} + +impl Display for AnalysisOutcome { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + AnalysisOutcome::Skipped => write!(f, "SKIPPED"), + AnalysisOutcome::Error(msg) => write!(f, "ERROR {}", msg), + AnalysisOutcome::Pass(msg) => write!(f, "PASS {}", msg), + AnalysisOutcome::Fail(msg) => write!(f, "FAIL {}", msg), + } + } +} + +pub fn activity_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.activity_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let value = results.time_since_last_commit.num_weeks() as u64; + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns: vec![], + }) +} + +pub fn affiliation_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.affiliation_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + + let affiliated_iter = results + .affiliations + .iter() + .filter(|a| a.affiliated_type.is_affiliated()); + + let value = affiliated_iter.clone().count() as u64; + + let mut contributor_freq_map = HashMap::new(); + + for affiliation in affiliated_iter { + let commit_view = match db.contributors_for_commit(Arc::clone(&affiliation.commit)) { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(cv) => cv, + }; + + let contributor = match affiliation.affiliated_type { + AffiliatedType::Author => String::from(&commit_view.author.name), + AffiliatedType::Committer => String::from(&commit_view.committer.name), + AffiliatedType::Neither => String::from("Neither"), + AffiliatedType::Both => String::from("Both"), + }; + + let count_commits_for = |contributor| { + db.commits_for_contributor(Arc::clone(contributor)) + .into_iter() + .count() as i64 + }; + + let author_commits = count_commits_for(&commit_view.author); + let committer_commits = count_commits_for(&commit_view.committer); + + let commit_count = match affiliation.affiliated_type { + AffiliatedType::Neither => 0, + AffiliatedType::Both => author_commits + committer_commits, + AffiliatedType::Author => author_commits, + 
AffiliatedType::Committer => committer_commits, + }; + + // Add string representation of affiliated contributor with count of associated commits + contributor_freq_map.insert(contributor, commit_count); + } + + let concerns = contributor_freq_map + .into_iter() + .map(|(contributor, count)| Concern::Affiliation { contributor, count }) + .collect(); + + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns, + }) +} + +pub fn binary_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.binary_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let value = results.binary_files.len() as u64; + let concerns = results + .binary_files + .clone() + .into_iter() + .map(|binary_file| Concern::Binary { + file_path: binary_file.as_ref().to_string(), + }) + .collect(); + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns, + }) +} + +pub fn churn_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.churn_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let value_threshold = *db.churn_value_threshold(); + let num_flagged = results + .commit_churn_freqs + .iter() + .filter(|c| c.churn.into_inner() > value_threshold) + .count() as u64; + let percent_flagged = num_flagged as f64 / results.commit_churn_freqs.len() as f64; + let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); + let concerns = results + .commit_churn_freqs + .iter() + .filter(|c| c.churn.into_inner() > value_threshold) + .map(|cf| Concern::Churn { + commit_hash: cf.commit.hash.clone(), + score: cf.churn.into_inner(), + threshold: value_threshold, + }) + .collect::>(); + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns, + }) +} + +pub fn entropy_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.entropy_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let value_threshold = *db.entropy_value_threshold(); + let num_flagged = results + .commit_entropies + .iter() + .filter(|c| c.entropy.into_inner() > value_threshold) + .count() as u64; + let percent_flagged = num_flagged as f64 / results.commit_entropies.len() as f64; + + let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); + let res_concerns = results + .commit_entropies + .iter() + .filter(|c| c.entropy.into_inner() > value_threshold) + .map(|cf| { + db.get_short_hash(Arc::new(cf.commit.hash.clone())) + .map(|commit_hash| Concern::Entropy { + commit_hash: commit_hash.trim().to_owned(), + score: cf.entropy.into_inner(), + threshold: value_threshold, + }) + }) + .collect::>>(); + let concerns = match res_concerns { + Ok(c) => c, + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + }; + + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns, + }) +} + +pub fn identity_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.identity_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let num_flagged = results + .matches + .iter() + .filter(|m| m.identities_match) + .count() as u64; + let percent_flagged 
= num_flagged as f64 / results.matches.len() as f64; + let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); + + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns: vec![], + }) +} + +pub fn fuzz_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.fuzz_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let exists = results.fuzz_result.exists; + + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(exists.into())), + concerns: vec![], + }) +} + +pub fn review_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.review_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let num_flagged = results + .pull_reviews + .iter() + .filter(|p| p.has_review.not()) + .count() as u64; + + let percent_flagged = match (num_flagged, results.pull_reviews.len()) { + (flagged, total) if flagged != 0 && total != 0 => { + num_flagged as f64 / results.pull_reviews.len() as f64 + } + _ => 0.0, + }; + let value = F64::new(percent_flagged).expect("Percent threshold should never be NaN"); + + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(value.into())), + concerns: vec![], + }) +} + +pub fn typo_analysis(db: &dyn AnalysisProvider) -> Arc { + let results = match db.typo_metric() { + Err(err) => return Arc::new(HCAnalysisReport::generic_error(err, vec![])), + Ok(results) => results, + }; + let num_flagged = results.typos.len() as u64; + + let concerns: Vec<_> = results + .typos + .iter() + .map(|typodep| Concern::Typo { + dependency_name: typodep.dependency.to_string(), + }) + .collect::>() + .into_iter() + .collect(); + + Arc::new(HCAnalysisReport { + outcome: HCAnalysisOutcome::Completed(HCAnalysisValue::Basic(num_flagged.into())), + concerns, + }) +} + +fn score_by_threshold(value: T, threshold: T) -> i64 { + if value > threshold { + 1 + } else { + 0 + } +} + +fn score_by_threshold_reversed(value: T, threshold: T) -> i64 { + if value >= threshold { + 0 + } else { + 1 + } +} diff --git a/hipcheck/src/analysis/report_builder.rs b/hipcheck/src/analysis/report_builder.rs index b9cedc34..9b58324b 100644 --- a/hipcheck/src/analysis/report_builder.rs +++ b/hipcheck/src/analysis/report_builder.rs @@ -8,8 +8,8 @@ use crate::error::Result; use crate::hc_error; use crate::report::Concern; pub use crate::report::*; -use crate::session::session::Session; -use crate::source::source::SourceQuery; +use crate::session::Session; +use crate::source::SourceQuery; use crate::version::VersionQuery; use std::default::Default; use std::result::Result as StdResult; diff --git a/hipcheck/src/analysis/score.rs b/hipcheck/src/analysis/score.rs index 59d2bae5..1dfa8a90 100644 --- a/hipcheck/src/analysis/score.rs +++ b/hipcheck/src/analysis/score.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 -use crate::analysis::analysis::AnalysisOutcome; use crate::analysis::result::*; +use crate::analysis::AnalysisOutcome; use crate::analysis::AnalysisProvider; use crate::config::{visit_leaves, WeightTree, WeightTreeProvider}; use crate::error::Result; diff --git a/hipcheck/src/cache/mod.rs b/hipcheck/src/cache/mod.rs index 1a270687..269ac9a3 100644 --- a/hipcheck/src/cache/mod.rs +++ b/hipcheck/src/cache/mod.rs @@ -1,4 +1,3 @@ -#[allow(clippy::module_inception)] #[allow(dead_code)] pub mod 
plugin_cache; pub mod repo_cache; diff --git a/hipcheck/src/cli.rs b/hipcheck/src/cli.rs index 2dd2341d..d0e1d706 100644 --- a/hipcheck/src/cli.rs +++ b/hipcheck/src/cli.rs @@ -9,7 +9,7 @@ use crate::hc_error; use crate::report::Format; use crate::session::pm; use crate::shell::{color_choice::ColorChoice, verbosity::Verbosity}; -use crate::source::source; +use crate::source; use crate::target::{ LocalGitRepo, MavenPackage, Package, PackageHost, Sbom, SbomStandard, TargetSeed, TargetSeedKind, TargetType, ToTargetSeed, ToTargetSeedKind, diff --git a/hipcheck/src/data/git/query/mod.rs b/hipcheck/src/data/git/query/mod.rs index b93bdac1..ef92fa3d 100644 --- a/hipcheck/src/data/git/query/mod.rs +++ b/hipcheck/src/data/git/query/mod.rs @@ -18,7 +18,7 @@ use crate::data::git::SignerKeyView; use crate::data::git::SignerNameView; use crate::data::git::SignerView; use crate::error::Result; -use crate::source::source::SourceQuery; +use crate::source::SourceQuery; use crate::version::VersionQuery; use chrono::prelude::*; use std::sync::Arc; diff --git a/hipcheck/src/data/query/code_quality.rs b/hipcheck/src/data/query/code_quality.rs index f55ba5bd..c74e04dc 100644 --- a/hipcheck/src/data/query/code_quality.rs +++ b/hipcheck/src/data/query/code_quality.rs @@ -7,7 +7,7 @@ use std::rc::Rc; use crate::data::code_quality::get_eslint_report; use crate::data::code_quality::CodeQualityReport; use crate::error::Result; -use crate::source::source::SourceQuery; +use crate::source::SourceQuery; use crate::version::VersionQuery; /// Queries about code quality diff --git a/hipcheck/src/data/query/dependencies.rs b/hipcheck/src/data/query/dependencies.rs index d3b3ea57..58a00274 100644 --- a/hipcheck/src/data/query/dependencies.rs +++ b/hipcheck/src/data/query/dependencies.rs @@ -6,7 +6,7 @@ use crate::data::npm::get_package_file; use crate::data::npm::PackageFile; use crate::data::Dependencies; use crate::error::Result; -use crate::source::source::SourceQuery; +use crate::source::SourceQuery; use crate::version::VersionQuery; use std::sync::Arc; diff --git a/hipcheck/src/data/query/github.rs b/hipcheck/src/data/query/github.rs index ac499ebe..7f8bb43c 100644 --- a/hipcheck/src/data/query/github.rs +++ b/hipcheck/src/data/query/github.rs @@ -4,7 +4,7 @@ use crate::error::Error; use crate::error::Result; -use crate::source::source::SourceQuery; +use crate::source::SourceQuery; use crate::target::KnownRemote; use std::sync::Arc; diff --git a/hipcheck/src/main.rs b/hipcheck/src/main.rs index e7c90098..083eb0e5 100644 --- a/hipcheck/src/main.rs +++ b/hipcheck/src/main.rs @@ -42,7 +42,7 @@ use crate::context::Context as _; use crate::error::Error; use crate::error::Result; use crate::plugin::{Plugin, PluginExecutor, PluginWithConfig}; -use crate::session::session::Session; +use crate::session::Session; use crate::setup::{resolve_and_transform_source, SourceType}; use crate::shell::verbosity::Verbosity; use crate::shell::Shell; diff --git a/hipcheck/src/metric/metric.rs b/hipcheck/src/metric/metric.rs deleted file mode 100644 index b6d757ea..00000000 --- a/hipcheck/src/metric/metric.rs +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -use std::sync::Arc; - -use crate::config::AttacksConfigQuery; -use crate::config::CommitConfigQuery; -use crate::data::git::GitProvider; -use crate::data::DependenciesProvider; -use crate::data::FuzzProvider; -use crate::data::ModuleProvider; -use crate::data::PullRequestReviewProvider; -use crate::error::Result; -use crate::metric::activity::{self, ActivityOutput}; 
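The one-line import fixes in the hunks above (`crate::source::source` becoming `crate::source`, `crate::session::session` becoming `crate::session`) all fall out of the same refactor: `clippy::module_inception` warns when a module declares a submodule with its own name, which is why each `x/x.rs` file in this diff is dissolved into its `x/mod.rs`. A minimal reproduction of the lint and the fix, with inline modules standing in for the on-disk files:

```rust
// Before: src/source/mod.rs declared `pub mod source;` (src/source/source.rs),
// so callers wrote the doubled path `crate::source::source::Item` unless a
// re-export papered over it.
mod before {
	pub mod source {
		#[allow(clippy::module_inception)] // the lint fires without this
		pub mod source {
			pub struct SourceQuery;
		}
		pub use self::source::SourceQuery;
	}
}

// After: the items live directly in source/mod.rs and callers write
// `crate::source::Item`.
mod after {
	pub mod source {
		pub struct SourceQuery;
	}
}

fn main() {
	let _ = before::source::SourceQuery;
	let _ = after::source::SourceQuery;
}
```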
-use crate::metric::affiliation::{self, AffiliationOutput}; -use crate::metric::binary::{self, BinaryOutput}; -use crate::metric::binary_detector::BinaryFile; -use crate::metric::churn::{self, ChurnOutput}; -use crate::metric::commit_trust::{self, CommitTrustOutput}; -use crate::metric::contributor_trust::{self, ContributorTrustOutput}; -use crate::metric::entropy::{self, EntropyOutput}; -use crate::metric::fuzz::{self, FuzzOutput}; -use crate::metric::identity::{self, IdentityOutput}; -use crate::metric::linguist::Linguist; -use crate::metric::module::{self, ModuleOutput}; -use crate::metric::review::{self, ReviewOutput}; -use crate::metric::typo::{self, TypoOutput}; - -/// Queries about metrics -#[salsa::query_group(MetricProviderStorage)] -pub trait MetricProvider: - AttacksConfigQuery - + BinaryFile - + CommitConfigQuery - + DependenciesProvider - + GitProvider - + Linguist - + ModuleProvider - + FuzzProvider - + PullRequestReviewProvider -{ - /// Returns result of activity metric - #[salsa::invoke(activity::activity_metric)] - fn activity_metric(&self) -> Result>; - - /// Returns result of affiliation metric - #[salsa::invoke(affiliation::affiliation_metric)] - fn affiliation_metric(&self) -> Result>; - - /// Returns result of binary metric - #[salsa::invoke(binary::binary_metric)] - fn binary_metric(&self) -> Result>; - - /// Returns result of churn metric - #[salsa::invoke(churn::churn_metric)] - fn churn_metric(&self) -> Result>; - - /// Returns result of contributor trust metric - #[salsa::invoke(commit_trust::commit_trust_metric)] - fn commit_trust_metric(&self) -> Result>; - - /// Returns result of contributor trust metric - #[salsa::invoke(contributor_trust::contributor_trust_metric)] - fn contributor_trust_metric(&self) -> Result>; - - /// Returns result of entropy metric - #[salsa::invoke(entropy::entropy_metric)] - fn entropy_metric(&self) -> Result>; - - /// Returns result of identity metric - #[salsa::invoke(identity::identity_metric)] - fn identity_metric(&self) -> Result>; - - /// Returns result of module analysis. 
- #[salsa::invoke(module::module_analysis)] - fn module_analysis(&self) -> Result>; - - /// Returns result of fuzz metric - #[salsa::invoke(fuzz::fuzz_metric)] - fn fuzz_metric(&self) -> Result>; - - /// Returns result of review metric - #[salsa::invoke(review::review_metric)] - fn review_metric(&self) -> Result>; - - /// Returns result of typo metric - #[salsa::invoke(typo::typo_metric)] - fn typo_metric(&self) -> Result>; -} diff --git a/hipcheck/src/metric/mod.rs b/hipcheck/src/metric/mod.rs index 2d01b6a4..7907cf30 100644 --- a/hipcheck/src/metric/mod.rs +++ b/hipcheck/src/metric/mod.rs @@ -12,11 +12,92 @@ pub mod fuzz; pub mod identity; pub mod linguist; mod math; -#[allow(clippy::module_inception)] -pub mod metric; pub mod module; pub mod review; pub mod typo; -pub use metric::MetricProvider; -pub use metric::MetricProviderStorage; +use crate::config::AttacksConfigQuery; +use crate::config::CommitConfigQuery; +use crate::data::git::GitProvider; +use crate::data::DependenciesProvider; +use crate::data::FuzzProvider; +use crate::data::ModuleProvider; +use crate::data::PullRequestReviewProvider; +use crate::error::Result; +use crate::metric::activity::ActivityOutput; +use crate::metric::affiliation::AffiliationOutput; +use crate::metric::binary::BinaryOutput; +use crate::metric::binary_detector::BinaryFile; +use crate::metric::churn::ChurnOutput; +use crate::metric::commit_trust::CommitTrustOutput; +use crate::metric::contributor_trust::ContributorTrustOutput; +use crate::metric::entropy::EntropyOutput; +use crate::metric::fuzz::FuzzOutput; +use crate::metric::identity::IdentityOutput; +use crate::metric::linguist::Linguist; +use crate::metric::module::ModuleOutput; +use crate::metric::review::ReviewOutput; +use crate::metric::typo::TypoOutput; +use std::sync::Arc; + +/// Queries about metrics +#[salsa::query_group(MetricProviderStorage)] +pub trait MetricProvider: + AttacksConfigQuery + + BinaryFile + + CommitConfigQuery + + DependenciesProvider + + GitProvider + + Linguist + + ModuleProvider + + FuzzProvider + + PullRequestReviewProvider +{ + /// Returns result of activity metric + #[salsa::invoke(activity::activity_metric)] + fn activity_metric(&self) -> Result>; + + /// Returns result of affiliation metric + #[salsa::invoke(affiliation::affiliation_metric)] + fn affiliation_metric(&self) -> Result>; + + /// Returns result of binary metric + #[salsa::invoke(binary::binary_metric)] + fn binary_metric(&self) -> Result>; + + /// Returns result of churn metric + #[salsa::invoke(churn::churn_metric)] + fn churn_metric(&self) -> Result>; + + /// Returns result of contributor trust metric + #[salsa::invoke(commit_trust::commit_trust_metric)] + fn commit_trust_metric(&self) -> Result>; + + /// Returns result of contributor trust metric + #[salsa::invoke(contributor_trust::contributor_trust_metric)] + fn contributor_trust_metric(&self) -> Result>; + + /// Returns result of entropy metric + #[salsa::invoke(entropy::entropy_metric)] + fn entropy_metric(&self) -> Result>; + + /// Returns result of identity metric + #[salsa::invoke(identity::identity_metric)] + fn identity_metric(&self) -> Result>; + + /// Returns result of module analysis. 
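Unlike `AnalysisProvider`, which leans on salsa's same-name-same-module default, the `MetricProvider` trait being reassembled here names each implementation function explicitly with `#[salsa::invoke(..)]`, letting the functions live in child modules. A stripped-down sketch of that wiring (hypothetical names, same 0.16-era salsa API as above):

```rust
use std::sync::Arc;

mod activity {
	use super::Metrics;
	use std::sync::Arc;

	// The free function the query dispatches to; it can live anywhere,
	// since #[salsa::invoke] names it explicitly.
	pub fn activity_metric(db: &dyn Metrics) -> Arc<u64> {
		Arc::new(db.weeks_since_commit() * 7)
	}
}

#[salsa::query_group(MetricsStorage)]
pub trait Metrics {
	#[salsa::input]
	fn weeks_since_commit(&self) -> u64;

	#[salsa::invoke(activity::activity_metric)]
	fn activity_metric(&self) -> Arc<u64>;
}
```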
+ #[salsa::invoke(module::module_analysis)] + fn module_analysis(&self) -> Result>; + + /// Returns result of fuzz metric + #[salsa::invoke(fuzz::fuzz_metric)] + fn fuzz_metric(&self) -> Result>; + + /// Returns result of review metric + #[salsa::invoke(review::review_metric)] + fn review_metric(&self) -> Result>; + + /// Returns result of typo metric + #[salsa::invoke(typo::typo_metric)] + fn typo_metric(&self) -> Result>; +} diff --git a/hipcheck/src/session/mod.rs b/hipcheck/src/session/mod.rs index 8d0a45aa..474dd048 100644 --- a/hipcheck/src/session/mod.rs +++ b/hipcheck/src/session/mod.rs @@ -2,6 +2,398 @@ pub mod cyclone_dx; pub mod pm; -#[allow(clippy::module_inception)] -pub mod session; pub mod spdx; + +use crate::analysis::score::ScoringProviderStorage; +use crate::analysis::AnalysisProviderStorage; +use crate::command_util::DependentProgram; +use crate::config::AttacksConfigQueryStorage; +use crate::config::CommitConfigQueryStorage; +use crate::config::Config; +use crate::config::ConfigSource; +use crate::config::ConfigSourceStorage; +use crate::config::FuzzConfigQueryStorage; +use crate::config::LanguagesConfigQueryStorage; +use crate::config::PracticesConfigQueryStorage; +use crate::config::RiskConfigQueryStorage; +use crate::config::WeightTreeQueryStorage; +use crate::context::Context as _; +use crate::data::git::get_git_version; +use crate::data::git::GitProviderStorage; +use crate::data::npm::get_npm_version; +use crate::data::CodeQualityProviderStorage; +use crate::data::DependenciesProviderStorage; +use crate::data::FuzzProviderStorage; +use crate::data::GitHubProviderStorage; +use crate::data::ModuleProvider; +use crate::data::ModuleProviderStorage; +use crate::data::PullRequestReviewProviderStorage; +use crate::error::Error; +use crate::error::Result; +use crate::hc_error; +use crate::metric::binary_detector::BinaryFileStorage; +use crate::metric::linguist::LinguistStorage; +use crate::metric::MetricProviderStorage; +use crate::report::Format; +use crate::report::ReportParams; +use crate::report::ReportParamsStorage; +use crate::session::cyclone_dx::extract_cyclonedx_download_url; +use crate::session::pm::detect_and_extract; +use crate::session::pm::extract_repo_for_maven; +use crate::session::spdx::extract_spdx_download_url; +use crate::shell::spinner_phase::SpinnerPhase; +use crate::shell::Shell; +use crate::source; +use crate::source::SourceQuery; +use crate::source::SourceQueryStorage; +use crate::target::SbomStandard; +use crate::target::{Target, TargetSeed, TargetSeedKind}; +use crate::version::get_version; +use crate::version::VersionQuery; +use crate::version::VersionQueryStorage; +use chrono::prelude::*; +use dotenv::var; +use std::fmt; +use std::path::Path; +use std::path::PathBuf; +use std::rc::Rc; +use std::result::Result as StdResult; +use std::sync::Arc; +use std::time::Duration; +use url::Url; + +/// Immutable configuration and base data for a run of Hipcheck. 
+#[salsa::database( + AnalysisProviderStorage, + AttacksConfigQueryStorage, + BinaryFileStorage, + CodeQualityProviderStorage, + CommitConfigQueryStorage, + ConfigSourceStorage, + DependenciesProviderStorage, + GitProviderStorage, + GitHubProviderStorage, + LanguagesConfigQueryStorage, + LinguistStorage, + MetricProviderStorage, + ModuleProviderStorage, + FuzzConfigQueryStorage, + FuzzProviderStorage, + PracticesConfigQueryStorage, + PullRequestReviewProviderStorage, + ReportParamsStorage, + RiskConfigQueryStorage, + ScoringProviderStorage, + SourceQueryStorage, + VersionQueryStorage, + WeightTreeQueryStorage +)] +pub struct Session { + // Query storage. + storage: salsa::Storage<Session>, +} + +// Required by our query groups +impl salsa::Database for Session {} + +// Cannot be derived because `salsa::Storage` does not implement it +impl fmt::Debug for Session { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Session {{ storage: salsa::Storage<Session> }}") + } +} + +impl Session { + // Note that error handling in the constructor for `Session` is a little awkward. + // This is because we want to be able to hand back the `Shell` passed in if setup + // fails, so instead of using the `?` operator, we need to do the returning manually. + // + // You may think we could use `map_err` and the question mark operator to bundle + // the shell with whatever error we have and hand them back, but unfortunately this + // doesn't work. When you use `shell` in the `map_err` closure, you're moving it + // unconditionally; even though `map_err`'s closure is only run in the case of + // an error (in which case you're also returning early), the Rust compiler isn't + // smart enough to figure that out. Maybe this will improve in the future, but for + // now, we have to do it by hand. + + /// Construct a new `Session` which owns all the data needed in later phases. + #[allow(clippy::too_many_arguments)] + pub fn new( + target: &TargetSeed, + config_path: Option<PathBuf>, + data_path: Option<PathBuf>, + home_dir: Option<PathBuf>, + policy_path: Option<PathBuf>, + format: Format, + raw_version: &str, + ) -> StdResult<Session, Error> { + /*=================================================================== + * Setting up the session. + *-----------------------------------------------------------------*/ + + // Input query setters are implemented on `Session`, not + // `salsa::Storage` + let mut session = Session { + storage: Default::default(), + }; + + /*=================================================================== + * Printing the prelude. + *-----------------------------------------------------------------*/ + + Shell::print_prelude(target.to_string()); + + /*=================================================================== + * Loading current versions of needed software (git and npm) into salsa. + *-----------------------------------------------------------------*/ + + let (git_version, npm_version) = match load_software_versions() { + Ok(results) => results, + Err(err) => return Err(err), + }; + + session.set_git_version(Rc::new(git_version)); + session.set_npm_version(Rc::new(npm_version)); + + /*=================================================================== + * Loading configuration. + *-----------------------------------------------------------------*/ + + // Check if a currently unsupported policy file was provided + // TODO: Remove this error once policy files are supported + if policy_path.is_some() { + return Err(hc_error!( + "Policy files are not supported by Hipcheck at this time."
+ )); + } + + let (config, config_dir, data_dir, hc_github_token) = + match load_config_and_data(config_path.as_deref(), data_path.as_deref()) { + Ok(results) => results, + Err(err) => return Err(err), + }; + + // Set config input queries for use below + session.set_config(Rc::new(config)); + session.set_config_dir(Rc::new(config_dir)); + + // Set data folder location for module analysis + session.set_data_dir(Arc::new(data_dir)); + + // Set github token in salsa + session.set_github_api_token(Some(Rc::new(hc_github_token))); + + /*=================================================================== + * Resolving the Hipcheck home. + *-----------------------------------------------------------------*/ + + let home = match home_dir + .as_deref() + .map(ToOwned::to_owned) + .ok_or_else(|| hc_error!("can't find cache directory")) + { + Ok(results) => results, + Err(err) => return Err(err), + }; + + /*=================================================================== + * Resolving the source. + *-----------------------------------------------------------------*/ + + let target = match load_target(target, &home) { + Ok(results) => results, + Err(err) => return Err(err), + }; + + session.set_target(Arc::new(target)); + + /*=================================================================== + * Resolving the Hipcheck version. + *-----------------------------------------------------------------*/ + + let version = match get_version(raw_version) { + Ok(version) => version, + Err(err) => return Err(err), + }; + + session.set_hc_version(Rc::new(version)); + + /*=================================================================== + * Remaining input queries. + *-----------------------------------------------------------------*/ + + // Set remaining input queries + session.set_format(format); + session.set_started_at(Local::now().into()); + + Ok(session) + } +} + +fn load_software_versions() -> Result<(String, String)> { + let git_version = get_git_version()?; + DependentProgram::Git.check_version(&git_version)?; + + let npm_version = get_npm_version()?; + DependentProgram::Npm.check_version(&npm_version)?; + + Ok((git_version, npm_version)) +} + +fn load_config_and_data( + config_path: Option<&Path>, + data_path: Option<&Path>, +) -> Result<(Config, PathBuf, PathBuf, String)> { + // Start the phase. + let phase = SpinnerPhase::start("loading configuration and data files"); + // Increment the phase into the "running" stage. + phase.inc(); + // Set the spinner phase to tick constantly, 10 times a second. + phase.enable_steady_tick(Duration::from_millis(100)); + + // Resolve the path to the config file. + let valid_config_path = config_path + .ok_or_else(|| hc_error!("Failed to load configuration. Please make sure the path set by the hc_config env variable exists."))?; + + // Load the configuration file. + let config = Config::load_from(valid_config_path) + .context("Failed to load configuration. If you have not yet done so on this system, try running `hc setup`. Otherwise, please make sure the config files are in the config directory.")?; + + // Get the directory the data file is in. + let data_dir = data_path + .ok_or_else(|| hc_error!("Failed to load data files. Please make sure the path set by the hc_data env variable exists."))? + .to_owned(); + + // Resolve the github token file. 
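`load_config_and_data` above and `load_target` just below share the same progress-reporting shape around their fallible work, using only `SpinnerPhase` calls that appear in this file. The shape in isolation (the loading step is a stand-in; `Config::load_from` is the call used above):

```rust
use std::path::Path;
use std::time::Duration;

fn phased_load(path: &Path) -> Result<Config> {
	// Start a named phase; the shell renders it as a spinner.
	let phase = SpinnerPhase::start("loading configuration and data files");
	// Move the phase into its "running" stage, animating at 10 ticks/sec.
	phase.inc();
	phase.enable_steady_tick(Duration::from_millis(100));

	// Fallible work; an early `?` return drops the phase without
	// marking it successful.
	let config = Config::load_from(path)?;

	phase.finish_successful();
	Ok(config)
}
```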
+ let hc_github_token = resolve_token()?; + + phase.finish_successful(); + + Ok(( + config, + valid_config_path.to_path_buf(), + data_dir, + hc_github_token, + )) +} + +fn load_target(seed: &TargetSeed, home: &Path) -> Result { + // Resolve the source specifier into an actual source. + let phase_desc = match seed.kind { + TargetSeedKind::LocalRepo(_) | TargetSeedKind::RemoteRepo(_) => { + "resolving git repository target" + } + TargetSeedKind::Package(_) => "resolving package target", + TargetSeedKind::Sbom(_) => "parsing SBOM document", + TargetSeedKind::MavenPackage(_) => "resolving maven package target", + }; + + let phase = SpinnerPhase::start(phase_desc); + // Set the phase to tick steadily 10 times a second. + phase.enable_steady_tick(Duration::from_millis(100)); + let target = resolve_target(seed, &phase, home)?; + phase.finish_successful(); + + Ok(target) +} + +/// Resolves github token for Hipcheck to query github with. +fn resolve_token() -> Result { + match var("HC_GITHUB_TOKEN") { + Ok(token) => Ok(token), + _ => Ok("".to_string()), + } +} + +/// Resolves the target specifier into an actual target. +fn resolve_target(seed: &TargetSeed, phase: &SpinnerPhase, home: &Path) -> Result { + use TargetSeedKind::*; + #[cfg(feature = "print-timings")] + let _0 = crate::benchmarking::print_scope_time!("resolve_source"); + + match &seed.kind { + RemoteRepo(remote) => { + source::resolve_remote_repo(phase, home, remote.to_owned(), seed.refspec.clone()) + } + LocalRepo(source) => { + // Because other TargetSeedKind variants need to transfer refspec info from the CLI, + // there's overlap with LocalGitRepo.git_ref. Copy CLI refspec here. + let mut source = source.to_owned(); + source.git_ref = seed.refspec.clone().unwrap_or("HEAD".to_owned()); + source::resolve_local_repo(phase, home, source) + } + Package(package) => { + // Attempt to get the git repo URL for the package + let package_git_repo_url = + detect_and_extract(package).context("Could not get git repo URL for package")?; + + // Create Target for a remote git repo originating with a package + let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?; + // TargetSeed validation step should have already ensured both refspec and package + // version are not provided, so we can do this + let refspec = if let Some(refspec) = &seed.refspec { + Some(refspec.to_owned()) + } else if package.has_version() { + Some(package.version.to_owned()) + } else { + None + }; + source::resolve_remote_package_repo( + phase, + home, + package_git_repo, + format!("{}@{}", package.name, package.version), + refspec, + ) + } + MavenPackage(package) => { + // Attempt to get the git repo URL for the Maven package + let package_git_repo_url = extract_repo_for_maven(package.url.as_ref()) + .context("Could not get git repo URL for Maven package")?; + + // Create Target for a remote git repo originating with a Maven package + let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?; + // We do not currently harvest version info from the maven url + source::resolve_remote_package_repo( + phase, + home, + package_git_repo, + package.url.to_string(), + seed.refspec.clone(), + ) + } + Sbom(sbom) => { + let source = sbom.path.to_str().ok_or(hc_error!( + "SBOM path contained one or more invalid characters" + ))?; + // Attempt to get the download location for the local SBOM package, using the function + // appropriate to the SBOM standard + let download_url = match sbom.standard { + SbomStandard::Spdx => 
Url::parse(&extract_spdx_download_url(source)?)?, + SbomStandard::CycloneDX => extract_cyclonedx_download_url(source)?, + }; + + // Create a Target for a remote git repo originating with an SBOM + let sbom_git_repo = source::get_remote_repo_from_url(download_url)?; + source::resolve_remote_package_repo( + phase, + home, + sbom_git_repo, + source.to_string(), + seed.refspec.clone(), + ) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::util::test::with_env_vars; + + #[test] + fn resolve_token_test() { + let vars = vec![("HC_GITHUB_TOKEN", Some("test"))]; + with_env_vars(vars, || assert_eq!(resolve_token().unwrap(), "test")); + } +} diff --git a/hipcheck/src/session/session.rs b/hipcheck/src/session/session.rs deleted file mode 100644 index 005d29ce..00000000 --- a/hipcheck/src/session/session.rs +++ /dev/null @@ -1,399 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -// mod pm; -// mod spdx; - -use crate::analysis::score::ScoringProviderStorage; -use crate::analysis::AnalysisProviderStorage; -use crate::command_util::DependentProgram; -use crate::config::AttacksConfigQueryStorage; -use crate::config::CommitConfigQueryStorage; -use crate::config::Config; -use crate::config::ConfigSource; -use crate::config::ConfigSourceStorage; -use crate::config::FuzzConfigQueryStorage; -use crate::config::LanguagesConfigQueryStorage; -use crate::config::PracticesConfigQueryStorage; -use crate::config::RiskConfigQueryStorage; -use crate::config::WeightTreeQueryStorage; -use crate::context::Context as _; -use crate::data::git::get_git_version; -use crate::data::git::GitProviderStorage; -use crate::data::npm::get_npm_version; -use crate::data::CodeQualityProviderStorage; -use crate::data::DependenciesProviderStorage; -use crate::data::FuzzProviderStorage; -use crate::data::GitHubProviderStorage; -use crate::data::ModuleProvider; -use crate::data::ModuleProviderStorage; -use crate::data::PullRequestReviewProviderStorage; -use crate::error::Error; -use crate::error::Result; -use crate::hc_error; -use crate::metric::binary_detector::BinaryFileStorage; -use crate::metric::linguist::LinguistStorage; -use crate::metric::MetricProviderStorage; -use crate::report::Format; -use crate::report::ReportParams; -use crate::report::ReportParamsStorage; -use crate::session::pm::detect_and_extract; -use crate::session::spdx::extract_spdx_download_url; -use crate::shell::spinner_phase::SpinnerPhase; -use crate::shell::Shell; -use crate::source::source; -use crate::source::source::SourceQuery; -use crate::source::source::SourceQueryStorage; -use crate::target::SbomStandard; -use crate::target::{Target, TargetSeed, TargetSeedKind}; -use crate::version::get_version; -use crate::version::VersionQuery; -use crate::version::VersionQueryStorage; -use chrono::prelude::*; -use dotenv::var; -use std::fmt; -use std::path::Path; -use std::path::PathBuf; -use std::rc::Rc; -use std::result::Result as StdResult; -use std::sync::Arc; -use std::time::Duration; -use url::Url; - -use super::cyclone_dx::extract_cyclonedx_download_url; -use super::pm::extract_repo_for_maven; - -/// Immutable configuration and base data for a run of Hipcheck. 
-#[salsa::database( - AnalysisProviderStorage, - AttacksConfigQueryStorage, - BinaryFileStorage, - CodeQualityProviderStorage, - CommitConfigQueryStorage, - ConfigSourceStorage, - DependenciesProviderStorage, - GitProviderStorage, - GitHubProviderStorage, - LanguagesConfigQueryStorage, - LinguistStorage, - MetricProviderStorage, - ModuleProviderStorage, - FuzzConfigQueryStorage, - FuzzProviderStorage, - PracticesConfigQueryStorage, - PullRequestReviewProviderStorage, - ReportParamsStorage, - RiskConfigQueryStorage, - ScoringProviderStorage, - SourceQueryStorage, - VersionQueryStorage, - WeightTreeQueryStorage -)] -pub struct Session { - // Query storage. - storage: salsa::Storage, -} - -// Required by our query groups -impl salsa::Database for Session {} - -// Cannot be derived because `salsa::Storage` does not implement it -impl fmt::Debug for Session { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Session {{ storage: salsa::Storage }}") - } -} - -impl Session { - // Note that error handling in the constructor for `Session` is a little awkward. - // This is because we want to be able to hand back the `Shell` passed in if setup - // fails, so instead of using the `?` operator, we need to do the returning manually. - // - // You may think we could use `map_err` and the question mark operator to bundle - // the shell with whatever error we have and hand them back, but unfortunately this - // doesn't work. When you use `shell` in the `map_err` closure, you're moving it - // unconditionally, even though `map_err`'s closure is only run in the case of - // an error (in which case you're also returning early), the Rust compiler isn't - // smart enough to figure that out. Maybe this will improve in the future, but for - // now, we have to do it by hand. - - /// Construct a new `Session` which owns all the data needed in later phases. - #[allow(clippy::too_many_arguments)] - pub fn new( - target: &TargetSeed, - config_path: Option, - data_path: Option, - home_dir: Option, - policy_path: Option, - format: Format, - raw_version: &str, - ) -> StdResult { - /*=================================================================== - * Setting up the session. - *-----------------------------------------------------------------*/ - - // Input query setters are implemented on `Session`, not - // `salsa::Storage` - let mut session = Session { - storage: Default::default(), - }; - - /*=================================================================== - * Printing the prelude. - *-----------------------------------------------------------------*/ - - Shell::print_prelude(target.to_string()); - - /*=================================================================== - * Loading current versions of needed software git, npm, and eslint into salsa. - *-----------------------------------------------------------------*/ - - let (git_version, npm_version) = match load_software_versions() { - Ok(results) => results, - Err(err) => return Err(err), - }; - - session.set_git_version(Rc::new(git_version)); - session.set_npm_version(Rc::new(npm_version)); - - /*=================================================================== - * Loading configuration. - *-----------------------------------------------------------------*/ - - // Check if a currently unsuporrted policy file was provided - // TODO: Remove this error once policy files are supported - if policy_path.is_some() { - return Err(hc_error!( - "Policy files are not supported by Hipcheck at this time." 
- )); - } - - let (config, config_dir, data_dir, hc_github_token) = - match load_config_and_data(config_path.as_deref(), data_path.as_deref()) { - Ok(results) => results, - Err(err) => return Err(err), - }; - - // Set config input queries for use below - session.set_config(Rc::new(config)); - session.set_config_dir(Rc::new(config_dir)); - - // Set data folder location for module analysis - session.set_data_dir(Arc::new(data_dir)); - - // Set github token in salsa - session.set_github_api_token(Some(Rc::new(hc_github_token))); - - /*=================================================================== - * Resolving the Hipcheck home. - *-----------------------------------------------------------------*/ - - let home = match home_dir - .as_deref() - .map(ToOwned::to_owned) - .ok_or_else(|| hc_error!("can't find cache directory")) - { - Ok(results) => results, - Err(err) => return Err(err), - }; - - /*=================================================================== - * Resolving the source. - *-----------------------------------------------------------------*/ - - let target = match load_target(target, &home) { - Ok(results) => results, - Err(err) => return Err(err), - }; - - session.set_target(Arc::new(target)); - - /*=================================================================== - * Resolving the Hipcheck version. - *-----------------------------------------------------------------*/ - - let version = match get_version(raw_version) { - Ok(version) => version, - Err(err) => return Err(err), - }; - - session.set_hc_version(Rc::new(version)); - - /*=================================================================== - * Remaining input queries. - *-----------------------------------------------------------------*/ - - // Set remaining input queries - session.set_format(format); - session.set_started_at(Local::now().into()); - - Ok(session) - } -} - -fn load_software_versions() -> Result<(String, String)> { - let git_version = get_git_version()?; - DependentProgram::Git.check_version(&git_version)?; - - let npm_version = get_npm_version()?; - DependentProgram::Npm.check_version(&npm_version)?; - - Ok((git_version, npm_version)) -} - -fn load_config_and_data( - config_path: Option<&Path>, - data_path: Option<&Path>, -) -> Result<(Config, PathBuf, PathBuf, String)> { - // Start the phase. - let phase = SpinnerPhase::start("loading configuration and data files"); - // Increment the phase into the "running" stage. - phase.inc(); - // Set the spinner phase to tick constantly, 10 times a second. - phase.enable_steady_tick(Duration::from_millis(100)); - - // Resolve the path to the config file. - let valid_config_path = config_path - .ok_or_else(|| hc_error!("Failed to load configuration. Please make sure the path set by the hc_config env variable exists."))?; - - // Load the configuration file. - let config = Config::load_from(valid_config_path) - .context("Failed to load configuration. If you have not yet done so on this system, try running `hc setup`. Otherwise, please make sure the config files are in the config directory.")?; - - // Get the directory the data file is in. - let data_dir = data_path - .ok_or_else(|| hc_error!("Failed to load data files. Please make sure the path set by the hc_data env variable exists."))? - .to_owned(); - - // Resolve the github token file. 
- let hc_github_token = resolve_token()?; - - phase.finish_successful(); - - Ok(( - config, - valid_config_path.to_path_buf(), - data_dir, - hc_github_token, - )) -} - -fn load_target(seed: &TargetSeed, home: &Path) -> Result { - // Resolve the source specifier into an actual source. - let phase_desc = match seed.kind { - TargetSeedKind::LocalRepo(_) | TargetSeedKind::RemoteRepo(_) => { - "resolving git repository target" - } - TargetSeedKind::Package(_) => "resolving package target", - TargetSeedKind::Sbom(_) => "parsing SBOM document", - TargetSeedKind::MavenPackage(_) => "resolving maven package target", - }; - - let phase = SpinnerPhase::start(phase_desc); - // Set the phase to tick steadily 10 times a second. - phase.enable_steady_tick(Duration::from_millis(100)); - let target = resolve_target(seed, &phase, home)?; - phase.finish_successful(); - - Ok(target) -} - -/// Resolves github token for Hipcheck to query github with. -fn resolve_token() -> Result { - match var("HC_GITHUB_TOKEN") { - Ok(token) => Ok(token), - _ => Ok("".to_string()), - } -} - -/// Resolves the target specifier into an actual target. -fn resolve_target(seed: &TargetSeed, phase: &SpinnerPhase, home: &Path) -> Result { - use TargetSeedKind::*; - #[cfg(feature = "print-timings")] - let _0 = crate::benchmarking::print_scope_time!("resolve_source"); - - match &seed.kind { - RemoteRepo(remote) => { - source::resolve_remote_repo(phase, home, remote.to_owned(), seed.refspec.clone()) - } - LocalRepo(source) => { - // Because other TargetSeedKind variants need to transfer refspec info from the CLI, - // there's overlap with LocalGitRepo.git_ref. Copy CLI refspec here. - let mut source = source.to_owned(); - source.git_ref = seed.refspec.clone().unwrap_or("HEAD".to_owned()); - source::resolve_local_repo(phase, home, source) - } - Package(package) => { - // Attempt to get the git repo URL for the package - let package_git_repo_url = - detect_and_extract(package).context("Could not get git repo URL for package")?; - - // Create Target for a remote git repo originating with a package - let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?; - // TargetSeed validation step should have already ensured both refspec and package - // version are not provided, so we can do this - let refspec = if let Some(refspec) = &seed.refspec { - Some(refspec.to_owned()) - } else if package.has_version() { - Some(package.version.to_owned()) - } else { - None - }; - source::resolve_remote_package_repo( - phase, - home, - package_git_repo, - format!("{}@{}", package.name, package.version), - refspec, - ) - } - MavenPackage(package) => { - // Attempt to get the git repo URL for the Maven package - let package_git_repo_url = extract_repo_for_maven(package.url.as_ref()) - .context("Could not get git repo URL for Maven package")?; - - // Create Target for a remote git repo originating with a Maven package - let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?; - // We do not currently harvest version info from the maven url - source::resolve_remote_package_repo( - phase, - home, - package_git_repo, - package.url.to_string(), - seed.refspec.clone(), - ) - } - Sbom(sbom) => { - let source = sbom.path.to_str().ok_or(hc_error!( - "SBOM path contained one or more invalid characters" - ))?; - // Attempt to get the download location for the local SBOM package, using the function - // appropriate to the SBOM standard - let download_url = match sbom.standard { - SbomStandard::Spdx => 
-
-/// Resolves the target specifier into an actual target.
-fn resolve_target(seed: &TargetSeed, phase: &SpinnerPhase, home: &Path) -> Result<Target> {
- use TargetSeedKind::*;
- #[cfg(feature = "print-timings")]
- let _0 = crate::benchmarking::print_scope_time!("resolve_source");
-
- match &seed.kind {
- RemoteRepo(remote) => {
- source::resolve_remote_repo(phase, home, remote.to_owned(), seed.refspec.clone())
- }
- LocalRepo(source) => {
- // Because other TargetSeedKind variants need to transfer refspec info from the CLI,
- // there's overlap with LocalGitRepo.git_ref. Copy CLI refspec here.
- let mut source = source.to_owned();
- source.git_ref = seed.refspec.clone().unwrap_or("HEAD".to_owned());
- source::resolve_local_repo(phase, home, source)
- }
- Package(package) => {
- // Attempt to get the git repo URL for the package
- let package_git_repo_url =
- detect_and_extract(package).context("Could not get git repo URL for package")?;
-
- // Create Target for a remote git repo originating with a package
- let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?;
- // TargetSeed validation step should have already ensured both refspec and package
- // version are not provided, so we can do this
- let refspec = if let Some(refspec) = &seed.refspec {
- Some(refspec.to_owned())
- } else if package.has_version() {
- Some(package.version.to_owned())
- } else {
- None
- };
- source::resolve_remote_package_repo(
- phase,
- home,
- package_git_repo,
- format!("{}@{}", package.name, package.version),
- refspec,
- )
- }
- MavenPackage(package) => {
- // Attempt to get the git repo URL for the Maven package
- let package_git_repo_url = extract_repo_for_maven(package.url.as_ref())
- .context("Could not get git repo URL for Maven package")?;
-
- // Create Target for a remote git repo originating with a Maven package
- let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?;
- // We do not currently harvest version info from the maven url
- source::resolve_remote_package_repo(
- phase,
- home,
- package_git_repo,
- package.url.to_string(),
- seed.refspec.clone(),
- )
- }
- Sbom(sbom) => {
- let source = sbom.path.to_str().ok_or(hc_error!(
- "SBOM path contained one or more invalid characters"
- ))?;
- // Attempt to get the download location for the local SBOM package, using the function
- // appropriate to the SBOM standard
- let download_url = match sbom.standard {
- SbomStandard::Spdx => Url::parse(&extract_spdx_download_url(source)?)?,
- SbomStandard::CycloneDX => extract_cyclonedx_download_url(source)?,
- };
-
- // Create a Target for a remote git repo originating with an SBOM
- let sbom_git_repo = source::get_remote_repo_from_url(download_url)?;
- source::resolve_remote_package_repo(
- phase,
- home,
- sbom_git_repo,
- source.to_string(),
- seed.refspec.clone(),
- )
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use crate::util::test::with_env_vars;
-
- #[test]
- fn resolve_token_test() {
- let vars = vec![("HC_GITHUB_TOKEN", Some("test"))];
- with_env_vars(vars, || assert_eq!(resolve_token().unwrap(), "test"));
- }
-}
diff --git a/hipcheck/src/source/mod.rs b/hipcheck/src/source/mod.rs
index 19585d37..cc6eb132 100644
--- a/hipcheck/src/source/mod.rs
+++ b/hipcheck/src/source/mod.rs
@@ -2,5 +2,335 @@
 pub mod git;
 pub mod query;
-#[allow(clippy::module_inception)]
-pub mod source;
+
+use crate::context::Context;
+use crate::data::git_command::GitCommand;
+use crate::error::Error;
+use crate::error::Result;
+use crate::hc_error;
+use crate::shell::spinner_phase::SpinnerPhase;
+pub use crate::source::query::*;
+use crate::target::{KnownRemote, LocalGitRepo, RemoteGitRepo, Target};
+use log::debug;
+use pathbuf::pathbuf;
+use std::path::Path;
+use std::path::PathBuf;
+use url::Host;
+use url::Url;
+
+/// Resolving is how we ensure we have a valid, ready-to-go source of Git data
+/// for the rest of Hipcheck's analysis. The below functions handle the resolution
+/// of local or remote repos.
+///
+/// If the repo is local, the resolve function copies it into Hipcheck's cache
+/// and works on that copy, so all operations on the original repository are
+/// read-only and won't harm it at all.
+///
+/// If it's a remote source, Hipcheck will clone the source so it can work with a
+/// local copy, putting the clone in '<root>/clones'. It also notes whether a
+/// remote repo is from a known or unknown host, because some forms of analysis
+/// rely on accessing the APIs of certain known hosts (currently just GitHub).
+///
+/// In either case, it also gets the hash of the HEAD commit, so we can
+/// make sure future operations are all done relative to the HEAD, and that any
+/// cached data records what the HEAD was at the time of caching, to enable
+/// cache invalidation.
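To make the flow described above concrete, here is a hedged sketch of driving these functions end to end; the wrapper function and repository URL are illustrative and not part of this module:

fn resolve_example(phase: &SpinnerPhase, root: &Path) -> Result<Target> {
    // Parse a remote URL and classify its host (GitHub is the only known host).
    let url = Url::parse("https://github.com/mitre/hipcheck.git")?;
    let remote = get_remote_repo_from_url(url)?;
    // Clones into <root>/clones/github/mitre/hipcheck, checks out HEAD, and
    // records the resulting git ref on the returned Target.
    resolve_remote_repo(phase, root, remote, None)
}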
+
+/// Resolves a specified local git repo into a Target for analysis by Hipcheck
+pub fn resolve_local_repo(
+ phase: &SpinnerPhase,
+ root: &Path,
+ local_repo: LocalGitRepo,
+) -> Result<Target> {
+ let src = local_repo.path.clone();
+
+ let specifier = src
+ .to_str()
+ .ok_or(hc_error!(
+ "Path to local repo contained one or more invalid characters"
+ ))?
+ .to_string();
+
+ phase.update_status("copying");
+ let path = clone_local_repo_to_cache(src.as_path(), root)?;
+ let git_ref = git::checkout(&path, Some(local_repo.git_ref.clone()))?;
+ phase.update_status("trying to get remote");
+ let remote = match try_resolve_remote_for_local(&path) {
+ Ok(remote) => Some(remote),
+ Err(err) => {
+ log::debug!("failed to get remote [err='{}']", err);
+ None
+ }
+ };
+
+ let local = LocalGitRepo { path, git_ref };
+
+ Ok(Target {
+ specifier,
+ local,
+ remote,
+ package: None,
+ })
+}
+
+/// Creates a RemoteGitRepo struct from a given git URL by identifying if it is from a known host (currently only GitHub) or not
+pub fn get_remote_repo_from_url(url: Url) -> Result<RemoteGitRepo> {
+ match url.host() {
+ Some(Host::Domain("github.com")) => {
+ let (owner, repo) = get_github_owner_and_repo(&url)?;
+ Ok(RemoteGitRepo {
+ url,
+ known_remote: Some(KnownRemote::GitHub { owner, repo }),
+ })
+ }
+ Some(_) => Ok(RemoteGitRepo {
+ url,
+ known_remote: None,
+ }),
+ None => Err(hc_error!("Target repo URL is missing a host")),
+ }
+}
+
+/// Resolves a remote git repo originally specified by its remote location into a Target for analysis by Hipcheck
+pub fn resolve_remote_repo(
+ phase: &SpinnerPhase,
+ root: &Path,
+ remote_repo: RemoteGitRepo,
+ refspec: Option<String>,
+) -> Result<Target> {
+ // For remote repos originally specified by their URL, the specifier is just that URL
+ let specifier = remote_repo.url.to_string();
+
+ let path = match remote_repo.known_remote {
+ Some(KnownRemote::GitHub {
+ ref owner,
+ ref repo,
+ }) => pathbuf![root, "clones", "github", owner, repo],
+ _ => {
+ let clone_dir = build_unknown_remote_clone_dir(&remote_repo.url)
+ .context("failed to prepare local clone directory")?;
+ pathbuf![root, "clones", "unknown", &clone_dir]
+ }
+ };
+
+ let git_ref = clone_or_update_remote(phase, &remote_repo.url, &path, refspec)?;
+
+ let local = LocalGitRepo { path, git_ref };
+
+ Ok(Target {
+ specifier,
+ local,
+ remote: Some(remote_repo),
+ package: None,
+ })
+}
+
+/// Resolves a remote git repo derived from a source other than its remote location (e.g. a package or SPDX file) into a Target for analysis by Hipcheck
+pub fn resolve_remote_package_repo(
+ phase: &SpinnerPhase,
+ root: &Path,
+ remote_repo: RemoteGitRepo,
+ specifier: String,
+ refspec: Option<String>,
+) -> Result<Target> {
+ let mut target = resolve_remote_repo(phase, root, remote_repo, refspec)?;
+ target.specifier = specifier;
+ Ok(target)
+}
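A short sketch of the known-host classification done by get_remote_repo_from_url, written as a test in the style of the module's existing tests (the URLs are arbitrary examples):

#[test]
fn github_is_the_only_known_remote() -> Result<()> {
    // A github.com URL is classified as a known remote with owner and repo.
    let github = get_remote_repo_from_url(Url::parse("https://github.com/owner/repo.git")?)?;
    assert!(matches!(github.known_remote, Some(KnownRemote::GitHub { .. })));

    // Any other host is accepted but left unclassified.
    let other = get_remote_repo_from_url(Url::parse("https://git.example.com/owner/repo.git")?)?;
    assert!(other.known_remote.is_none());
    Ok(())
}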
+
+fn try_resolve_remote_for_local(local: &Path) -> Result<RemoteGitRepo> {
+ let url = {
+ let symbolic_ref = get_symbolic_ref(local)?;
+
+ log::trace!("local source has symbolic ref [ref='{:?}']", symbolic_ref);
+
+ if symbolic_ref.is_empty() {
+ return Err(Error::msg("no symbolic ref found"));
+ }
+
+ let upstream = get_upstream_for_ref(local, &symbolic_ref)?;
+
+ log::trace!("local source has upstream [upstream='{:?}']", upstream);
+
+ if upstream.is_empty() {
+ return Err(Error::msg("no upstream found"));
+ }
+
+ let remote = get_remote_from_upstream(&upstream)
+ .ok_or_else(|| hc_error!("failed to get remote name from upstream '{}'", upstream))?;
+
+ log::trace!("local source has remote [remote='{:?}']", remote);
+
+ if remote.is_empty() {
+ return Err(Error::msg("no remote found"));
+ }
+
+ let raw = get_url_for_remote(local, remote)?;
+
+ log::trace!("local source remote has url [url='{}']", raw);
+
+ if raw.is_empty() {
+ return Err(Error::msg("no URL found for remote"));
+ }
+
+ Url::parse(&raw)?
+ };
+
+ let host = url
+ .host_str()
+ .ok_or_else(|| hc_error!("no host name in '{}'", url))?;
+
+ match host {
+ "github.com" => {
+ let (owner, repo) = get_github_owner_and_repo(&url)?;
+ Ok(RemoteGitRepo {
+ url,
+ known_remote: Some(KnownRemote::GitHub { owner, repo }),
+ })
+ }
+ _ => Ok(RemoteGitRepo {
+ url,
+ known_remote: None,
+ }),
+ }
+}
+
+fn get_remote_from_upstream(upstream: &str) -> Option<&str> {
+ upstream.split('/').next()
+}
+
+pub fn get_github_owner_and_repo(url: &Url) -> Result<(String, String)> {
+ let mut segments = url
+ .path_segments()
+ .ok_or_else(|| Error::msg("GitHub URL missing path for owner and repository"))?;
+
+ let owner = segments
+ .next()
+ .ok_or_else(|| Error::msg("GitHub URL missing owner"))?
+ .to_owned();
+
+ let repo = segments
+ .next()
+ .ok_or_else(|| Error::msg("GitHub URL missing repository"))?
+ .trim_end_matches(".git")
+ .to_owned();
+
+ Ok((owner, repo))
+}
+
+#[allow(dead_code)]
+fn get_github_owner_repo_and_pull_request(url: &Url) -> Result<(String, String, u64)> {
+ let mut segments = url.path_segments().ok_or_else(|| {
+ Error::msg("GitHub URL missing path for owner, repository, and pull request number")
+ })?;
+
+ let owner = segments
+ .next()
+ .ok_or_else(|| Error::msg("GitHub URL missing owner"))?
+ .to_owned();
+
+ let repo = segments
+ .next()
+ .ok_or_else(|| Error::msg("GitHub URL missing repository"))?
+ .to_owned();
+
+ let test_pull = segments.next();
+
+ if test_pull == Some("pull") {
+ let pull_request = segments
+ .next()
+ .ok_or_else(|| Error::msg("GitHub URL missing pull request number"))?
+ .to_owned();
+ let pull_request_number: u64 = pull_request
+ .parse()
+ .context("GitHub URL pull request number is not a valid integer")?;
+ debug!("Pull request number: {}", pull_request_number);
+
+ Ok((owner, repo, pull_request_number))
+ } else {
+ Err(Error::msg("GitHub URL not a pull request"))
+ }
+}
+
+fn build_unknown_remote_clone_dir(url: &Url) -> Result<String> {
+ let mut dir = String::new();
+
+ // Add the host to the destination.
+ // Unfortunately, due to borrowing issues, this is being recomputed here.
+ let host = url
+ .host_str()
+ .ok_or_else(|| Error::msg("remote URL missing host"))?;
+ dir.push_str(host);
+
+ // Add each of the path segments.
+ let segments = url
+ .path_segments()
+ .ok_or_else(|| Error::msg("remote URL missing path"))?;
+
+ for segment in segments {
+ dir.push_str("__");
+ dir.push_str(segment);
+ }
+
+ Ok(dir)
+}
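Since the directory-name format for unknown hosts is easy to get wrong, a small test-style example of what build_unknown_remote_clone_dir produces: the host followed by each path segment, joined with double underscores (the URL is an arbitrary example):

#[test]
fn unknown_remote_dir_flattens_host_and_path() -> Result<()> {
    let url = Url::parse("https://git.example.com/group/project")?;
    // Host and path segments are flattened into one directory name.
    assert_eq!(
        build_unknown_remote_clone_dir(&url)?,
        "git.example.com__group__project"
    );
    Ok(())
}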
+
+fn clone_local_repo_to_cache(src: &Path, root: &Path) -> Result<PathBuf> {
+ let src = src.canonicalize()?;
+ let hc_data_root = pathbuf![root, "clones"];
+ // If the src dir is already in HC_CACHE/clones, leave it be; otherwise clone from the local fs.
+ if src.starts_with(&hc_data_root) {
+ return Ok(src);
+ }
+ let file_name = src
+ .file_name()
+ .ok_or_else(|| hc_error!("source path has no file name '{}'", src.display()))?;
+ let dest = pathbuf![&hc_data_root, "local", file_name];
+ if dest.exists() {
+ std::fs::remove_dir_all(&dest)?;
+ }
+ let src_str = src
+ .to_str()
+ .ok_or_else(|| hc_error!("source isn't UTF-8 encoded '{}'", src.display()))?;
+ let dest_str = dest
+ .to_str()
+ .ok_or_else(|| hc_error!("destination isn't UTF-8 encoded '{}'", dest.display()))?;
+ let _output = GitCommand::new_repo(["clone", src_str, dest_str])?.output()?;
+ Ok(dest)
+}
+
+pub fn clone_or_update_remote(
+ phase: &SpinnerPhase,
+ url: &Url,
+ dest: &Path,
+ refspec: Option<String>,
+) -> Result<String> {
+ if dest.exists() {
+ phase.update_status("pulling");
+ git::fetch(dest).context("failed to update remote repository")?;
+ } else {
+ phase.update_status("cloning");
+ git::clone(url, dest).context("failed to clone remote repository")?;
+ }
+ git::checkout(dest, refspec)
+}
+
+fn get_symbolic_ref(dest: &Path) -> Result<String> {
+ let output = GitCommand::for_repo(dest, ["symbolic-ref", "-q", "HEAD"])?
+ .output()
+ .context("Git failed to get symbolic ref for HEAD")?;
+
+ Ok(output.trim().to_owned())
+}
+
+fn get_upstream_for_ref(dest: &Path, symbolic_ref: &str) -> Result<String> {
+ let output = GitCommand::for_repo(
+ dest,
+ ["for-each-ref", "--format=%(upstream:short)", symbolic_ref],
+ )?
+ .output()
+ .context("Git failed to get name of upstream for HEAD")?;
+
+ Ok(output.trim().to_owned())
+}
+
+fn get_url_for_remote(dest: &Path, remote: &str) -> Result<String> {
+ let output = GitCommand::for_repo(dest, ["remote", "get-url", remote])?.output()?;
+
+ Ok(output.trim().to_owned())
+}
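The remote-lookup helpers at the end of this module chain together three pieces of git plumbing: git symbolic-ref -q HEAD to find the checked-out branch, git for-each-ref --format='%(upstream:short)' to find that branch's upstream, and git remote get-url to turn the remote name (the text before the first '/' of the upstream) into a URL. A hedged sketch of the same chain, with illustrative values in the comments:

fn sketch_remote_url_for_local(repo: &Path) -> Result<Url> {
    let symbolic_ref = get_symbolic_ref(repo)?; // e.g. "refs/heads/main"
    let upstream = get_upstream_for_ref(repo, &symbolic_ref)?; // e.g. "origin/main"
    let remote = get_remote_from_upstream(&upstream)
        .ok_or_else(|| hc_error!("no remote name in upstream '{}'", upstream))?; // "origin"
    let raw = get_url_for_remote(repo, remote)?; // e.g. "https://github.com/owner/repo.git"
    Ok(Url::parse(&raw)?)
}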
diff --git a/hipcheck/src/source/source.rs b/hipcheck/src/source/source.rs
deleted file mode 100644
index c4b066aa..00000000
--- a/hipcheck/src/source/source.rs
+++ /dev/null
@@ -1,334 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-
-use super::git;
-use crate::context::Context;
-use crate::data::git_command::GitCommand;
-use crate::error::Error;
-use crate::error::Result;
-use crate::hc_error;
-use crate::shell::spinner_phase::SpinnerPhase;
-pub use crate::source::query::*;
-use crate::target::{KnownRemote, LocalGitRepo, RemoteGitRepo, Target};
-use log::debug;
-use pathbuf::pathbuf;
-use std::path::Path;
-use std::path::PathBuf;
-use url::Host;
-use url::Url;
-
-/// Resolving is how we ensure we have a valid, ready-to-go source of Git data
-/// for the rest of Hipcheck's analysis. The below functions handle the resolution
-/// of local or remote repos.
-///
-/// If the repo is local, the resolve function will work with the local repository
-/// without cloning (all operationsare write-only, so this won't harm the repo at
-/// all).
-///
-/// If it's a remote source, Hipcheck will clone the source so it can work with a
-/// local copy, putting the clone in '<root>/clones'. It also notes whether a
-/// remote repo is from a known or unknown host, because some forms of analysis
-/// rely on accessing the API's of certain known hosts (currently just GitHub).
-///
-/// In either case, it also gets the commit head of the HEAD commit, so we can
-/// make sure future operations are all done relative to the HEAD, and that any
-/// cached data records what the HEAD was at the time of caching, to enable
-/// cache invalidation.
-
-/// Resolves a specified local git repo into a Target for analysis by Hipcheck
-pub fn resolve_local_repo(
- phase: &SpinnerPhase,
- root: &Path,
- local_repo: LocalGitRepo,
-) -> Result<Target> {
- let src = local_repo.path.clone();
-
- let specifier = src
- .to_str()
- .ok_or(hc_error!(
- "Path to local repo contained one or more invalid characters"
- ))?
- .to_string();
-
- phase.update_status("copying");
- let path = clone_local_repo_to_cache(src.as_path(), root)?;
- let git_ref = git::checkout(&path, Some(local_repo.git_ref.clone()))?;
- phase.update_status("trying to get remote");
- let remote = match try_resolve_remote_for_local(&path) {
- Ok(remote) => Some(remote),
- Err(err) => {
- log::debug!("failed to get remote [err='{}']", err);
- None
- }
- };
-
- let local = LocalGitRepo { path, git_ref };
-
- Ok(Target {
- specifier,
- local,
- remote,
- package: None,
- })
-}
-
-/// Creates a RemoteGitRepo struct from a given git URL by idenfitying if it is from a known host (currently only GitHub) or not
-pub fn get_remote_repo_from_url(url: Url) -> Result<RemoteGitRepo> {
- match url.host() {
- Some(Host::Domain("github.com")) => {
- let (owner, repo) = get_github_owner_and_repo(&url)?;
- Ok(RemoteGitRepo {
- url,
- known_remote: Some(KnownRemote::GitHub { owner, repo }),
- })
- }
- Some(_) => Ok(RemoteGitRepo {
- url,
- known_remote: None,
- }),
- None => Err(hc_error!("Target repo URL is missing a host")),
- }
-}
-
-/// Resolves a remote git repo originally specified by its remote location into a Target for analysis by Hipcheck
-pub fn resolve_remote_repo(
- phase: &SpinnerPhase,
- root: &Path,
- remote_repo: RemoteGitRepo,
- refspec: Option<String>,
-) -> Result<Target> {
- // For remote repos originally specified by their URL, the specifier is just that URL
- let specifier = remote_repo.url.to_string();
-
- let path = match remote_repo.known_remote {
- Some(KnownRemote::GitHub {
- ref owner,
- ref repo,
- }) => pathbuf![root, "clones", "github", owner, repo],
- _ => {
- let clone_dir = build_unknown_remote_clone_dir(&remote_repo.url)
- .context("failed to prepare local clone directory")?;
- pathbuf![root, "clones", "unknown", &clone_dir]
- }
- };
-
- let git_ref = clone_or_update_remote(phase, &remote_repo.url, &path, refspec)?;
-
- let local = LocalGitRepo { path, git_ref };
-
- Ok(Target {
- specifier,
- local,
- remote: Some(remote_repo),
- package: None,
- })
-}
-
-/// Resolves a remote git repo derived from a source other than its remote location (e.g. a package or SPDX file) into a Target for analysis by Hipcheck
-pub fn resolve_remote_package_repo(
- phase: &SpinnerPhase,
- root: &Path,
- remote_repo: RemoteGitRepo,
- specifier: String,
- refspec: Option<String>,
-) -> Result<Target> {
- let mut target = resolve_remote_repo(phase, root, remote_repo, refspec)?;
- target.specifier = specifier;
- Ok(target)
-}
-
-fn try_resolve_remote_for_local(local: &Path) -> Result<RemoteGitRepo> {
- let url = {
- let symbolic_ref = get_symbolic_ref(local)?;
-
- log::trace!("local source has symbolic ref [ref='{:?}']", symbolic_ref);
-
- if symbolic_ref.is_empty() {
- return Err(Error::msg("no symbolic ref found"));
- }
-
- let upstream = get_upstream_for_ref(local, &symbolic_ref)?;
-
- log::trace!("local source has upstream [upstream='{:?}']", upstream);
-
- if upstream.is_empty() {
- return Err(Error::msg("no upstream found"));
- }
-
- let remote = get_remote_from_upstream(&upstream)
- .ok_or_else(|| hc_error!("failed to get remote name from upstream '{}'", upstream))?;
-
- log::trace!("local source has remote [remote='{:?}']", remote);
-
- if remote.is_empty() {
- return Err(Error::msg("no remote found"));
- }
-
- let raw = get_url_for_remote(local, remote)?;
-
- log::trace!("local source remote has url [url='{}']", raw);
-
- if raw.is_empty() {
- return Err(Error::msg("no URL found for remote"));
- }
-
- Url::parse(&raw)?
- };
-
- let host = url
- .host_str()
- .ok_or_else(|| hc_error!("no host name in '{}'", url))?;
-
- match host {
- "github.com" => {
- let (owner, repo) = get_github_owner_and_repo(&url)?;
- Ok(RemoteGitRepo {
- url,
- known_remote: Some(KnownRemote::GitHub { owner, repo }),
- })
- }
- _ => Ok(RemoteGitRepo {
- url,
- known_remote: None,
- }),
- }
-}
-
-fn get_remote_from_upstream(upstream: &str) -> Option<&str> {
- upstream.split('/').next()
-}
-
-pub fn get_github_owner_and_repo(url: &Url) -> Result<(String, String)> {
- let mut segments = url
- .path_segments()
- .ok_or_else(|| Error::msg("GitHub URL missing path for owner and repository"))?;
-
- let owner = segments
- .next()
- .ok_or_else(|| Error::msg("GitHub URL missing owner"))?
- .to_owned();
-
- let repo = segments
- .next()
- .ok_or_else(|| Error::msg("GitHub URL missing repository"))?
- .trim_end_matches(".git")
- .to_owned();
-
- Ok((owner, repo))
-}
-
-#[allow(dead_code)]
-fn get_github_owner_repo_and_pull_request(url: &Url) -> Result<(String, String, u64)> {
- let mut segments = url.path_segments().ok_or_else(|| {
- Error::msg("GitHub URL missing path for owner, repository, and pull request number")
- })?;
-
- let owner = segments
- .next()
- .ok_or_else(|| Error::msg("GitHub URL missing owner"))?
- .to_owned();
-
- let repo = segments
- .next()
- .ok_or_else(|| Error::msg("GitHub URL missing repository"))?
- .to_owned();
-
- let test_pull = segments.next();
-
- if test_pull == Some("pull") {
- let pull_request = segments
- .next()
- .ok_or_else(|| Error::msg("GitHub URL missing pull request number"))?
- .to_owned();
- let pull_request_number: u64 = pull_request.parse().unwrap();
- debug!("Pull request number: {}", pull_request_number);
-
- Ok((owner, repo, pull_request_number))
- } else {
- Err(Error::msg("GitHub URL not a pull request"))
- }
-}
-
-fn build_unknown_remote_clone_dir(url: &Url) -> Result<String> {
- let mut dir = String::new();
-
- // Add the host to the destination.
- // Unfortunately, due to borrowing issues, this is being recomputed here.
- let host = url
- .host_str()
- .ok_or_else(|| Error::msg("remote URL missing host"))?;
- dir.push_str(host);
-
- // Add each of the path segments.
- let segments = url
- .path_segments()
- .ok_or_else(|| Error::msg("remote URL missing path"))?;
-
- for segment in segments {
- dir.push_str("__");
- dir.push_str(segment);
- }
-
- Ok(dir)
-}
-
-fn clone_local_repo_to_cache(src: &Path, root: &Path) -> Result<PathBuf> {
- let src = src.canonicalize()?;
- let hc_data_root = pathbuf![root, "clones"];
- // If src dir is already in HC_CACHE/clones, leave it be. else clone from local fs
- if src.starts_with(&hc_data_root) {
- return Ok(src);
- }
- let dest = pathbuf![&hc_data_root, "local", src.file_name().unwrap()];
- if dest.exists() {
- std::fs::remove_dir_all(&dest)?;
- }
- let src_str = src
- .to_str()
- .ok_or_else(|| hc_error!("source isn't UTF-8 encoded '{}'", src.display()))?;
- let dest_str = dest
- .to_str()
- .ok_or_else(|| hc_error!("destination isn't UTF-8 encoded '{}'", dest.display()))?;
- let _output = GitCommand::new_repo(["clone", src_str, dest_str])?.output()?;
- Ok(dest)
-}
-
-pub fn clone_or_update_remote(
- phase: &SpinnerPhase,
- url: &Url,
- dest: &Path,
- refspec: Option<String>,
-) -> Result<String> {
- if dest.exists() {
- phase.update_status("pulling");
- git::fetch(dest).context("failed to update remote repository")?;
- } else {
- phase.update_status("cloning");
- git::clone(url, dest).context("failed to clone remote repository")?;
- }
- git::checkout(dest, refspec)
-}
-
-fn get_symbolic_ref(dest: &Path) -> Result<String> {
- let output = GitCommand::for_repo(dest, ["symbolic-ref", "-q", "HEAD"])?
- .output()
- .context("Git failed to get symbolic ref for HEAD")?;
-
- Ok(output.trim().to_owned())
-}
-
-fn get_upstream_for_ref(dest: &Path, symbolic_ref: &str) -> Result<String> {
- let output = GitCommand::for_repo(
- dest,
- ["for-each-ref", "--format=%(upstream:short)", symbolic_ref],
- )?
- .output()
- .context("Git failed to get name of upstream for HEAD")?;
-
- Ok(output.trim().to_owned())
-}
-
-fn get_url_for_remote(dest: &Path, remote: &str) -> Result<String> {
- let output = GitCommand::for_repo(dest, ["remote", "get-url", remote])?.output()?;
-
- Ok(output.trim().to_owned())
-}
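Altogether, resolution leaves every repository Hipcheck analyzes as a working tree under the cache root. This sketch reconstructs the layout from the functions above (the helper function is illustrative, not part of the diff):

// <root>/clones/github/<owner>/<repo>      -- remotes on the known GitHub host
// <root>/clones/unknown/<host__segments..> -- remotes on unknown hosts
// <root>/clones/local/<dir name>           -- copies of local repositories
fn sketch_cache_path(root: &Path, repo: &RemoteGitRepo) -> Result<PathBuf> {
    Ok(match &repo.known_remote {
        // Known GitHub remotes get a stable owner/repo directory.
        Some(KnownRemote::GitHub { owner, repo: name }) => {
            pathbuf![root, "clones", "github", owner, name]
        }
        // Everything else is flattened into a single directory name.
        None => {
            let dir = build_unknown_remote_clone_dir(&repo.url)?;
            pathbuf![root, "clones", "unknown", &dir]
        }
    })
}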