Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CFG Refactor + Bugfix #20

Merged
merged 3 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "bin2ml"
version = "0.3.2"
version = "0.4.0"
edition = "2021"

[dependencies]
Expand Down
91 changes: 47 additions & 44 deletions src/agfj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,13 @@ impl AGFJFunc {
}
pub fn create_bb_edge_list(&mut self, min_blocks: &u16) {
if self.blocks.len() > (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();

let bb_start_addrs: Vec<i64> = self.blocks.iter().map(|x| x.offset).collect::<Vec<_>>();
let mut edge_list = Vec::<(u32, u32, u32)>::new();

let min_offset: u64 = self.offset;
let max_offset: u64 = self.offset + self.size.unwrap_or(0);

for bb in &self.blocks {
bb.get_block_edges(&mut addr_idxs, &mut edge_list, max_offset, min_offset)
bb.get_block_edges(&bb_start_addrs, &mut edge_list)
}
self.addr_idx = Some(addr_idxs);
self.addr_idx = Some(bb_start_addrs);
self.edge_list = Some(edge_list);
}
}
Expand Down Expand Up @@ -279,23 +275,28 @@ impl AGFJFunc {
feature_type: FeatureType,
inference_job: &Option<Arc<InferenceJob>>,
) {
/*
This function needs some serious sorting out.

- Need to get GPU toggle-able
- Need to use new CFG edge builder
- General refactor
*/
info!("Processing {:?}", self.name);
let full_output_path =
get_save_file_path(path, output_path, Some(".json".to_string()), None, None);
check_or_create_dir(&full_output_path);

// offset != 1 has been added to skip functions with invalid instructions
if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();

let bb_start_addrs: Vec<i64> = self.blocks.iter().map(|x| x.offset).collect::<Vec<_>>();
let mut edge_list = Vec::<(u32, u32, u32)>::new();

let mut feature_vecs = Vec::<_>::new();
let mut feature_vec_of_vecs = Vec::<_>::new();
let min_offset = self.offset;
let max_offset = self.offset + self.size.unwrap_or(0);

for bb in &self.blocks {
bb.get_block_edges(&mut addr_idxs, &mut edge_list, max_offset, min_offset);
bb.get_block_edges(&bb_start_addrs, &mut edge_list);
if inference_job.is_some() {
let inference = inference_job.as_ref().unwrap().clone();
match feature_type {
Expand Down Expand Up @@ -393,7 +394,6 @@ impl AGFJFunc {
if !Path::new(&fname_string).is_file() {
// offset != 1 has been added to skip functions with invalid instructions
if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();
let mut edge_list = Vec::<(u32, u32, u32)>::new();

let mut feature_vecs: StringOrF64 = match feature_type {
Expand All @@ -411,36 +411,30 @@ impl AGFJFunc {
}
};

let min_offset: u64 = self.offset;
let max_offset: u64 = self.offset + self.size.unwrap_or(0);
let bb_start_addrs: Vec<i64> =
self.blocks.iter().map(|x| x.offset).collect::<Vec<_>>();

match feature_type {
FeatureType::Tiknib
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => {
let feature_vecs = feature_vecs.as_f64_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.get_block_edges(&bb_start_addrs, &mut edge_list);
bb.generate_bb_feature_vec(feature_vecs, feature_type, architecture);
}
debug!("Number of Feature Vecs: {}", feature_vecs.len());
assert_eq!(self.blocks.len(), feature_vecs.len())
}
FeatureType::Esil | FeatureType::Disasm | FeatureType::Pseudo => {
let feature_vecs = feature_vecs.as_string_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.get_block_edges(&bb_start_addrs, &mut edge_list);
bb.generate_bb_feature_strings(feature_vecs, feature_type, true);
}
debug!("Number of Feature Vecs: {}", feature_vecs.len())
debug!("Number of Feature Vecs: {}", feature_vecs.len());
assert_eq!(self.blocks.len(), feature_vecs.len())
}
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
Expand All @@ -454,10 +448,16 @@ impl AGFJFunc {
edge_list.is_empty(),
edge_list.len()
);
if !edge_list.is_empty() {
let mut graph = Graph::<std::string::String, u32>::from_edges(&edge_list);

Self::str_to_hex_node_idxs(&mut graph, &mut addr_idxs);
if !edge_list.is_empty() {
let mut graph = Graph::<String, u32>::from_edges(&edge_list);
Self::str_to_hex_node_idxs(&mut graph, &bb_start_addrs);
if graph.node_count() != self.blocks.len() {
debug!("Graph for {} does not have the same number of nodes as basic blocks - N: {} B: {}. This suggests \
there is something wrong with the CFG edge recovery. If this is a problem, please raise a GitHub issue!",
self.name, graph.node_count(), self.blocks.len());
return;
}

// Unpack the NodeTypes to the inner Types
if feature_type == FeatureType::Gemini {
Expand Down Expand Up @@ -577,22 +577,24 @@ impl AGFJFunc {
info!("Function {} has no edges. Skipping...", self.name)
}
} else {
info!(
debug!(
"Function {} has less than the minimum number of blocks. Skipping..",
self.name
);
}
} else {
info!(
"Function {} has already been processed. Skipping...",
self.name
)
trace!("Function has fewer basic blocks than the minimum. Skipping...");
}
} else {
debug!(
"Function {} has already been processed. Skipping...",
self.name
)
}
}

// Convert string memory address to hex / string
fn str_to_hex_node_idxs(graph: &mut Graph<String, u32>, addr_idxs: &mut [i64]) {
fn str_to_hex_node_idxs(graph: &mut Graph<String, u32>, addr_idxs: &[i64]) {
for idx in graph.node_indices() {
let i_idx = idx.index();
let hex = addr_idxs[i_idx];
Expand Down Expand Up @@ -860,18 +862,19 @@ mod tests {

// Check edge list output is the correct format
let expected_edge_list = Some(vec![
(0, 2, 1),
(0, 1, 1),
(0, 2, 2),
(2, 3, 1),
(1, 3, 1),
(2, 3, 1),
(3, 5, 1),
(3, 4, 1),
(3, 5, 2),
(4, 8, 1),
(5, 7, 1),
(5, 6, 1),
(4, 7, 1),
(4, 8, 2),
(8, 6, 1),
(7, 6, 1),
(6, 8, 1),
(7, 8, 1),
]);

assert_eq!(target_func.edge_list, expected_edge_list)
}
}
75 changes: 13 additions & 62 deletions src/bb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,70 +446,21 @@ impl ACFJBlock {
}
num_offspring
}

// Get the edges associated with a given basic block.
// This function only considers valid edges as being
// fail, jumps or switchops that reside within the function itself.
// If there are edges that jump to another function outside of the program
// these edges are ignored.
//
// This function updates the provided mutable edge list with a three-tuple which
// represents (src, dst, weight). The weight in this case is the type of edge, where
// 1 denotes jump, 2 denotes fail, 3 denotes switchop
pub fn get_block_edges(
&self,
addr_idxs: &mut Vec<i64>,
edge_list: &mut Vec<(u32, u32, u32)>,
max_offset: u64,
min_offset: u64,
) {
let mut addr: i64 = self.offset;
let mut jump: i64 = self.jump;
let mut fail: i64 = self.fail;

if addr < min_offset.try_into().unwrap() || addr >= max_offset.try_into().unwrap() {
addr = -1;
}

if jump < min_offset.try_into().unwrap() || jump >= max_offset.try_into().unwrap() {
jump = -1;
}

if fail < min_offset.try_into().unwrap() || fail >= max_offset.try_into().unwrap() {
fail = -1;
}

if addr != -1 && !addr_idxs.contains(&addr) {
addr_idxs.push(addr);
}
if jump != -1 && !addr_idxs.contains(&jump) {
addr_idxs.push(jump)
}

if fail != -1 && !addr_idxs.contains(&fail) {
addr_idxs.push(fail)
}

let addr_idx = addr_idxs.iter().position(|&p| p == addr);

if let Some(addr_idx) = addr_idx {
if jump != -1 {
let jump_idx = addr_idxs.iter().position(|&p| p == jump).unwrap();
edge_list.push((addr_idx as u32, jump_idx as u32, 1));
}

if fail != -1 {
let fail_idx = addr_idxs.iter().position(|&p| p == fail).unwrap();
edge_list.push((addr_idx as u32, fail_idx as u32, 2));
pub fn get_block_edges(&self, bb_start_addrs: &[i64], edge_list: &mut Vec<(u32, u32, u32)>) {
let offset_idx = bb_start_addrs.iter().position(|&p| p == self.offset);

if let Some(offset_idx) = offset_idx {
if self.jump != -1 {
let jump_idx = bb_start_addrs.iter().position(|&p| p == self.jump);
if let Some(jump_idx) = jump_idx {
edge_list.push((offset_idx as u32, jump_idx as u32, 1));
}
}

if self.switchop.is_some() {
for item in &self.switchop.as_ref().unwrap().cases {
if !addr_idxs.contains(&item.jump) {
addr_idxs.push(item.jump)
}
let item_addr_idx = addr_idxs.iter().position(|&p| p == item.jump).unwrap();
edge_list.push((addr_idx as u32, item_addr_idx as u32, 3));
if self.fail != -1 {
let fail_idx = bb_start_addrs.iter().position(|&p| p == self.fail);
if let Some(fail_idx) = fail_idx {
edge_list.push((offset_idx as u32, fail_idx as u32, 1));
}
}
}
Expand Down