diff --git a/.github/workflows/publish_crate.yml b/.github/workflows/publish_crate.yml index 0acd914..a2ef8b3 100644 --- a/.github/workflows/publish_crate.yml +++ b/.github/workflows/publish_crate.yml @@ -22,7 +22,7 @@ jobs: run: cargo fmt --check check_clippy: - name: Check clippy + name: Check clippy: no features runs-on: ubuntu-latest steps: @@ -41,6 +41,46 @@ jobs: - name: Run clippy run: cargo clippy --all-targets -- -D warnings + check_clippy_serde: + name: Check clippy: features: serde + runs-on: ubuntu-latest + + steps: + - name: Checkout reposistory + uses: actions/checkout@v4 + + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + + - name: Setup clippy + run: rustup component add clippy + + - name: Run clippy + run: cargo clippy --all-targets --features serde -- -D warnings + + check_clippy_all_features: + name: Check clippy: features: all + runs-on: ubuntu-latest + + steps: + - name: Checkout reposistory + uses: actions/checkout@v4 + + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + + - name: Setup clippy + run: rustup component add clippy + + - name: Run clippy + run: cargo clippy --all-targets --all-features -- -D warnings + run_tests: name: Run tests runs-on: ubuntu-latest @@ -58,6 +98,19 @@ jobs: - name: Run tests run: cargo test --workspace + msrv: + runs-on: ubuntu-latest + steps: + - name: Checkout reposistory + uses: actions/checkout@v4 + + - name: Setup MSRV checker + uses: taiki-e/install-action@cargo-hack + + - name: Run MSRV checker + run: cargo hack check --rust-version --workspace --all-targets --ignore-private + + publish: name: Publish runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index f25e209..bf3b90a 100644 --- a/.gitignore +++ b/.gitignore @@ -170,4 +170,3 @@ cython_debug/ # Added by cargo /target -/Cargo.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index 96dca4c..b2a85f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.5.0] - 2024-08-09 + +### Added + +- Add Minimal Supported Rust Version (MSRV) to Cargo.toml. +- Add `MapFile::new_from_map_file` function to simplify `MapFile` creation. +- Add `serde` feature to the Rust crate. + - Allows serializing and deserializing a `MapFile` object using serde. + +### Changed + +- Tweak symbol comparison logic a bit. + - Symbol shifting (due to different sizes or extra/missing symbols) should + not affect comparing non shifted files. +- `Cargo.lock` file is now committed to the repo. +- Change Rust functions to properly take references instead of consuming the + argument. + +### Fixed + +- Fix `MapFile::find_lowest_differing_symbol` not returning a previous symbol + from a previous file if the symbol found is the first symbol from the file. + ## [2.4.0] - 2024-03-25 ### Added @@ -343,6 +366,7 @@ Full changes: "] description = "Map file parser library focusing decompilation projects" readme = "README.md" -homepage = "https://github.com/Decompollaborate/mapfile_parser" repository = "https://github.com/Decompollaborate/mapfile_parser" license = "MIT" keywords = ["mapfile", "parser", "decomp", "decompilation"] @@ -25,6 +25,8 @@ crate-type = ["cdylib", "staticlib", "rlib"] regex = "1.10.2" pyo3 = { version = "0.20.0", optional = true, features = ["abi3", "abi3-py37"]} lazy_static = "1.4.0" +serde = { version = "1.0", features = ["derive"], optional = true } [features] python_bindings = ["dep:pyo3"] +serde = ["dep:serde"] diff --git a/README.md b/README.md index ad1cdb8..cad04a0 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ If you use a `requirements.txt` file in your repository, then you can add this library with the following line: ```txt -mapfile_parser>=2.4.0,<3.0.0 +mapfile_parser>=2.5.0,<3.0.0 ``` #### Development version @@ -74,7 +74,7 @@ cargo add mapfile_parser Or add the following line manually to your `Cargo.toml` file: ```toml -mapfile_parser = "2.4.0" +mapfile_parser = "2.5.0" ``` ## Versioning and changelog diff --git a/pyproject.toml b/pyproject.toml index 526aedc..7010282 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ [project] name = "mapfile_parser" -version = "2.4.0" +version = "2.5.0" description = "Map file parser library focusing decompilation projects" readme = "README.md" requires-python = ">=3.8" diff --git a/src/mapfile_parser/__init__.py b/src/mapfile_parser/__init__.py index 0042ee5..6b7c6be 100644 --- a/src/mapfile_parser/__init__.py +++ b/src/mapfile_parser/__init__.py @@ -5,8 +5,8 @@ from __future__ import annotations -__version_info__ = (2, 4, 0) -__version__ = ".".join(map(str, __version_info__)) +__version_info__ = (2, 5, 0) +__version__ = ".".join(map(str, __version_info__))# + "-dev0" __author__ = "Decompollaborate" from . import utils as utils diff --git a/src/mapfile_parser/mapfile.py b/src/mapfile_parser/mapfile.py index b9aa284..bae239e 100644 --- a/src/mapfile_parser/mapfile.py +++ b/src/mapfile_parser/mapfile.py @@ -50,7 +50,28 @@ class SymbolComparisonInfo: buildFile: File|None expectedAddress: int expectedFile: File|None - diff: int|None + + @property + def diff(self) -> int|None: + if self.buildAddress < 0: + return None + if self.expectedAddress < 0: + return None + + buildAddress = self.buildAddress + expectedAddress = self.expectedAddress + + # If both symbols are present in the same file then we do a diff + # between their offsets into their respectives file. + # This is done as a way to avoid too much noise in case an earlier file + # did shift. + if self.buildFile is not None and self.expectedFile is not None: + if self.buildFile.filepath == self.expectedFile.filepath: + buildAddress -= self.buildFile.vram + expectedAddress -= self.expectedFile.vram + + return buildAddress - expectedAddress + class MapsComparisonInfo: def __init__(self): @@ -593,9 +614,10 @@ def findSymbolByVramOrVrom(self, address: int) -> FoundSymbolInfo|None: def findLowestDifferingSymbol(self, otherMapFile: MapFile) -> tuple[Symbol, File, Symbol|None]|None: minVram = None found = None - for builtSegement in self._segmentsList: - for builtFile in builtSegement: - for i, builtSym in enumerate(builtFile): + foundIndices = (0, 0) + for i, builtSegement in enumerate(self._segmentsList): + for j, builtFile in enumerate(builtSegement): + for k, builtSym in enumerate(builtFile): expectedSymInfo = otherMapFile.findSymbolByName(builtSym.name) if expectedSymInfo is None: continue @@ -605,9 +627,38 @@ def findLowestDifferingSymbol(self, otherMapFile: MapFile) -> tuple[Symbol, File if minVram is None or builtSym.vram < minVram: minVram = builtSym.vram prevSym = None - if i > 0: - prevSym = builtFile[i-1] + if k > 0: + prevSym = builtFile[k-1] found = (builtSym, builtFile, prevSym) + foundIndices = (i, j) + + if found is not None and found[2] is None: + # Previous symbol was not in the same section of the given + # file, so we try to backtrack until we find any symbol. + + foundBuiltSym, foundBuiltFile, _ = found + i, j = foundIndices + + # We want to check the previous file, not the current one, + # since we already know the current one doesn't have a symbol + # preceding the one we found. + j -= 1; + + while i >= 0: + builtSegment = self[i] + while j >= 0: + builtFile = builtSegment[j] + + if len(builtFile) > 0: + found = (foundBuiltSym, foundBuiltFile, builtFile[-1]) + i = -1 + j = -1 + break + j -= 1 + i -= 1 + if i >= 0: + j = len(self[i]) - 1 + return found @@ -695,13 +746,13 @@ def compareFilesAndSymbols(self, otherMapFile: MapFile, *, checkOtherOnSelf: boo for symbol in file: foundSymInfo = otherMapFile.findSymbolByName(symbol.name) if foundSymInfo is not None: - comp = SymbolComparisonInfo(symbol, symbol.vram, file, foundSymInfo.symbol.vram, foundSymInfo.file, symbol.vram - foundSymInfo.symbol.vram) + comp = SymbolComparisonInfo(symbol, symbol.vram, file, foundSymInfo.symbol.vram, foundSymInfo.file) compInfo.comparedList.append(comp) if comp.diff != 0: compInfo.badFiles.add(file) else: compInfo.missingFiles.add(file) - compInfo.comparedList.append(SymbolComparisonInfo(symbol, symbol.vram, file, -1, None, None)) + compInfo.comparedList.append(SymbolComparisonInfo(symbol, symbol.vram, file, -1, None)) if checkOtherOnSelf: for segment in otherMapFile: @@ -710,7 +761,7 @@ def compareFilesAndSymbols(self, otherMapFile: MapFile, *, checkOtherOnSelf: boo foundSymInfo = self.findSymbolByName(symbol.name) if foundSymInfo is None: compInfo.missingFiles.add(file) - compInfo.comparedList.append(SymbolComparisonInfo(symbol, -1, None, symbol.vram, file, None)) + compInfo.comparedList.append(SymbolComparisonInfo(symbol, -1, None, symbol.vram, file)) return compInfo diff --git a/src/mapfile_parser/mapfile_parser.pyi b/src/mapfile_parser/mapfile_parser.pyi index 39f0d68..f29dcb5 100644 --- a/src/mapfile_parser/mapfile_parser.pyi +++ b/src/mapfile_parser/mapfile_parser.pyi @@ -26,9 +26,10 @@ class SymbolComparisonInfo: buildFile: File|None expectedAddress: int expectedFile: File|None - diff: int|None - def __init__(self, symbol: Symbol, buildAddress: int, buildFile: File|None, expectedAddress: int, expectedFile: File|None, diff: int|None): ... + def __init__(self, symbol: Symbol, buildAddress: int, buildFile: File|None, expectedAddress: int, expectedFile: File|None): ... + + def diff(self) -> int|None: ... class MapsComparisonInfo: badFiles: set[File] = set() @@ -210,6 +211,9 @@ class Segment: class MapFile: def __init__(self) -> None: ... + @staticmethod + def newFromMapFile(mapPath: Path) -> MapFile: ... + def readMapFile(self, mapPath: Path) -> None: ... def parseMapContents(self, mapContents: str) -> None: ... def parseMapContentsGNU(self, mapContents: str) -> None: ... diff --git a/src/rs/file.rs b/src/rs/file.rs index cab89ce..2999aa1 100644 --- a/src/rs/file.rs +++ b/src/rs/file.rs @@ -12,8 +12,12 @@ use crate::{symbol, utils}; #[cfg(feature = "python_bindings")] use pyo3::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + #[derive(Debug, Clone)] #[cfg_attr(feature = "python_bindings", pyclass(module = "mapfile_parser"))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct File { pub filepath: PathBuf, @@ -54,27 +58,22 @@ impl File { utils::is_noload_section(&self.section_type) } - pub fn find_symbol_by_name(&self, sym_name: &str) -> Option { - for sym in &self.symbols { - if sym.name == sym_name { - return Some(sym.clone()); - } - } - None + pub fn find_symbol_by_name(&self, sym_name: &str) -> Option<&symbol::Symbol> { + self.symbols.iter().find(|&sym| sym.name == sym_name) } - pub fn find_symbol_by_vram_or_vrom(&self, address: u64) -> Option<(symbol::Symbol, i64)> { + pub fn find_symbol_by_vram_or_vrom(&self, address: u64) -> Option<(&symbol::Symbol, i64)> { let mut prev_sym: Option<&symbol::Symbol> = None; let is_vram = address >= 0x1000000; for sym in &self.symbols { if sym.vram == address { - return Some((sym.clone(), 0)); + return Some((sym, 0)); } if let Some(sym_vrom_temp) = sym.vrom { if sym_vrom_temp == address { - return Some((sym.clone(), 0)); + return Some((sym, 0)); } } @@ -86,7 +85,7 @@ impl File { if offset < 0 { return None; } - return Some((prev_sym_temp.clone(), offset)); + return Some((prev_sym_temp, offset)); } } } @@ -95,7 +94,7 @@ impl File { if offset < 0 { return None; } - return Some((prev_sym_temp.clone(), offset)); + return Some((prev_sym_temp, offset)); } } @@ -110,7 +109,7 @@ impl File { if offset < 0 { return None; } - return Some((prev_sym_temp.clone(), offset)); + return Some((prev_sym_temp, offset)); } } @@ -119,7 +118,7 @@ impl File { if offset < 0 { return None; } - return Some((prev_sym_temp.clone(), offset)); + return Some((prev_sym_temp, offset)); } } } @@ -271,7 +270,7 @@ pub(crate) mod python_bindings { #[pymethods] impl super::File { #[new] - pub fn py_new( + fn py_new( filepath: PathBuf, vram: u64, size: u64, @@ -371,7 +370,7 @@ pub(crate) mod python_bindings { */ #[getter] - pub fn isNoloadSection(&self) -> bool { + fn isNoloadSection(&self) -> bool { self.is_noload_section() } @@ -386,33 +385,37 @@ pub(crate) mod python_bindings { .collect() } - pub fn findSymbolByName(&self, sym_name: &str) -> Option { - self.find_symbol_by_name(sym_name) + fn findSymbolByName(&self, sym_name: &str) -> Option { + self.find_symbol_by_name(sym_name).cloned() } - pub fn findSymbolByVramOrVrom(&self, address: u64) -> Option<(symbol::Symbol, i64)> { - self.find_symbol_by_vram_or_vrom(address) + fn findSymbolByVramOrVrom(&self, address: u64) -> Option<(symbol::Symbol, i64)> { + if let Some((sym, offset)) = self.find_symbol_by_vram_or_vrom(address) { + Some((sym.clone(), offset)) + } else { + None + } } #[staticmethod] #[pyo3(signature=(print_vram=true))] - pub fn toCsvHeader(print_vram: bool) -> String { + fn toCsvHeader(print_vram: bool) -> String { Self::to_csv_header(print_vram) } #[pyo3(signature=(print_vram=true))] - pub fn toCsv(&self, print_vram: bool) -> String { + fn toCsv(&self, print_vram: bool) -> String { self.to_csv(print_vram) } #[staticmethod] #[pyo3(signature=(print_vram=true))] - pub fn printCsvHeader(print_vram: bool) { + fn printCsvHeader(print_vram: bool) { Self::print_csv_header(print_vram) } #[pyo3(signature=(print_vram=true))] - pub fn printAsCsv(&self, print_vram: bool) { + fn printAsCsv(&self, print_vram: bool) { self.print_as_csv(print_vram) } diff --git a/src/rs/found_symbol_info.rs b/src/rs/found_symbol_info.rs index 2a782e0..6ed876d 100644 --- a/src/rs/found_symbol_info.rs +++ b/src/rs/found_symbol_info.rs @@ -74,7 +74,7 @@ pub(crate) mod python_bindings { impl super::FoundSymbolInfo { #[new] #[pyo3(signature=(file, symbol, offset=0))] - pub fn py_new(file: file::File, symbol: symbol::Symbol, offset: i64) -> Self { + fn py_new(file: file::File, symbol: symbol::Symbol, offset: i64) -> Self { Self::new(file, symbol, offset) } @@ -116,12 +116,12 @@ pub(crate) mod python_bindings { /* Methods */ #[pyo3(name = "getAsStr")] - pub fn getAsStr(&self) -> String { + fn getAsStr(&self) -> String { self.get_as_str() } #[pyo3(name = "getAsStrPlusOffset")] - pub fn getAsStrPlusOffset(&self, sym_name: Option) -> String { + fn getAsStrPlusOffset(&self, sym_name: Option) -> String { self.get_as_str_plus_offset(sym_name) } } diff --git a/src/rs/lib.rs b/src/rs/lib.rs index c392aa3..e05e47c 100644 --- a/src/rs/lib.rs +++ b/src/rs/lib.rs @@ -42,6 +42,8 @@ fn mapfile_parser(_py: Python<'_>, m: &PyModule) -> PyResult<()> { #[cfg(test)] mod tests { + use std::path::PathBuf; + use crate::mapfile::MapFile; // TODO: tests @@ -49,6 +51,6 @@ mod tests { #[test] fn w0_000_map() { let mut map = MapFile::new(); - map.read_map_file("tests/maps/w0_000.map".into()); + map.read_map_file(&PathBuf::from("tests/maps/w0_000.map")); } } diff --git a/src/rs/mapfile.rs b/src/rs/mapfile.rs index 77d415d..fe40a82 100644 --- a/src/rs/mapfile.rs +++ b/src/rs/mapfile.rs @@ -4,6 +4,7 @@ use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Write; +use std::path::Path; use std::path::PathBuf; use regex::*; @@ -11,6 +12,9 @@ use regex::*; #[cfg(feature = "python_bindings")] use pyo3::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + use crate::{ file, found_symbol_info, maps_comparison_info, progress_stats, segment, symbol, symbol_comparison_info, utils, @@ -27,6 +31,7 @@ lazy_static! { #[derive(Debug, Clone)] // TODO: sequence? #[cfg_attr(feature = "python_bindings", pyclass(module = "mapfile_parser"))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct MapFile { pub segments_list: Vec, @@ -37,7 +42,7 @@ pub struct MapFile { impl MapFile { pub fn new() -> Self { - MapFile { + Self { segments_list: Vec::new(), #[cfg(feature = "python_bindings")] @@ -45,8 +50,22 @@ impl MapFile { } } + /// Creates a new `MapFile` object and fills it with the contents from the + /// file pointed by the `map_path` argument. + /// + /// The format of the map will be guessed based on its contents. + /// + /// Currently supported map formats: + /// - GNU ld + /// - clang ld.lld + pub fn new_from_map_file(map_path: &Path) -> Self { + let mut m = Self::new(); + m.read_map_file(map_path); + m + } + /** - Opens the mapfile pointed by the `mapPath` argument and parses it. + Opens the mapfile pointed by the `map_path` argument and parses it. The format of the map will be guessed based on its contents. @@ -54,16 +73,16 @@ impl MapFile { - GNU ld - clang ld.lld */ - pub fn read_map_file(&mut self, map_path: PathBuf) { - let map_contents = utils::read_file_contents(&map_path); + pub fn read_map_file(&mut self, map_path: &Path) { + let map_contents = utils::read_file_contents(map_path); - self.parse_map_contents(map_contents); + self.parse_map_contents(&map_contents); } /** Parses the contents of the map. - The `mapContents` argument must contain the contents of a mapfile. + The `map_contents` argument must contain the contents of a mapfile. The format of the map will be guessed based on its contents. @@ -71,11 +90,11 @@ impl MapFile { - GNU ld - clang ld.lld */ - pub fn parse_map_contents(&mut self, map_contents: String) { + pub fn parse_map_contents(&mut self, map_contents: &str) { let regex_lld_header = Regex::new(r"\s+VMA\s+LMA\s+Size\s+Align\s+Out\s+In\s+Symbol").unwrap(); - if regex_lld_header.is_match(&map_contents) { + if regex_lld_header.is_match(map_contents) { self.parse_map_contents_lld(map_contents); } else { // GNU is the fallback @@ -86,9 +105,9 @@ impl MapFile { /** Parses the contents of a GNU ld map. - The `mapContents` argument must contain the contents of a GNU ld mapfile. + The `map_contents` argument must contain the contents of a GNU ld mapfile. */ - pub fn parse_map_contents_gnu(&mut self, map_contents: String) { + pub fn parse_map_contents_gnu(&mut self, map_contents: &str) { // TODO: maybe move somewhere else? let regex_section_alone_entry = Regex::new(r"^\s+(?P
[^*][^\s]+)\s*$").unwrap(); let regex_file_data_entry = Regex::new(r"^\s+(?P
([^*][^\s]+)?)\s+(?P0x[^\s]+)\s+(?P0x[^\s]+)\s+(?P[^\s]+)$").unwrap(); @@ -205,7 +224,7 @@ impl MapFile { name.push("__fill__"); filepath = prev_file.filepath.with_file_name(name); vram = prev_file.vram + prev_file.size; - section_type = prev_file.section_type.clone(); + section_type.clone_from(&prev_file.section_type); } current_segment.files_list.push(file::File::new_default( @@ -299,9 +318,9 @@ impl MapFile { /** Parses the contents of a clang ld.lld map. - The `mapContents` argument must contain the contents of a clang ld.lld mapfile. + The `map_contents` argument must contain the contents of a clang ld.lld mapfile. */ - pub fn parse_map_contents_lld(&mut self, map_contents: String) { + pub fn parse_map_contents_lld(&mut self, map_contents: &str) { let map_data = map_contents; // Every line starts with this information, so instead of duplicating it we put them on one single regex @@ -347,7 +366,7 @@ impl MapFile { name.push("__fill__"); filepath = prev_file.filepath.with_file_name(name); - section_type = prev_file.section_type.clone(); + section_type.clone_from(&prev_file.section_type); } let mut new_file = file::File::new_default(filepath, vram, size, §ion_type); @@ -443,7 +462,7 @@ impl MapFile { } } - pub fn filter_by_section_type(&self, section_type: &str) -> MapFile { + pub fn filter_by_section_type(&self, section_type: &str) -> Self { let mut new_map_file = MapFile::new(); for segment in &self.segments_list { @@ -457,7 +476,7 @@ impl MapFile { new_map_file } - pub fn get_every_file_except_section_type(&self, section_type: &str) -> MapFile { + pub fn get_every_file_except_section_type(&self, section_type: &str) -> Self { let mut new_map_file = MapFile::new(); for segment in &self.segments_list { @@ -499,14 +518,15 @@ impl MapFile { pub fn find_lowest_differing_symbol( &self, - other_map_file: MapFile, - ) -> Option<(symbol::Symbol, file::File, Option)> { + other_map_file: &Self, + ) -> Option<(&symbol::Symbol, &file::File, Option<&symbol::Symbol>)> { let mut min_vram = u64::MAX; let mut found = None; + let mut found_indices = (0, 0); - for built_segement in &self.segments_list { - for built_file in &built_segement.files_list { - for (i, built_sym) in built_file.symbols.iter().enumerate() { + for (i, built_segment) in self.segments_list.iter().enumerate() { + for (j, built_file) in built_segment.files_list.iter().enumerate() { + for (k, built_sym) in built_file.symbols.iter().enumerate() { if let Some(expected_sym_info) = other_map_file.find_symbol_by_name(&built_sym.name) { @@ -515,24 +535,61 @@ impl MapFile { if built_sym.vram != expected_sym.vram && built_sym.vram < min_vram { min_vram = built_sym.vram; - let mut prev_sym = None; - if i > 0 { - prev_sym = Some(built_file.symbols[i - 1].clone()); - } + let prev_sym = if k > 0 { + Some(&built_file.symbols[k - 1]) + } else { + None + }; found = Some((built_sym, built_file, prev_sym)); + found_indices = (i as isize, j as isize); } } } } } - if let Some(found_temp) = found { - return Some((found_temp.0.clone(), found_temp.1.clone(), found_temp.2)); + if let Some((found_built_sym, found_built_file, prev_sym)) = found { + if prev_sym.is_none() { + // Previous symbol was not in the same section of the given + // file, so we try to backtrack until we find any symbol. + + let (mut i, mut j) = found_indices; + + // We want to check the previous file, not the current one, + // since we already know the current one doesn't have a symbol + // preceding the one we found. + j -= 1; + + 'outer: while i >= 0 { + let built_segment = &self.segments_list[i as usize]; + + while j >= 0 { + let built_file = &built_segment.files_list[j as usize]; + + if !built_file.symbols.is_empty() { + found = Some(( + found_built_sym, + found_built_file, + built_file.symbols.last(), + )); + break 'outer; + } + + j -= 1; + } + + i -= 1; + if i >= 0 { + j = self.segments_list[i as usize].files_list.len() as isize - 1; + } + } + } } - None + + found } - pub fn mix_folders(&self) -> MapFile { + pub fn mix_folders(&self) -> Self { let mut new_map_file = MapFile::new(); for segment in &self.segments_list { @@ -544,9 +601,9 @@ impl MapFile { pub fn get_progress( &self, - asm_path: PathBuf, - nonmatchings: PathBuf, - aliases: HashMap, + asm_path: &Path, + nonmatchings: &Path, + aliases: &HashMap, path_index: usize, ) -> ( progress_stats::ProgressStats, @@ -616,7 +673,7 @@ impl MapFile { /// Useful for finding bss reorders pub fn compare_files_and_symbols( &self, - other_map_file: MapFile, + other_map_file: &Self, check_other_on_self: bool, ) -> maps_comparison_info::MapsComparisonInfo { let mut comp_info = maps_comparison_info::MapsComparisonInfo::new(); @@ -625,17 +682,15 @@ impl MapFile { for file in &segment.files_list { for symbol in &file.symbols { if let Some(found_sym_info) = other_map_file.find_symbol_by_name(&symbol.name) { - let diff = symbol.vram as i64 - found_sym_info.symbol.vram as i64; let comp = symbol_comparison_info::SymbolComparisonInfo::new( symbol.clone(), symbol.vram, Some(file.clone()), symbol.vram, Some(found_sym_info.file), - Some(diff), ); - if diff != 0 { + if comp.diff() != Some(0) { comp_info.bad_files.insert(file.clone()); } comp_info.compared_list.push(comp); @@ -648,7 +703,6 @@ impl MapFile { Some(file.clone()), u64::MAX, None, - None, ), ); } @@ -671,7 +725,6 @@ impl MapFile { None, symbol.vram, Some(file.clone()), - None, ), ); } @@ -716,18 +769,18 @@ impl MapFile { impl MapFile { // TODO: figure out if this is doing unnecessary copies or something - fn preprocess_map_data_gnu(mut map_data: String) -> String { + fn preprocess_map_data_gnu(map_data: &str) -> String { // Skip the stuff we don't care about // Looking for this string will only work on English machines (or C locales) // but it doesn't matter much, because if this string is not found then the // parsing should still work, but just a bit slower because of the extra crap if let Some(aux_var) = map_data.find("\nLinker script and memory map") { if let Some(start_index) = map_data[aux_var + 1..].find('\n') { - map_data = map_data[aux_var + 1 + start_index + 1..].to_string(); + return map_data[aux_var + 1 + start_index + 1..].to_string(); } } - map_data + map_data.to_string() } } @@ -742,27 +795,31 @@ impl Default for MapFile { pub(crate) mod python_bindings { use pyo3::prelude::*; - use std::collections::HashMap; - use std::path::PathBuf; + use std::{collections::HashMap, path::PathBuf}; use crate::{file, found_symbol_info, maps_comparison_info, progress_stats, segment, symbol}; #[pymethods] impl super::MapFile { #[new] - pub fn py_new() -> Self { + fn py_new() -> Self { Self::new() } - pub fn readMapFile(&mut self, map_path: PathBuf) { - self.read_map_file(map_path) + #[staticmethod] + fn newFromMapFile(map_path: PathBuf) -> Self { + Self::new_from_map_file(&map_path) + } + + fn readMapFile(&mut self, map_path: PathBuf) { + self.read_map_file(&map_path) } - pub fn parseMapContents(&mut self, map_contents: String) { + fn parseMapContents(&mut self, map_contents: &str) { self.parse_map_contents(map_contents) } - pub fn parseMapContentsGNU(&mut self, map_contents: String) { + fn parseMapContentsGNU(&mut self, map_contents: &str) { self.parse_map_contents_gnu(map_contents) } @@ -772,45 +829,46 @@ pub(crate) mod python_bindings { The `mapContents` argument must contain the contents of a clang ld.lld mapfile. */ #[pyo3(name = "parseMapContentsLLD")] - pub fn parseMapContentsLLD(&mut self, map_contents: String) { + fn parseMapContentsLLD(&mut self, map_contents: &str) { self.parse_map_contents_lld(map_contents) } - pub fn filterBySectionType(&self, section_type: &str) -> Self { + fn filterBySectionType(&self, section_type: &str) -> Self { self.filter_by_section_type(section_type) } - pub fn getEveryFileExceptSectionType(&self, section_type: &str) -> Self { + fn getEveryFileExceptSectionType(&self, section_type: &str) -> Self { self.get_every_file_except_section_type(section_type) } - pub fn findSymbolByName( - &self, - sym_name: &str, - ) -> Option { + fn findSymbolByName(&self, sym_name: &str) -> Option { self.find_symbol_by_name(sym_name) } - pub fn findSymbolByVramOrVrom( + fn findSymbolByVramOrVrom( &self, address: u64, ) -> Option { self.find_symbol_by_vram_or_vrom(address) } - pub fn findLowestDifferingSymbol( + fn findLowestDifferingSymbol( &self, - other_map_file: Self, + other_map_file: &Self, ) -> Option<(symbol::Symbol, file::File, Option)> { - self.find_lowest_differing_symbol(other_map_file) + if let Some((s, f, os)) = self.find_lowest_differing_symbol(other_map_file) { + Some((s.clone(), f.clone(), os.cloned())) + } else { + None + } } - pub fn mixFolders(&self) -> Self { + fn mixFolders(&self) -> Self { self.mix_folders() } #[pyo3(signature = (asm_path, nonmatchings, aliases=HashMap::new(), path_index=2))] - pub fn getProgress( + fn getProgress( &self, asm_path: PathBuf, nonmatchings: PathBuf, @@ -820,33 +878,33 @@ pub(crate) mod python_bindings { progress_stats::ProgressStats, HashMap, ) { - self.get_progress(asm_path, nonmatchings, aliases, path_index) + self.get_progress(&asm_path, &nonmatchings, &aliases, path_index) } #[pyo3(signature=(other_map_file, *, check_other_on_self=true))] - pub fn compareFilesAndSymbols( + fn compareFilesAndSymbols( &self, - other_map_file: Self, + other_map_file: &Self, check_other_on_self: bool, ) -> maps_comparison_info::MapsComparisonInfo { self.compare_files_and_symbols(other_map_file, check_other_on_self) } #[pyo3(signature=(print_vram=true, skip_without_symbols=true))] - pub fn toCsv(&self, print_vram: bool, skip_without_symbols: bool) -> String { + fn toCsv(&self, print_vram: bool, skip_without_symbols: bool) -> String { self.to_csv(print_vram, skip_without_symbols) } - pub fn toCsvSymbols(&self) -> String { + fn toCsvSymbols(&self) -> String { self.to_csv_symbols() } #[pyo3(signature=(print_vram=true, skip_without_symbols=true))] - pub fn printAsCsv(&self, print_vram: bool, skip_without_symbols: bool) { + fn printAsCsv(&self, print_vram: bool, skip_without_symbols: bool) { self.print_as_csv(print_vram, skip_without_symbols) } - pub fn printSymbolsCsv(&self) { + fn printSymbolsCsv(&self) { self.print_symbols_csv() } diff --git a/src/rs/maps_comparison_info.rs b/src/rs/maps_comparison_info.rs index 6cedd2c..6dce8fd 100644 --- a/src/rs/maps_comparison_info.rs +++ b/src/rs/maps_comparison_info.rs @@ -46,7 +46,7 @@ pub(crate) mod python_bindings { #[pymethods] impl super::MapsComparisonInfo { #[new] - pub fn py_new() -> Self { + fn py_new() -> Self { Self::new() } diff --git a/src/rs/progress_stats.rs b/src/rs/progress_stats.rs index 7e0e48d..14eb4b0 100644 --- a/src/rs/progress_stats.rs +++ b/src/rs/progress_stats.rs @@ -109,7 +109,7 @@ pub(crate) mod python_bindings { #[pymethods] impl super::ProgressStats { #[new] - pub fn py_new() -> Self { + fn py_new() -> Self { Self::new() } @@ -139,7 +139,7 @@ pub(crate) mod python_bindings { #[getter] #[pyo3(name = "total")] - pub fn py_total(&self) -> usize { + fn py_total(&self) -> usize { self.total() } @@ -161,24 +161,24 @@ pub(crate) mod python_bindings { self.decomped_percentage_total(total_stats) } - pub fn getAsFrogressEntry(&self, name: &str) -> HashMap { + fn getAsFrogressEntry(&self, name: &str) -> HashMap { self.get_as_frogress_entry(name) } #[staticmethod] #[pyo3(signature=(category_column_size=28))] - pub fn getHeaderAsStr(category_column_size: usize) -> String { + fn getHeaderAsStr(category_column_size: usize) -> String { Self::get_header_as_str(category_column_size) } #[staticmethod] #[pyo3(signature=(category_column_size=28))] - pub fn printHeader(category_column_size: usize) { + fn printHeader(category_column_size: usize) { Self::print_header(category_column_size) } #[pyo3(signature=(category, total_stats, category_column_size=28))] - pub fn getEntryAsStr( + fn getEntryAsStr( &self, category: &str, total_stats: &Self, @@ -189,7 +189,7 @@ pub(crate) mod python_bindings { #[pyo3(name = "print")] #[pyo3(signature=(category, total_stats, category_column_size=28))] - pub fn py_print(&self, category: &str, total_stats: &Self, category_column_size: usize) { + fn py_print(&self, category: &str, total_stats: &Self, category_column_size: usize) { self.print(category, total_stats, category_column_size) } } diff --git a/src/rs/segment.rs b/src/rs/segment.rs index f472219..2137c72 100644 --- a/src/rs/segment.rs +++ b/src/rs/segment.rs @@ -13,8 +13,12 @@ use std::hash::{Hash, Hasher}; #[cfg(feature = "python_bindings")] use pyo3::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + #[derive(Debug, Clone)] #[cfg_attr(feature = "python_bindings", pyclass(module = "mapfile_parser"))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Segment { pub name: String, @@ -73,7 +77,7 @@ impl Segment { if let Some(sym) = file.find_symbol_by_name(sym_name) { return Some(found_symbol_info::FoundSymbolInfo::new_default( file.clone(), - sym, + sym.clone(), )); } } @@ -85,13 +89,10 @@ impl Segment { address: u64, ) -> Option { for file in &self.files_list { - if let Some(pair) = file.find_symbol_by_vram_or_vrom(address) { - let sym = pair.0; - let offset = pair.1; - + if let Some((sym, offset)) = file.find_symbol_by_vram_or_vrom(address) { return Some(found_symbol_info::FoundSymbolInfo::new( file.clone(), - sym, + sym.clone(), offset, )); } @@ -102,7 +103,6 @@ impl Segment { pub fn mix_folders(&self) -> Self { let mut new_segment = self.clone_no_filelist(); - // > let mut aux_dict = HashMap::new(); // Put files in the same folder together @@ -286,7 +286,7 @@ pub(crate) mod python_bindings { #[pymethods] impl super::Segment { #[new] - pub fn py_new(name: String, vram: u64, size: u64, vrom: u64, align: Option) -> Self { + fn py_new(name: String, vram: u64, size: u64, vrom: u64, align: Option) -> Self { Self::new(name, vram, size, vrom, align) } @@ -362,47 +362,44 @@ pub(crate) mod python_bindings { /* Methods */ - pub fn filterBySectionType(&self, section_type: &str) -> Self { + fn filterBySectionType(&self, section_type: &str) -> Self { self.filter_by_section_type(section_type) } - pub fn getEveryFileExceptSectionType(&self, section_type: &str) -> Self { + fn getEveryFileExceptSectionType(&self, section_type: &str) -> Self { self.get_every_file_except_section_type(section_type) } - pub fn findSymbolByName( - &self, - sym_name: &str, - ) -> Option { + fn findSymbolByName(&self, sym_name: &str) -> Option { self.find_symbol_by_name(sym_name) } - pub fn findSymbolByVramOrVrom( + fn findSymbolByVramOrVrom( &self, address: u64, ) -> Option { self.find_symbol_by_vram_or_vrom(address) } - pub fn mixFolders(&self) -> Self { + fn mixFolders(&self) -> Self { self.mix_folders() } #[pyo3(signature=(print_vram=true, skip_without_symbols=true))] - pub fn toCsv(&self, print_vram: bool, skip_without_symbols: bool) -> String { + fn toCsv(&self, print_vram: bool, skip_without_symbols: bool) -> String { self.to_csv(print_vram, skip_without_symbols) } - pub fn toCsvSymbols(&self) -> String { + fn toCsvSymbols(&self) -> String { self.to_csv_symbols() } #[pyo3(signature=(print_vram=true, skip_without_symbols=true))] - pub fn printAsCsv(&self, print_vram: bool, skip_without_symbols: bool) { + fn printAsCsv(&self, print_vram: bool, skip_without_symbols: bool) { self.print_as_csv(print_vram, skip_without_symbols) } - pub fn printSymbolsCsv(&self) { + fn printSymbolsCsv(&self) { self.print_symbols_csv() } diff --git a/src/rs/symbol.rs b/src/rs/symbol.rs index bb2c15b..e94457b 100644 --- a/src/rs/symbol.rs +++ b/src/rs/symbol.rs @@ -7,8 +7,12 @@ use std::hash::{Hash, Hasher}; #[cfg(feature = "python_bindings")] use pyo3::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + #[derive(Debug, Clone)] #[cfg_attr(feature = "python_bindings", pyclass(module = "mapfile_parser"))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Symbol { pub name: String, @@ -20,7 +24,9 @@ pub struct Symbol { pub align: Option, + // idk if it is worth to continue maintaining this, given the complexity introduced by other features #[cfg(feature = "python_bindings")] + #[cfg(not(feature = "serde"))] chached_name: Option, } @@ -40,6 +46,7 @@ impl Symbol { align, #[cfg(feature = "python_bindings")] + #[cfg(not(feature = "serde"))] chached_name: None, } } @@ -53,6 +60,7 @@ impl Symbol { align: None, #[cfg(feature = "python_bindings")] + #[cfg(not(feature = "serde"))] chached_name: None, } } @@ -131,7 +139,7 @@ pub(crate) mod python_bindings { impl super::Symbol { #[new] #[pyo3(signature=(name,vram,size=None,vrom=None,align=None))] - pub fn py_new( + fn py_new( name: String, vram: u64, size: Option, @@ -144,6 +152,7 @@ pub(crate) mod python_bindings { /* Getters and setters */ #[getter] + #[cfg(not(feature = "serde"))] fn get_name(&mut self) -> PyObject { Python::with_gil(|py| { if self.chached_name.is_none() { @@ -154,9 +163,18 @@ pub(crate) mod python_bindings { }) } + #[getter] + #[cfg(feature = "serde")] + fn get_name(&self) -> PyResult { + Ok(self.name.clone()) + } + #[setter] fn set_name(&mut self, value: String) -> PyResult<()> { - self.chached_name = None; + #[cfg(not(feature = "serde"))] + { + self.chached_name = None; + } self.name = value; Ok(()) } @@ -265,37 +283,37 @@ pub(crate) mod python_bindings { /* Methods */ - pub fn getVramStr(&self) -> String { + fn getVramStr(&self) -> String { self.get_vram_str() } - pub fn getSizeStr(&self) -> String { + fn getSizeStr(&self) -> String { self.get_size_str() } - pub fn getVromStr(&self) -> String { + fn getVromStr(&self) -> String { self.get_vrom_str() } - pub fn getAlignStr(&self) -> String { + fn getAlignStr(&self) -> String { self.get_align_str() } #[staticmethod] - pub fn toCsvHeader() -> String { + fn toCsvHeader() -> String { Self::to_csv_header() } - pub fn toCsv(&self) -> String { + fn toCsv(&self) -> String { self.to_csv() } #[staticmethod] - pub fn printCsvHeader() { + fn printCsvHeader() { Self::print_csv_header() } - pub fn printAsCsv(&self) { + fn printAsCsv(&self) { self.print_as_csv() } diff --git a/src/rs/symbol_comparison_info.rs b/src/rs/symbol_comparison_info.rs index 8cef422..4e684d5 100644 --- a/src/rs/symbol_comparison_info.rs +++ b/src/rs/symbol_comparison_info.rs @@ -18,8 +18,6 @@ pub struct SymbolComparisonInfo { pub expected_address: u64, pub expected_file: Option, - - pub diff: Option, } impl SymbolComparisonInfo { @@ -29,7 +27,6 @@ impl SymbolComparisonInfo { build_file: Option, expected_address: u64, expected_file: Option, - diff: Option, ) -> Self { Self { symbol, @@ -37,9 +34,35 @@ impl SymbolComparisonInfo { build_file, expected_address, expected_file, - diff, } } + + pub fn diff(&self) -> Option { + if self.build_address == u64::MAX { + return None; + } + if self.expected_address == u64::MAX { + return None; + } + + let mut build_address = self.build_address; + let mut expected_address = self.expected_address; + + // If both symbols are present in the same file then we do a diff + // between their offsets into their respectives file. + // This is done as a way to avoid too much noise in case an earlier file + // did shift. + if let Some(build_file) = &self.build_file { + if let Some(expected_file) = &self.expected_file { + if build_file.filepath == expected_file.filepath { + build_address -= build_file.vram; + expected_address -= expected_file.vram; + } + } + } + + Some(build_address as i64 - expected_address as i64) + } } #[cfg(feature = "python_bindings")] @@ -52,14 +75,13 @@ pub(crate) mod python_bindings { #[pymethods] impl super::SymbolComparisonInfo { #[new] - #[pyo3(signature = (symbol, build_address, build_file, expected_address, expected_file, diff))] - pub fn py_new( + #[pyo3(signature = (symbol, build_address, build_file, expected_address, expected_file))] + fn py_new( symbol: symbol::Symbol, build_address: u64, build_file: Option, expected_address: u64, expected_file: Option, - diff: Option, ) -> Self { Self::new( symbol, @@ -67,7 +89,6 @@ pub(crate) mod python_bindings { build_file, expected_address, expected_file, - diff, ) } @@ -124,15 +145,9 @@ pub(crate) mod python_bindings { Ok(()) } - #[getter] - fn get_diff(&self) -> PyResult> { - Ok(self.diff) - } - - #[setter] - fn set_diff(&mut self, value: Option) -> PyResult<()> { - self.diff = value; - Ok(()) + #[pyo3(name = "diff")] + fn py_diff(&self) -> PyResult> { + Ok(self.diff()) } } } diff --git a/src/rs/utils.rs b/src/rs/utils.rs index a239a74..e10bdeb 100644 --- a/src/rs/utils.rs +++ b/src/rs/utils.rs @@ -1,13 +1,13 @@ /* SPDX-FileCopyrightText: © 2023-2024 Decompollaborate */ /* SPDX-License-Identifier: MIT */ -use std::{fs::File, io::Read, path::PathBuf}; +use std::{fs::File, io::Read, path::Path}; pub fn parse_hex(src: &str) -> u64 { u64::from_str_radix(src.trim_start_matches("0x"), 16).unwrap() } -pub fn read_file_contents(file_path: &PathBuf) -> String { +pub fn read_file_contents(file_path: &Path) -> String { let mut f = File::open(file_path).expect("Could not open input file"); let mut file_contents: String = String::new(); let _contents_length = f