Skip to content

Commit

Permalink
Merge pull request #936 from google/py-tuning
Browse files Browse the repository at this point in the history
Model tuning and new test case
  • Loading branch information
reyammer authored Jan 30, 2025
2 parents b9896f4 + 3af5c52 commit feb1848
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 9 deletions.
2 changes: 1 addition & 1 deletion assets/models/standard_v3_0/config.min.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
2 changes: 1 addition & 1 deletion python/src/magika/models/standard_v3_0/config.min.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
6 changes: 6 additions & 0 deletions rust/cli/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 0.1.0-dev

### Minor

- Update the model thresholds

## 0.1.0-rc.3

### Minor
Expand Down
4 changes: 2 additions & 2 deletions rust/cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions rust/cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "magika-cli"
version = "0.1.0-rc.3"
version = "0.1.0-dev"
authors = ["Magika Developers <[email protected]>"]
license = "Apache-2.0"
edition = "2021"
Expand All @@ -20,7 +20,7 @@ anyhow = "1.0.86"
async-channel = "2.3.1"
clap = { version = "4.5.9", features = ["cargo", "derive", "string"] }
colored = "2.1.0"
magika = { version = "=0.1.0-rc.3", path = "../lib", features = ["serde"] }
magika = { version = "0.1.0-dev", path = "../lib", features = ["serde"] }
num_cpus = "1.16.0"
ort = "=2.0.0-rc.9"
serde = { version = "1.0.204", features = ["derive"] }
Expand Down
6 changes: 6 additions & 0 deletions rust/lib/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 0.1.0-dev

### Minor

- Update the model thresholds

## 0.1.0-rc.3

### Minor
Expand Down
2 changes: 1 addition & 1 deletion rust/lib/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion rust/lib/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "magika"
version = "0.1.0-rc.3"
version = "0.1.0-dev"
authors = ["Magika Developers <[email protected]>"]
license = "Apache-2.0"
edition = "2021"
Expand Down
2 changes: 1 addition & 1 deletion rust/lib/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub(crate) const CONFIG: ModelConfig = ModelConfig {
};

#[rustfmt::skip]
const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5];
const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5];
const OVERWRITE_MAP: [ContentType; ContentType::SIZE] = [
ContentType::_3gp,
ContentType::Ace,
Expand Down
5 changes: 5 additions & 0 deletions tests_data/basic/ignorefile/example.ignorefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.DS_Store
.classpath
.project
.target/
.settings/

0 comments on commit feb1848

Please sign in to comment.