Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Format improvements (WIP) #63

Draft
wants to merge 18 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,13 +476,13 @@ pub enum Expr {
},
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct IntFormat {
pub unsigned: bool,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might prefer signed rather than unsigned to avoid double negatives

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went with unsigned since it's more of an exception to the norm than anything. I tend to structure my bools with true as the "do something different" value so that they don't need to be negated whenever being read, but I guess that doesn't matter as much with match since you have to be explicit in both cases.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true as "do something different" makes sense in C-likes where it's easy to zero-initialize things. It doesn't make so much sense in rust.

pub radix: IntRadix,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum IntRadix {
/// Display as decimal.
Dec,
Expand Down
40 changes: 20 additions & 20 deletions src/core_mapfiles/std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ static STD_06: &CoreSignatures = &CoreSignatures {
(Th06, 0, Some(("fff", None))),
(Th06, 1, Some(("Cff", None))),
(Th06, 2, Some(("fff", None))),
(Th06, 3, Some(("S________", None))),
(Th06, 4, Some(("S________", None))),
(Th06, 5, Some(("____________", None))),
(Th06, 3, Some(("S__", None))),
(Th06, 4, Some(("S__", None))),
(Th06, 5, Some(("___", None))),
],
var: &[],
};
Expand All @@ -34,40 +34,40 @@ static STD_07_09: &CoreSignatures = &CoreSignatures {
ins: &[
(Th07, 0, Some(("fff", None))),
(Th07, 1, Some(("Cff", None))),
(Th07, 2, Some(("S________", None))),
(Th07, 3, Some(("____________", None))),
(Th07, 4, Some(("ot____", Some(IKind::Jmp)))),
(Th07, 2, Some(("S__", None))),
(Th07, 3, Some(("___", None))),
(Th07, 4, Some(("ot_", Some(IKind::Jmp)))),
(Th07, 5, Some(("fff", None))),
(Th07, 6, Some(("SS____", None))),
(Th07, 6, Some(("SS_", None))),
(Th07, 7, Some(("fff", None))),
(Th07, 8, Some(("SS____", None))),
(Th07, 8, Some(("SS_", None))),
(Th07, 9, Some(("fff", None))),
(Th07, 10, Some(("SS____", None))),
(Th07, 11, Some(("f________", None))),
(Th07, 12, Some(("SS____", None))),
(Th07, 13, Some(("C________", None))),
(Th07, 10, Some(("SS_", None))),
(Th07, 11, Some(("f__", None))),
(Th07, 12, Some(("SS_", None))),
(Th07, 13, Some(("C__", None))),
(Th07, 14, Some(("fff", None))),
(Th07, 15, Some(("fff", None))),
(Th07, 16, Some(("fff", None))),
(Th07, 17, Some(("fff", None))),
(Th07, 18, Some(("S________", None))),
(Th07, 18, Some(("S__", None))),
(Th07, 19, Some(("fff", None))),
(Th07, 20, Some(("fff", None))),
(Th07, 21, Some(("fff", None))),
(Th07, 22, Some(("fff", None))),
(Th07, 23, Some(("S________", None))),
(Th07, 23, Some(("S__", None))),
(Th07, 24, Some(("fff", None))),
(Th07, 25, Some(("fff", None))),
(Th07, 26, Some(("fff", None))),
(Th07, 27, Some(("fff", None))),
(Th07, 28, Some(("S________", None))),
(Th07, 29, Some(("S________", None))), // anm script
(Th07, 30, Some(("S________", None))), // anm script
(Th07, 31, Some(("S________", Some(IKind::InterruptLabel)))),
(Th07, 28, Some(("S__", None))),
(Th07, 29, Some(("S__", None))), // anm script
(Th07, 30, Some(("S__", None))), // anm script
(Th07, 31, Some(("S__", Some(IKind::InterruptLabel)))),

(Th08, 32, Some(("fff", None))),
(Th08, 33, Some(("S________", None))),
(Th08, 34, Some(("S________", None))), // anm script
(Th08, 33, Some(("S__", None))),
(Th08, 34, Some(("S__", None))), // anm script
],
var: &[],
};
Expand Down
47 changes: 29 additions & 18 deletions src/llir/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub struct InstrAbi {
///
/// By this notion, [`ArgEncoding`] tends to be more relevant for immediate/literal arguments, while
/// [`ScalarType`] tends to be more relevant for variables.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ArgEncoding {
/// `S`, `s`, or `c` in mapfile. Integer immediate or register. Displayed as signed.
/// `U`, `u`, or `b` in mapfile. Integer immediate or register. Displayed as unsigned.
Expand All @@ -47,8 +47,9 @@ pub enum ArgEncoding {
JumpOffset,
/// `t` in mapfile. Max of one per instruction, and requires an accompanying `o` arg.
JumpTime,
/// `_` in mapfile. Unused 1-byte space.
Padding,
/// `_` in mapfile. Unused 4-byte space.
/// `-` in mapfile. Unused 1-byte space.
Padding { size: u8 },
/// `f` in mapfile. Single-precision float.
Float { immediate: bool },
/// `z(bs=<int>)`, `m(bs=<int>;mask=<int>,<int>,<int>)`, or `m(len=<int>;mask=<int>,<int>,<int>)` in mapfile.
Expand All @@ -66,7 +67,7 @@ pub enum ArgEncoding {
},
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum StringArgSize {
/// A string arg that uses `len=`.
///
Expand All @@ -93,7 +94,9 @@ impl ArgEncoding {
Self::Integer { size: _, .. } => "integer",
Self::JumpOffset => "jump offset",
Self::JumpTime => "jump time",
Self::Padding => "padding",
Self::Padding { size: 4 } => "dword padding",
Self::Padding { size: 1 } => "byte padding",
Self::Padding { size: _ } => "padding",
Self::Float { .. } => "float",
Self::String { .. } => "string",
}
Expand Down Expand Up @@ -125,18 +128,23 @@ impl ArgEncoding {
}

pub fn contributes_to_param_mask(&self) -> bool {
!matches!(self, Self::Padding)
!matches!(self, Self::Padding { .. })
}

pub fn is_immediate(&self) -> bool {
matches!(self,
pub fn is_always_immediate(&self) -> bool {
match self {
| Self::String { .. }
| Self::JumpOffset
| Self::JumpTime
| Self::Padding
| Self::Padding { .. }
| Self::Integer { immediate: true, .. }
| Self::Float { immediate: true, .. }
)
=> true,

| Self::Integer { immediate: false, .. }
| Self::Float { immediate: false, .. }
=> false,
}
}
}

Expand Down Expand Up @@ -207,7 +215,7 @@ impl ArgEncoding {
match self {
| ArgEncoding::JumpOffset
| ArgEncoding::JumpTime
| ArgEncoding::Padding
| ArgEncoding::Padding { .. }
| ArgEncoding::Integer { .. }
=> ScalarType::Int,

Expand Down Expand Up @@ -274,15 +282,17 @@ fn int_from_attrs(param: &abi_ast::Param, emitter: &dyn Emitter) -> Result<Optio
hex_radix = true;
}

let radix = match hex_radix {
false => ast::IntRadix::Dec,
true => ast::IntRadix::Hex,
};

Ok(Some(ArgEncoding::Integer {
size,
ty_color: user_ty_color.or(default_ty_color),
arg0: arg0.is_some(),
immediate: imm.is_some(),
format: match hex_radix {
false => ast::IntFormat { unsigned: unsigned, radix: ast::IntRadix::Dec },
true => ast::IntFormat { unsigned: unsigned, radix: ast::IntRadix::Hex },
},
format: ast::IntFormat { unsigned, radix },
}))
})
}
Expand Down Expand Up @@ -351,7 +361,8 @@ fn other_from_attrs(param: &abi_ast::Param, _emitter: &dyn Emitter) -> Result<Op
match param.format_char.value {
'o' => Ok(Some(ArgEncoding::JumpOffset)),
't' => Ok(Some(ArgEncoding::JumpTime)),
'_' => Ok(Some(ArgEncoding::Padding)),
'_' => Ok(Some(ArgEncoding::Padding { size: 4 })),
'-' => Ok(Some(ArgEncoding::Padding { size: 1 })),
_ => Ok(None),
}
}
Expand Down Expand Up @@ -407,8 +418,8 @@ fn abi_to_signature(abi: &InstrAbi, abi_span: Span, ctx: &mut CompilerContext<'_
| ArgEncoding::JumpTime
=> Info { ty: ScalarType::Int, default: None, reg_ok: false, ty_color: None },

| ArgEncoding::Padding
=> Info { ty: ScalarType::Int, default: Some(sp!(0.into())), reg_ok: true, ty_color: None },
| ArgEncoding::Padding { .. }
=> Info { ty: ScalarType::Int, default: Some(sp!(0.into())), reg_ok: false, ty_color: None },
ExpHP marked this conversation as resolved.
Show resolved Hide resolved

| ArgEncoding::Float { .. }
=> Info { ty: ScalarType::Float, default: None, reg_ok: true, ty_color: None },
Expand Down
2 changes: 1 addition & 1 deletion src/llir/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ impl IntrinsicAbiHelper<'_> {
}

fn find_and_remove_padding(&self, arg_encodings: &mut Vec<(usize, &ArgEncoding)>) {
arg_encodings.retain(|(_, enc)| !matches!(*enc, ArgEncoding::Padding));
arg_encodings.retain(|(_, enc)| !matches!(*enc, ArgEncoding::Padding { .. }));
}

fn find_and_remove_jump(&self, arg_encodings: &mut Vec<(usize, &ArgEncoding)>) -> Result<(usize, abi_parts::JumpArgOrder), Diagnostic> {
Expand Down
12 changes: 8 additions & 4 deletions src/llir/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,12 @@ fn encode_args(
// to ensure that this loop reads an equal number of items from all iters.
assert!(args_iter.len() <= arg_encodings_iter.len());
for enc in arg_encodings_iter.by_ref() {
if matches!(enc, ArgEncoding::Padding) {
args_blob.write_u8(0).expect("Cursor<Vec> failed?!");
if let ArgEncoding::Padding { size } = enc {
match size {
1 => args_blob.write_u8(0).expect("Cursor<Vec> failed?!"),
4 => args_blob.write_u32(0).expect("Cursor<Vec> failed?!"),
_ => unreachable!(),
}
continue;
}
let arg = args_iter.next().expect("function arity already checked");
Expand All @@ -567,7 +571,7 @@ fn encode_args(
};
// Verify this arg even applies to the param mask...
if enc.contributes_to_param_mask() {
if enc.is_immediate() && arg_bit != 0 {
if enc.is_always_immediate() && arg_bit != 0 {
// Warn if a register is used for an immediate arg
emitter.emit(warning!(
message("non-constant expression in immediate argument"),
Expand All @@ -593,7 +597,7 @@ fn encode_args(

match *enc {
| ArgEncoding::Integer { arg0: true, .. }
| ArgEncoding::Padding
| ArgEncoding::Padding { .. }
=> unreachable!(),

| ArgEncoding::JumpOffset
Expand Down
30 changes: 22 additions & 8 deletions src/llir/raise/early.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,19 +224,34 @@ fn decode_args_with_abi(

let reg_style = hooks.register_style();
for (arg_index, enc) in siggy.arg_encodings().enumerate() {
// Padding produces values for the sake of verifying the bytes are 0. TODO: Implement!
// They're filtered out later on after dealing with @arg0 in the argument-raising pass.
if let ArgEncoding::Padding { size } = enc {
decrease_len(emitter, &mut remaining_len, *size as usize)?;
let raw_value = match size {
1 => args_blob.read_u8().expect("already checked len") as i32,
4 => args_blob.read_u32().expect("already checked len") as i32,
_ => unreachable!(),
};
args.push(SimpleArg { value: ScalarValue::Int(raw_value), is_reg: false } );
continue;
}
let ref emitter = add_argument_context(emitter, arg_index);

// TODO: Add a way to fallback to @mask for
// "bad" mask bits to allow roundtripping
let can_be_param = if enc.contributes_to_param_mask() {
let value = !enc.is_immediate() && param_mask & 1 == 1;
let value = !enc.is_always_immediate() && param_mask & 1 == 1;
param_mask >>= 1;
value
} else {
false
};

let value = match *enc {
| ArgEncoding::Padding { .. }
=> unreachable!(),

| ArgEncoding::Integer { arg0: true, .. }
=> {
// a check that non-timeline languages don't have timeline args in their signature
Expand All @@ -250,7 +265,7 @@ fn decode_args_with_abi(
| ArgEncoding::Integer { arg0: false, size: 4, format: ast::IntFormat { unsigned: false, radix: _ }, .. }
=> {
decrease_len(emitter, &mut remaining_len, 4)?;
ScalarValue::Int(args_blob.read_i32().expect("already checked len") as i32)
ScalarValue::Int(args_blob.read_i32().expect("already checked len"))
},

| ArgEncoding::Integer { arg0: false, size: 2, format: ast::IntFormat { unsigned: false, radix: _ }, .. }
Expand All @@ -277,7 +292,6 @@ fn decode_args_with_abi(
ScalarValue::Int(args_blob.read_u16().expect("already checked len") as i32)
},

| ArgEncoding::Padding
| ArgEncoding::Integer { arg0: false, size: 1, format: ast::IntFormat { unsigned: true, radix: _ }, .. }
=> {
decrease_len(emitter, &mut remaining_len, 1)?;
Expand Down Expand Up @@ -556,7 +570,7 @@ impl AtomRaiser<'_, '_> {
//
// IMPORTANT: this is looking at the original arg list because the new lengths may differ due to arg0.
let mut arg_iter = abi.arg_encodings();
raised_args.retain(|_| !matches!(arg_iter.next().unwrap(), ArgEncoding::Padding));
raised_args.retain(|_| !matches!(arg_iter.next().unwrap(), ArgEncoding::Padding { .. }));

Ok(RaisedIntrinsicParts {
opcode: Some(instr.opcode),
Expand Down Expand Up @@ -665,7 +679,7 @@ impl AtomRaiser<'_, '_> {
| ArgEncoding::Integer { ty_color: None, format, .. }
=> Ok(ast::Expr::LitInt { value: raw.expect_int(), format: *format }),

| ArgEncoding::Padding
| ArgEncoding::Padding { .. }
=> Ok(ast::Expr::from(raw.expect_int())),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this is unreachable now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's definitely not unreachable since the padding args are only dropped after being raised thanks to some sort of logic with @arg0. I don't know how important the positioning of that is since you'd left a comment about it being important to reference the original args list, so I chose to leave it there for now. If it's possible to implement the "verify the bytes are 0 or fallback to blob" check at at line 541 before raising all the arguments, then it would be unreachable for sure.


| ArgEncoding::Integer { ty_color: Some(ty_color), format, .. }
Expand All @@ -677,7 +691,7 @@ impl AtomRaiser<'_, '_> {
&lookup_table,
raw.expect_int(),
ty_color,
format,
*format,
))
}

Expand Down Expand Up @@ -750,7 +764,7 @@ impl AtomRaiser<'_, '_> {
}
}

fn raise_to_possibly_named_constant(names: &IdMap<i32, Sp<Ident>>, id: i32, ty_color: &TypeColor, format: &ast::IntFormat) -> ast::Expr {
fn raise_to_possibly_named_constant(names: &IdMap<i32, Sp<Ident>>, id: i32, ty_color: &TypeColor, format: ast::IntFormat) -> ast::Expr {
match names.get(&id) {
Some(ident) => {
match ty_color {
Expand All @@ -762,7 +776,7 @@ fn raise_to_possibly_named_constant(names: &IdMap<i32, Sp<Ident>>, id: i32, ty_c
},
}
},
None => ast::Expr::LitInt { value: id, format: *format },
None => ast::Expr::LitInt { value: id, format: format },
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/parse/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ pub fn parse_abi(
}

match next_char {
format_char @ sp_pat!('a'..='z' | 'A'..='Z' | '_' | '0'..='9') => {
format_char @ sp_pat!('a'..='z' | 'A'..='Z' | '_' | '-' | '0'..='9') => {
let next_non_ws = text.chars().filter(|&c| !is_ws(c)).next();
let attributes = match next_non_ws {
// Type with attributes.
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/timeline_arg0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ source_test!(
ECL_TIMELINE_06, unused_arg0_padding_edge_case,
mapfile: r#"!eclmap
!timeline_ins_signatures
200 s(arg0;enum="MsgScript")S________
200 s(arg0;enum="MsgScript")S__
"#,
// this is an edge case that arose in decompilation where the presence of a timeline
// arg could make the code that trims padding look at the wrong values
Expand Down