Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft for IL decompilation #357

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions src/Draco.Compiler.Tests/Decompilation/CilFormatter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
using System.Reflection.Metadata;
using System.Reflection.PortableExecutable;
using System.Text;
using Draco.Compiler.Internal.Symbols;
using Draco.Compiler.Internal.Symbols.Metadata;
using Draco.Compiler.Tests.Utilities;

namespace Draco.Compiler.Tests.Decompilation;

/// <summary>
/// Renders the IL body of a compiled method as indented text, for use in decompilation tests.
/// </summary>
internal static class CilFormatter
{
    /// <summary>
    /// Produces the textual form of the body of <paramref name="func"/>: an opening brace,
    /// the prolog (<c>.maxstack</c> / <c>.locals</c>), the instructions, and a closing brace.
    /// </summary>
    /// <param name="library">The compiled library; supplies the compilation and the symbol lookup used while decoding.</param>
    /// <param name="func">The method whose body is read via its relative virtual address.</param>
    /// <param name="peReader">The PE reader the method body is fetched from.</param>
    /// <param name="reader">The metadata reader used for user strings and the local signature.</param>
    /// <returns>The formatted method body.</returns>
    public static string VisualizeIl(CompiledLibrary library, MetadataMethodSymbol func, PEReader peReader, MetadataReader reader)
    {
        var body = peReader.GetMethodBody(func.BodyRelativeVirtualAddress);

        var sb = new StringBuilder();
        var isb = new IndentedStringBuilder(sb);
        isb.AppendLine("{");
        isb.PushIndent();

        // TODO: branching & exception handling
        WriteBodyProlog(library, func, reader, body, isb);
        WriteInstructions(library, func, reader, body, isb);

        isb.PopIndent();
        isb.AppendLine("}");

        return sb.ToString();
    }

    /// <summary>
    /// Decodes every instruction of <paramref name="body"/> and writes one line per instruction,
    /// with <c>IL_XXXX:</c> labels on branch targets and braces around exception regions.
    /// </summary>
    private static unsafe void WriteInstructions(CompiledLibrary library, MetadataMethodSymbol func, MetadataReader reader, MethodBodyBlock body, IndentedStringBuilder sb)
    {
        var blobReader = body.GetILReader();
        var span = new ReadOnlySpan<byte>(blobReader.StartPointer, blobReader.Length);

        var instructions = new List<CilInstruction>(10);
        var jumpTargets = new HashSet<int>();

        // First pass: decode everything up front so that labels can be emitted
        // for backward as well as forward branch targets.
        while (!span.IsEmpty)
        {
            var instruction = InstructionDecoder.Read(span, blobReader.Length - span.Length, library.Codegen.GetSymbol, reader.GetUserString, out var advance);
            span = span[advance..];

            if (InstructionDecoder.IsBranch(instruction.OpCode))
                jumpTargets.Add(((IConvertible)instruction.Operand!).ToInt32(null));

            instructions.Add(instruction);
        }

        // Second pass: write the text.
        foreach (var (opCode, offset, operand) in instructions)
        {
            WriteRegionStarts(body, offset, sb);

            if (jumpTargets.Contains(offset))
            {
                // Labels are written one indentation level out from the instructions.
                using (sb.WithDedent())
                {
                    sb.Append("IL_");
                    sb.Append(offset.ToString("X4"));
                    sb.AppendLine(":");
                }
            }

            sb.Append(InstructionDecoder.GetText(opCode));
            WriteOperand(library, opCode, operand, sb);
            sb.AppendLine();

            WriteRegionEnds(body, offset + InstructionDecoder.GetTotalOpCodeSize(opCode), sb);
        }
    }

    /// <summary>
    /// Opens a block for every exception region whose try or handler part starts at <paramref name="offset"/>.
    /// </summary>
    private static void WriteRegionStarts(MethodBodyBlock body, int offset, IndentedStringBuilder sb)
    {
        foreach (var region in body.ExceptionRegions)
        {
            if (region.TryOffset == offset)
            {
                sb.AppendLine(".try {");
                sb.PushIndent();
            }
            else if (region.HandlerOffset == offset)
            {
                // Every handler kind opens a block, because WriteRegionEnds closes one for
                // every handler; opening only some kinds leaves the indentation unbalanced.
                // TODO: the catch type and the filter block itself are not rendered yet.
                sb.AppendLine(GetHandlerKeyword(region.Kind) + " {");
                sb.PushIndent();
            }
        }
    }

    /// <summary>
    /// Closes the block of every exception region whose try or handler part ends at <paramref name="endOffset"/>.
    /// </summary>
    private static void WriteRegionEnds(MethodBodyBlock body, int endOffset, IndentedStringBuilder sb)
    {
        foreach (var region in body.ExceptionRegions)
        {
            var tryEnds = region.TryOffset + region.TryLength == endOffset;
            var handlerEnds = region.HandlerOffset + region.HandlerLength == endOffset;

            if (tryEnds || handlerEnds)
            {
                sb.PopIndent();
                sb.AppendLine("}");
            }
        }
    }

    /// <summary>
    /// Maps a handler kind to the keyword that introduces its block.
    /// </summary>
    private static string GetHandlerKeyword(ExceptionRegionKind kind) => kind switch
    {
        ExceptionRegionKind.Catch => "catch",
        ExceptionRegionKind.Filter => "filter",
        ExceptionRegionKind.Finally => "finally",
        ExceptionRegionKind.Fault => "fault",
        _ => throw new ArgumentOutOfRangeException(nameof(kind)),
    };

    /// <summary>
    /// Writes the operand of an instruction (if any) after its mnemonic, without a line terminator.
    /// </summary>
    private static void WriteOperand(CompiledLibrary library, ILOpCode opCode, object? operand, IndentedStringBuilder sb)
    {
        switch (operand)
        {
            case null:
                break;
            case Symbol symbol:
                sb.Append(' ');
                MethodBodyTokenFormatter.FormatTo(symbol, library.Compilation, sb);
                break;
            case string text:
                sb.Append(' ');
                sb.Append('"');
                sb.Append(text);
                sb.Append('"');
                break;
            case { } when InstructionDecoder.IsBranch(opCode):
                // The caller terminates the line, so the target must not append one itself
                // (doing so produced a blank line after every branch instruction).
                sb.Append(" IL_");
                sb.Append(((IFormattable)operand).ToString("X4", null));
                break;
            default:
                sb.Append(' ');
                sb.Append(operand);
                break;
        }
    }

    /// <summary>
    /// Writes the <c>.maxstack</c> and <c>.locals</c> directives followed by an empty line.
    /// Nothing at all is written when the body has no local signature.
    /// </summary>
    private static void WriteBodyProlog(CompiledLibrary library, MetadataMethodSymbol func, MetadataReader reader, MethodBodyBlock body, IndentedStringBuilder sb)
    {
        // NOTE(review): .maxstack is skipped together with .locals for bodies without locals;
        // kept as-is because existing expected outputs may rely on it - confirm this is intended.
        if (body.LocalSignature.IsNil)
            return;

        sb.Append(".maxstack ");
        sb.Append(body.MaxStack);
        sb.AppendLine();

        sb.Append(".locals ");
        if (body.LocalVariablesInitialized)
            sb.Append("init ");

        sb.Append('(');

        var locals = reader.GetStandaloneSignature(body.LocalSignature).DecodeLocalSignature(library.Compilation.TypeProvider, func);
        for (var i = 0; i < locals.Length; i++)
        {
            if (i > 0)
                sb.Append(", ");

            MethodBodyTokenFormatter.FormatTo(locals[i], library.Compilation, sb);
        }

        sb.Append(')');
        sb.AppendLine();
        sb.AppendLine();
    }
}
5 changes: 5 additions & 0 deletions src/Draco.Compiler.Tests/Decompilation/CilInstruction.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using System.Reflection.Metadata;

namespace Draco.Compiler.Tests.Decompilation;

/// <summary>
/// A single decoded IL instruction.
/// </summary>
/// <param name="OpCode">The opcode of the instruction.</param>
/// <param name="Offset">The offset of the instruction; presumably its byte offset within the method's IL stream - verify against the decoder.</param>
/// <param name="Operand">The decoded operand, or <c>null</c> when there is none.</param>
internal readonly record struct CilInstruction(ILOpCode OpCode, int Offset, object? Operand);
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
using System.Diagnostics.CodeAnalysis;
using System.Runtime.InteropServices;

namespace Draco.Compiler.Tests.Decompilation;

/// <summary>
/// Compares CIL text token by token, ignoring how much whitespace (spaces and line breaks)
/// separates the tokens. A quoted literal (<c>'...'</c> or <c>"..."</c>) is a single token,
/// so whitespace inside quotes is significant.
/// </summary>
internal sealed class CilSpaceAgnosticStringComparer : IEqualityComparer<string>
{
    /// <summary>
    /// A shared comparer that compares tokens ordinally.
    /// </summary>
    public static CilSpaceAgnosticStringComparer Ordinal { get; } = new(StringComparison.Ordinal);

    private readonly StringComparison _comparison;

    /// <summary>
    /// Creates a comparer that compares individual tokens using <paramref name="comparison"/>.
    /// </summary>
    public CilSpaceAgnosticStringComparer(StringComparison comparison)
    {
        _comparison = comparison;
    }

    /// <summary>
    /// Determines whether both strings consist of the same sequence of tokens.
    /// </summary>
    /// <exception cref="InvalidOperationException">A quoted literal in either string is not closed.</exception>
    public bool Equals(string? x, string? y)
    {
        if (x is null)
            return y is null;

        if (y is null)
            return false;

        var xIt = new Enumerator(x);
        var yIt = new Enumerator(y);

        while (xIt.MoveNext())
        {
            // y ran out of tokens first
            if (!yIt.MoveNext())
                return false;

            if (!xIt.CurrentSpan.Equals(yIt.CurrentSpan, _comparison))
                return false;
        }

        // x is exhausted; the strings are equal only if y is exhausted too
        return !yIt.MoveNext();
    }

    /// <summary>
    /// Computes a hash from the tokens of <paramref name="obj"/>, independent of the whitespace between them.
    /// </summary>
    /// <exception cref="InvalidOperationException">A quoted literal in the string is not closed.</exception>
    public int GetHashCode([DisallowNull] string obj)
    {
        var hash = new HashCode();

        var span = obj.AsSpan();

        // Hash each token with the same comparison Equals uses; hashing the raw characters
        // would give different hashes to strings that compare equal under a case-insensitive
        // or culture-aware comparison.
        foreach (var range in new Enumerable(span))
            hash.Add(string.GetHashCode(span[range], _comparison));

        return hash.ToHashCode();
    }

    /// <summary>
    /// Enables <c>foreach</c> over the token ranges of a span.
    /// </summary>
    private readonly ref struct Enumerable
    {
        public ReadOnlySpan<char> String { get; }

        public Enumerable(ReadOnlySpan<char> @string) => String = @string;

        public Enumerator GetEnumerator() => new(String);
    }

    /// <summary>
    /// Walks over the tokens of a span: runs of non-whitespace characters, or whole quoted literals.
    /// </summary>
    private ref struct Enumerator
    {
        public ReadOnlySpan<char> String { get; }

        private int _start;
        private int _end;

        public Enumerator(ReadOnlySpan<char> @string)
        {
            String = @string;
            _start = 0;
            _end = 0;
        }

        /// <summary>
        /// Advances to the next token.
        /// </summary>
        /// <returns><c>false</c> when only whitespace (or nothing) is left.</returns>
        /// <exception cref="InvalidOperationException">The token starts with a quote that is never closed.</exception>
        public bool MoveNext()
        {
            var s = String;

            // continue after the previous token and skip the separating whitespace
            _start = _end;

            while (_start < s.Length && IsWhiteSpace(s[_start]))
                _start++;

            _end = _start;

            if (_start == s.Length)
                return false;

            if (s[_end] is '\'' or '\"')
            {
                var quote = s[_end];
                _end++;

                // The bounds check has to come first: otherwise an unclosed literal indexes
                // past the end of the span instead of reaching the diagnostic below.
                while (_end < s.Length && s[_end] != quote)
                    _end++;

                if (_end == s.Length)
                    throw new InvalidOperationException("Unclosed quoted string");

                // include the closing quote in the token
                _end++;
            }
            else
            {
                while (_end < s.Length && !IsWhiteSpace(s[_end]))
                    _end++;
            }

            return true;
        }

        private static bool IsWhiteSpace(char ch)
        {
            // don't use char.IsWhiteSpace as it checks additional chars, which won't appear in code
            return ch is ' ' or '\n' or '\r';
        }

        /// <summary>The characters of the current token.</summary>
        public readonly ReadOnlySpan<char> CurrentSpan => String[Current];

        /// <summary>The range of the current token within <see cref="String"/>.</summary>
        public readonly Range Current => new(_start, _end);
    }
}
Loading