forked from Dees-Troy/android_external_libenc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
enc_base.h
740 lines (703 loc) · 23.2 KB
/
enc_base.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Alexander V. Astapchuk
*/
/**
* @file
* @brief Main encoding routines and structures.
*/
#ifndef __ENC_BASE_H_INCLUDED__
#define __ENC_BASE_H_INCLUDED__
#include "enc_defs.h"
#include <stdlib.h>
#include <assert.h>
#include <memory.h>
ENCODER_NAMESPACE_START
struct MnemonicInfo;
struct OpcodeInfo;
struct Rex;
/**
* @brief Basic facilities for generation of processor's instructions.
*
* The class EncoderBase represents the basic facilities for the encoding of
* processor's instructions on IA32 and EM64T platforms.
*
* The class provides general interface to generate the instructions as well
* as to retrieve some static data about instructions (number of arguments,
* their roles, etc).
*
* Currently, the EncoderBase class is used for both LIL and Jitrino code
* generators. Each of these code generators has its own wrapper to adapt
* this general interface for specific needs - see encoder.h for LIL wrappers
* and Ia32Encoder.h for Jitrino's adapter.
*
* Interface is provided through static methods, no instances of EncoderBase
* to be created.
*
* @todo RIP-based addressing on EM64T - it's not yet supported currently.
*/
class EncoderBase {
public:
class Operands;
struct MnemonicDesc;
/**
* @brief Generates processor's instruction.
*
* @param stream - a buffer to generate into
* @param mn - \link Mnemonic mnemonic \endlink of the instruction
* @param opnds - operands for the instruction
* @returns (stream + length of the just generated instruction)
*/
static char * encode(char * stream, Mnemonic mn, const Operands& opnds);
/**
 * @brief Returns a location (a \c char* into a buffer) for operand \c index.
 *
 * NOTE(review): undocumented in the original. Presumably returns the
 * position in the code stream recorded for operand \c index by the most
 * recent encode() call, backed by #curRelOpnd (which has 3 slots, so
 * \c index would have to be in [0,2]) - confirm against enc_base.cpp.
 */
static char * getOpndLocation(int index);
/**
* @brief Generates the smallest possible number of NOP-s.
*
* Effectively generates the smallest possible number of instructions,
* which are NOP-s for CPU. Normally used to make a code alignment.
*
* The method inserts exactly number of bytes specified. It's a caller's
* responsibility to make sure the buffer is big enough.
*
* @param stream - buffer where to generate code into, can not be NULL
* @param howMany - how many bytes to fill with NOP-s
* @return \c (stream+howMany)
*/
static char * nops(char * stream, unsigned howMany);
/**
* @brief Inserts a prefix into the code buffer.
*
* The method writes no more than one byte into the buffer. This is a
* caller's responsibility to make sure the buffer is big enough.
*
* @param stream - buffer where to insert the prefix
* @param pref - prefix to be inserted. If it's InstPrefix_Null, then
* no action performed and return value is \c stream.
* @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream
* otherwise
*/
static char * prefix(char* stream, InstPrefix pref);
/**
* @brief Determines if operand with opndExt suits the position with instExt.
*/
static bool extAllowed(OpndExt opndExt, OpndExt instExt);
/**
 * @brief Looks up the #MnemonicDesc entry for the given Mnemonic.
 *
 * @param mn - mnemonic to look up; must be less than Mnemonic_Count
 *             (enforced by an assert only)
 * @return pointer to the matching entry of the static #mnemonics table
 */
static const MnemonicDesc * getMnemonicDesc(Mnemonic mn)
{
    assert(mn < Mnemonic_Count);
    const MnemonicDesc * const entry = &mnemonics[mn];
    return entry;
}
/**
* @brief Returns a Mnemonic for the given name.
*
* The lookup is case insensitive, if no mnemonic found for the given
* string, then Mnemonic_Null returned.
*/
static Mnemonic str2mnemonic(const char * mn_name);
/**
 * @brief Gives the printable name of the given Mnemonic.
 *
 * The mnemonic is not validated beyond the assert inside
 * getMnemonicDesc(); if an invalid mnemonic is passed, the behavior is
 * unpredictable.
 *
 * @param mn - mnemonic whose name is requested
 * @return the \c name field of the mnemonic's #MnemonicDesc
 */
static const char * getMnemonicString(Mnemonic mn)
{
    const MnemonicDesc * const desc = getMnemonicDesc(mn);
    return desc->name;
}
/**
 * @brief Shorthand for getMnemonicString(): printable name of a Mnemonic.
 *
 * @param mn - mnemonic whose name is requested
 * @return the \c name field of the mnemonic's #MnemonicDesc
 */
static const char * toStr(Mnemonic mn)
{
    return getMnemonicString(mn);
}
/**
 * @brief Description of operand.
 *
 * Description of an operand in opcode - its kind, size, extension, or
 * RegName if the operand must be a particular register.
 */
struct OpndDesc {
/**
 * @brief Location of the operand.
 *
 * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem.
 */
OpndKind kind;
/**
 * @brief Size of the operand.
 */
OpndSize size;
/**
 * @brief Extension of the operand.
 */
OpndExt ext;
/**
 * @brief Appropriate RegName if operand must reside on a particular
 *        register (i.e. CWD/CDQ instructions), RegName_Null
 *        otherwise.
 */
RegName reg;
};
/**
 * @brief Description of operands' roles in instruction.
 */
struct OpndRolesDesc {
/**
 * @brief Total number of operands in the operation.
 */
unsigned count;
/**
 * @brief Number of defs in the operation.
 */
unsigned defCount;
/**
 * @brief Number of uses in the operation.
 */
unsigned useCount;
/**
 * @brief Operand roles, bit-packed.
 *
 * A bit-packed info about operands' roles. Each operand's role is
 * described by two bits, counted from right-to-left - the less
 * significant bits (0,1) represent operand#0.
 *
 * The mask is built by ORing #OpndRole_Def and #OpndRole_Use
 * appropriately and shifting left, i.e. operand#0's role would be
 *  - '(OpndRole_Def|OpndRole_Use)'
 *  - opnd#1's role would be 'OpndRole_Use<<2'
 *  - and operand#2's role would be, say, 'OpndRole_Def<<4'.
 *
 * NOTE(review): getOpndRoles() reads operand \c idx from bit position
 * (count-1-idx)*2, i.e. it treats the LAST operand as occupying bits
 * (0,1). That disagrees with the layout described above - confirm which
 * one the table data actually follows.
 */
unsigned roles;
};
/**
 * @brief Extracts the OpndRole of one operand from a bit-packed roles mask.
 *
 * The order of operands is left-to-right, i.e. for MOV, it
 * would be 'MOV op0, op1'.
 *
 * @param ord - roles descriptor; \c ord.count operands are packed two bits
 *              each into \c ord.roles
 * @param idx - operand number, must be less than \c ord.count (enforced by
 *              an assert only)
 * @return the two role bits of operand \c idx, cast to OpndRole
 */
static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx)
{
    assert(idx < ord.count);
    // The last operand (idx == count-1) sits in the two lowest bits; every
    // earlier operand is two bits further to the left.
    const unsigned shift = (ord.count - 1 - idx) * 2;
    const unsigned roleBits = (ord.roles >> shift) & 0x3;
    return (OpndRole)roleBits;
}
/**
 * @brief Info about single opcode - its opcode bytes, operands,
 *        operands' roles.
 *
 * Declared as a union with a 128-byte array so that the object is at least
 * 128 bytes large regardless of the size of the payload struct.
 */
union OpcodeDesc {
char dummy[128]; // To make total size a power of 2
struct {
/**
 * @brief Raw opcode bytes.
 *
 * 'Raw' opcode bytes which do not require any analysis and are
 * independent from arguments/sizes/etc (may include opcode size
 * prefix).
 */
char opcode[5];
// NOTE(review): presumably the number of valid bytes in opcode[] - confirm.
unsigned opcode_len;
// NOTE(review): aux0/aux1 are undocumented; presumably the 'special things'
// ('/r', 'ib', etc.) that encode_aux() consumes - confirm in enc_base.cpp.
unsigned aux0;
unsigned aux1;
/**
 * @brief Info about opcode's operands.
 *
 * The [3] mostly comes from IDIV/IMUL which both may have up to 3
 * operands.
 */
OpndDesc opnds[3];
// NOTE(review): undocumented; presumably the index in opnds[] of the first
// operand that takes part in encoding - confirm in enc_base.cpp.
unsigned first_opnd;
/**
 * @brief Info about operands - total number, number of uses/defs,
 *        operands' roles.
 */
OpndRolesDesc roles;
/**
 * @brief If not zero, then this is final OpcodeDesc structure in
 *        the list of opcodes for a given mnemonic.
 */
char last;
// NOTE(review): undocumented; presumably identifies the platform(s) the
// opcode is valid for - confirm against enc_tabl.cpp.
char platf;
};
};
public:
/**
 * @brief General info about mnemonic.
 *
 * One entry per Mnemonic is kept in the static #mnemonics table and is
 * handed out by getMnemonicDesc().
 */
struct MnemonicDesc {
/**
 * @brief The mnemonic itself.
 */
Mnemonic mn;
/**
 * @brief Various characteristics of mnemonic.
 * @see MF_
 */
unsigned flags;
/**
 * @brief Operation's operand's count and roles.
 *
 * For the operations whose opcodes may use different number of
 * operands (i.e. IMUL/SHL) either most common value used, or empty
 * value left.
 */
OpndRolesDesc roles;
/**
 * @brief Print name of the mnemonic.
 *
 * This is the string returned by getMnemonicString() and toStr().
 */
const char * name;
};
/**
 * @brief Magic number, shows a maximum value a hash code can take.
 *
 * For meaning and arithmetics see enc_tabl.cpp.
 *
 * The value was increased from '5155' to '8192' to make it aligned
 * for faster access in EncoderBase::lookup().
 *
 * It is also the size of the second dimension of #opcodesHashMap.
 */
static const unsigned int HASH_MAX = 8192; // was 5155
/**
 * @brief Empty value, used in hash-to-opcode map to show an empty slot.
 */
static const unsigned char NOHASH = 0xFF;
/**
 * @brief Number of hash bits contributed by each operand.
 *
 * Operands::add() shifts the accumulated hash left by this many bits
 * before OR-ing in the hash of the operand being added.
 */
static const unsigned char HASH_BITS_PER_OPERAND = 5;
/**
* @brief Contains info about a single instructions's operand - its
* location, size and a value for immediate or RegName for
* register operands.
*/
class Operand {
public:
/**
* @brief Initializes the instance with empty size and kind.
*/
Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {}
/**
* @brief Creates register operand from given RegName.
*/
Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)),
m_size(getRegSize(reg)),
m_ext(ext), m_reg(reg)
{
hash_it();
}
/**
* @brief Creates register operand from given RegName and with the
* specified size and kind.
*
* Used to speedup Operand creation as there is no need to extract
* size and kind from the RegName.
* The provided size and kind must match the RegName's ones though.
*/
Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) :
m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg)
{
assert(m_size == getRegSize(reg));
assert(m_kind == getRegKind(reg));
hash_it();
}
/**
* @brief Creates immediate operand with the given size and value.
*/
Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates immediate operand of OpndSize_32.
*/
Operand(int ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates immediate operand of OpndSize_16.
*/
Operand(short ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates immediate operand of OpndSize_8.
*/
Operand(char ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates memory operand.
*/
Operand(OpndSize size, RegName base, RegName index, unsigned scale,
int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
{
m_base = base;
m_index = index;
m_scale = scale;
m_disp = disp;
hash_it();
}
/**
* @brief Creates memory operand with only base and displacement.
*/
Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
{
m_base = base;
m_index = RegName_Null;
m_scale = 0;
m_disp = disp;
hash_it();
}
//
// general info
//
/**
* @brief Returns kind of the operand.
*/
OpndKind kind(void) const { return m_kind; }
/**
* @brief Returns size of the operand.
*/
OpndSize size(void) const { return m_size; }
/**
* @brief Returns extention of the operand.
*/
OpndExt ext(void) const { return m_ext; }
/**
* @brief Returns hash of the operand.
*/
unsigned hash(void) const { return m_hash; }
//
#ifdef _EM64T_
bool need_rex(void) const { return m_need_rex; }
#else
bool need_rex(void) const { return false; }
#endif
/**
* @brief Tests whether operand is memory operand.
*/
bool is_mem(void) const { return is_placed_in(OpndKind_Mem); }
/**
* @brief Tests whether operand is immediate operand.
*/
bool is_imm(void) const { return is_placed_in(OpndKind_Imm); }
/**
* @brief Tests whether operand is register operand.
*/
bool is_reg(void) const { return is_placed_in(OpndKind_Reg); }
/**
* @brief Tests whether operand is general-purpose register operand.
*/
bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); }
/**
* @brief Tests whether operand is float-point pseudo-register operand.
*/
bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); }
/**
* @brief Tests whether operand is XMM register operand.
*/
bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); }
#ifdef _HAVE_MMX_
/**
* @brief Tests whether operand is MMX register operand.
*/
bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); }
#endif
/**
* @brief Tests whether operand is signed immediate operand.
*/
//bool is_signed(void) const { assert(is_imm()); return m_is_signed; }
/**
* @brief Returns base of memory operand (RegName_Null if not memory).
*/
RegName base(void) const { return is_mem() ? m_base : RegName_Null; }
/**
* @brief Returns index of memory operand (RegName_Null if not memory).
*/
RegName index(void) const { return is_mem() ? m_index : RegName_Null; }
/**
* @brief Returns scale of memory operand (0 if not memory).
*/
unsigned scale(void) const { return is_mem() ? m_scale : 0; }
/**
* @brief Returns displacement of memory operand (0 if not memory).
*/
int disp(void) const { return is_mem() ? m_disp : 0; }
/**
* @brief Returns RegName of register operand (RegName_Null if not
* register).
*/
RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; }
/**
* @brief Returns value of immediate operand (0 if not immediate).
*/
long long imm(void) const { return is_imm() ? m_imm64 : 0; }
private:
bool is_placed_in(OpndKind kd) const
{
return kd == OpndKind_Reg ?
m_kind == OpndKind_GPReg ||
#ifdef _HAVE_MMX_
m_kind == OpndKind_MMXReg ||
#endif
m_kind == OpndKind_FPReg ||
m_kind == OpndKind_XMMReg
: kd == m_kind;
}
void hash_it(void)
{
m_hash = get_size_hash(m_size) | get_kind_hash(m_kind);
#ifdef _EM64T_
m_need_rex = false;
if (is_reg() && is_em64t_extra_reg(m_reg)) {
m_need_rex = true;
}
else if (is_mem() && (is_em64t_extra_reg(m_base) ||
is_em64t_extra_reg(m_index))) {
m_need_rex = true;
}
#endif
}
// general info
OpndKind m_kind;
OpndSize m_size;
OpndExt m_ext;
// complex address form support
RegName m_base;
RegName m_index;
unsigned m_scale;
union {
int m_disp;
RegName m_reg;
long long m_imm64;
};
unsigned m_hash;
bool m_need_rex;
friend class EncoderBase::Operands;
};
/**
* @brief Simple container for up to 3 Operand-s.
*/
class Operands {
public:
Operands(void)
{
clear();
}
Operands(const Operand& op0)
{
clear();
add(op0);
}
Operands(const Operand& op0, const Operand& op1)
{
clear();
add(op0); add(op1);
}
Operands(const Operand& op0, const Operand& op1, const Operand& op2)
{
clear();
add(op0); add(op1); add(op2);
}
unsigned count(void) const { return m_count; }
unsigned hash(void) const { return m_hash; }
const Operand& operator[](unsigned idx) const
{
assert(idx<m_count);
return m_operands[idx];
}
void add(const Operand& op)
{
assert(m_count < COUNTOF(m_operands));
m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash();
m_operands[m_count++] = op;
m_need_rex = m_need_rex || op.m_need_rex;
}
#ifdef _EM64T_
bool need_rex(void) const { return m_need_rex; }
#else
bool need_rex(void) const { return false; }
#endif
void clear(void)
{
m_count = 0; m_hash = 0; m_need_rex = false;
}
private:
unsigned m_count;
Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )];
unsigned m_hash;
bool m_need_rex;
};
public:
#ifdef _DEBUG
/**
* Verifies some presumptions about encoding data table.
* Called automatically during statics initialization.
*/
static int verify(void);
#endif
private:
/**
* @brief Returns found OpcodeDesc by the given Mnemonic and operands.
*/
static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds);
/**
* @brief Encodes mod/rm byte.
*/
static char* encodeModRM(char* stream, const Operands& opnds,
unsigned idx, const OpcodeDesc * odesc, Rex * prex);
/**
* @brief Encodes special things of opcode description - '/r', 'ib', etc.
*/
static char* encode_aux(char* stream, unsigned aux,
const Operands& opnds, const OpcodeDesc * odesc,
unsigned * pargsCount, Rex* prex);
#ifdef _EM64T_
/**
 * @brief Returns true if the 'reg' argument represents one of the new
 *        EM64T registers - R8(D)-R15(D).
 *
 * The result is true when needs_rex_r() accepts the register, or when the
 * register lies in the RegName_SPL..RegName_R15L range.
 *
 * The 64 bits versions of 'old-fashion' registers, i.e. RAX are not
 * considered as 'extra'.
 */
static bool is_em64t_extra_reg(const RegName reg)
{
    const bool inSplToR15l = RegName_SPL <= reg && reg <= RegName_R15L;
    return needs_rex_r(reg) || inSplToR15l;
}
/**
 * @brief Tells whether 'reg' falls into one of the R8..R15 or XMM8..XMM15
 *        register ranges, in any of the sized variants listed below.
 *
 * @param reg - register to test
 * @return true if \c reg lies within any of the checked RegName ranges
 */
static bool needs_rex_r(const RegName reg)
{
    return (RegName_R8    <= reg && reg <= RegName_R15)    ||
           (RegName_R8D   <= reg && reg <= RegName_R15D)   ||
           (RegName_R8S   <= reg && reg <= RegName_R15S)   ||
           (RegName_R8L   <= reg && reg <= RegName_R15L)   ||
           (RegName_XMM8  <= reg && reg <= RegName_XMM15)  ||
           (RegName_XMM8D <= reg && reg <= RegName_XMM15D) ||
           (RegName_XMM8S <= reg && reg <= RegName_XMM15S);
}
/**
 * @brief Returns a 'processor's index' of the register - the index
 *        used to encode the register in ModRM/SIB bytes.
 *
 * For the new EM64T registers the 'HW index' differs from the index
 * encoded in RegName. For old-fashion registers it's effectively the
 * same as ::getRegIndex(RegName).
 *
 * @param reg - register to translate
 * @return the index to be used when encoding \c reg
 */
static unsigned char getHWRegIndex(const RegName reg)
{
    // Non-GP registers and the SPL..DIL range keep their RegName index.
    const bool isGPReg = getRegKind(reg) == OpndKind_GPReg;
    if (!isGPReg || (RegName_SPL <= reg && reg <= RegName_DIL)) {
        return getRegIndex(reg);
    }
    // R8L..R15L are rebased against R8L.
    if (RegName_R8L <= reg && reg <= RegName_R15L) {
        return getRegIndex(reg) - getRegIndex(RegName_R8L);
    }
    // Remaining 'extra' registers are rebased against R8D.
    if (is_em64t_extra_reg(reg)) {
        return getRegIndex(reg) - getRegIndex(RegName_R8D);
    }
    return getRegIndex(reg);
}
#else
/**
 * @brief Variant used when _EM64T_ is not defined: the index used for
 *        encoding is simply ::getRegIndex(reg).
 */
static unsigned char getHWRegIndex(const RegName reg)
{
    const unsigned char hwIndex = getRegIndex(reg);
    return hwIndex;
}
/**
 * @brief Variant used when _EM64T_ is not defined: no register is ever
 *        reported as an 'extra' EM64T register.
 */
static bool is_em64t_extra_reg(const RegName reg)
{
    (void)reg; // intentionally unused in this configuration
    return false;
}
#endif
public:
/**
 * @brief Maps an operand size to its hash contribution.
 *
 * @param size - operand size
 * @return the #size_hash entry for \c size, or 0xFF when \c size is
 *         greater than OpndSize_64 (i.e. beyond the end of the table)
 */
static unsigned char get_size_hash(OpndSize size) {
    if (size > OpndSize_64) {
        return 0xFF;
    }
    return size_hash[size];
}
/**
 * @brief Maps an operand kind to its hash contribution.
 *
 * @param kind - operand kind
 * @return the #kind_hash entry for \c kind, or 0xFF when \c kind is
 *         greater than OpndKind_Mem (i.e. beyond the end of the table)
 */
static unsigned char get_kind_hash(OpndKind kind) {
    if (kind > OpndKind_Mem) {
        return 0xFF;
    }
    return kind_hash[kind];
}
/**
* @brief A table used for the fast computation of hash value.
*
* A change must be strictly balanced with hash-related functions and data
* in enc_base.h/.cpp.
*/
static const unsigned char size_hash[OpndSize_64+1];
/**
* @brief A table used for the fast computation of hash value.
*
* A change must be strictly balanced with hash-related functions and data
* in enc_base.h/.cpp.
*/
static const unsigned char kind_hash[OpndKind_Mem+1];
/**
* @brief Maximum number of opcodes used for a single mnemonic.
*
* No arithmetics behind the number, simply estimated.
*/
static const unsigned int MAX_OPCODES = 32; //20;
/**
* @brief Mapping between operands hash code and operands.
*/
static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX];
/**
* @brief Array of mnemonics.
*/
static MnemonicDesc mnemonics[Mnemonic_Count];
/**
* @brief Array of available opcodes.
*/
static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES];
/**
 * @brief Builds the encoding tables.
 *
 * Per the note on #dummy below, it is invoked automatically at startup.
 * NOTE(review): presumably fills #mnemonics, #opcodes and #opcodesHashMap,
 * and the meaning of the return value is not visible here - confirm in
 * enc_tabl.cpp.
 */
static int buildTable(void);
/**
 * NOTE(review): undocumented in the original; presumably fills the
 * table entries for the single mnemonic described by \c minfo - confirm in
 * enc_tabl.cpp.
 */
static void buildMnemonicDesc(const MnemonicInfo * minfo);
/**
 * @brief Computes hash value for the operands of the given opcode info.
 */
static unsigned short getHash(const OpcodeInfo* odesc);
/**
 * @brief Dummy variable, for automatic invocation of buildTable() at
 *        startup.
 */
static int dummy;
/**
 * NOTE(review): undocumented in the original; one \c char* slot per
 * operand position (3 slots). Presumably the stream locations reported by
 * getOpndLocation() - confirm in enc_base.cpp.
 */
static char * curRelOpnd[3];
};
ENCODER_NAMESPACE_END
#endif // ifndef __ENC_BASE_H_INCLUDED__