diff options
Diffstat (limited to 'externals/breakpad/src/third_party/libdisasm/x86_format.c')
-rw-r--r-- | externals/breakpad/src/third_party/libdisasm/x86_format.c | 1430 |
1 files changed, 1430 insertions, 0 deletions
/*
 * Bounded concatenation macros used by every formatter in this file.
 *
 * STRNCAT( buf, str, len )
 *	Appends str to the NUL-terminated string in buf. `len` is the space
 *	still available in buf (terminator included) and must be a modifiable
 *	int lvalue: it is decremented by the number of characters appended,
 *	or set to 0 once the buffer is exhausted. A zero or negative `len`
 *	appends nothing and leaves `len` at 0, so a caller whose length
 *	bookkeeping went below zero cannot turn the strncat() bound into a
 *	huge unsigned value.
 *
 * STRNCATF( buf, fmt, data, len )
 *	Formats exactly one argument with fmt into a temporary and appends
 *	the result with STRNCAT.
 *
 * PRINT_DISPLACEMENT( ea )
 *	Appends the displacement of an effective address as hex, with a
 *	leading '-' when disp_sign is set. Nothing is appended when
 *	disp_size or disp is zero. Expects `buf` and `len` to be in scope
 *	at the point of use.
 */
#define STRNCAT( buf, str, len ) do { \
	if ( (len) > 0 ) { \
		char *_dst = (buf); \
		const char *_src = (str); \
		size_t _slen = strlen( _src ); \
		size_t _dlen = strlen( _dst ); \
		size_t _room = (size_t)(len) - 1; \
		strncat( _dst, _src, _room ); \
		if ( _room <= _slen ) { \
			/* buffer is full: terminate and stop appending */ \
			_dst[_dlen + _room] = '\0'; \
			(len) = 0; \
		} else { \
			(len) -= (int) _slen; \
		} \
	} else { \
		(len) = 0; \
	} \
} while( 0 )

#define STRNCATF( buf, fmt, data, len ) do { \
	char _tmp[MAX_OP_XML_STRING]; \
	\
	snprintf( _tmp, sizeof _tmp, fmt, data ); \
	/* snprintf is mapped to _snprintf under _MSC_VER, which may */ \
	/* leave the buffer unterminated on truncation */ \
	_tmp[sizeof _tmp - 1] = '\0'; \
	STRNCAT( buf, _tmp, len ); \
} while( 0 )


#define PRINT_DISPLACEMENT( ea ) do { \
	if ( (ea)->disp_size && (ea)->disp ) { \
		if ( (ea)->disp_sign ) { \
			STRNCATF( buf, "-0x%" PRIX32, -(ea)->disp, len ); \
		} else { \
			STRNCATF( buf, "0x%" PRIX32, (ea)->disp, len ); \
		} \
	} \
} while( 0 )
operand's data to string str. + */ +static void get_operand_data_str( x86_op_t *op, char *str, int len ){ + + if ( op->flags & op_signed ) { + switch ( op->datatype ) { + case op_byte: + snprintf( str, len, "%" PRId8, op->data.sbyte ); + return; + case op_word: + snprintf( str, len, "%" PRId16, op->data.sword ); + return; + case op_qword: + snprintf( str, len, "%" PRId64, op->data.sqword ); + return; + default: + snprintf( str, len, "%" PRId32, op->data.sdword ); + return; + } + } + + //else + switch ( op->datatype ) { + case op_byte: + snprintf( str, len, "0x%02" PRIX8, op->data.byte ); + return; + case op_word: + snprintf( str, len, "0x%04" PRIX16, op->data.word ); + return; + case op_qword: + snprintf( str, len, "0x%08" PRIX64,op->data.sqword ); + return; + default: + snprintf( str, len, "0x%08" PRIX32, op->data.dword ); + return; + } +} + +/* + * sprints register types to a string. the register types can be ORed + * together. + */ +static void get_operand_regtype_str( int regtype, char *str, int len ) +{ + static struct { + const char *name; + int value; + } operand_regtypes[] = { + {"reg_gen" , 0x00001}, + {"reg_in" , 0x00002}, + {"reg_out" , 0x00004}, + {"reg_local" , 0x00008}, + {"reg_fpu" , 0x00010}, + {"reg_seg" , 0x00020}, + {"reg_simd" , 0x00040}, + {"reg_sys" , 0x00080}, + {"reg_sp" , 0x00100}, + {"reg_fp" , 0x00200}, + {"reg_pc" , 0x00400}, + {"reg_retaddr", 0x00800}, + {"reg_cond" , 0x01000}, + {"reg_zero" , 0x02000}, + {"reg_ret" , 0x04000}, + {"reg_src" , 0x10000}, + {"reg_dest" , 0x20000}, + {"reg_count" , 0x40000}, + {NULL, 0}, //end + }; + + unsigned int i; + + memset( str, 0, len ); + + //go thru every type in the enum + for ( i = 0; operand_regtypes[i].name; i++ ) { + //skip if type is not set + if(! 
(regtype & operand_regtypes[i].value) ) + continue; + + //not the first time around + if( str[0] ) { + STRNCAT( str, " ", len ); + } + + STRNCAT(str, operand_regtypes[i].name, len ); + } +} + +static int format_expr( x86_ea_t *ea, char *buf, int len, + enum x86_asm_format format ) { + char str[MAX_OP_STRING]; + + if ( format == att_syntax ) { + if (ea->base.name[0] || ea->index.name[0] || ea->scale) { + PRINT_DISPLACEMENT(ea); + STRNCAT( buf, "(", len ); + + if ( ea->base.name[0]) { + STRNCATF( buf, "%%%s", ea->base.name, len ); + } + if ( ea->index.name[0]) { + STRNCATF( buf, ",%%%s", ea->index.name, len ); + if ( ea->scale > 1 ) { + STRNCATF( buf, ",%d", ea->scale, len ); + } + } + /* handle the syntactic exception */ + if ( ! ea->base.name[0] && + ! ea->index.name[0] ) { + STRNCATF( buf, ",%d", ea->scale, len ); + } + + STRNCAT( buf, ")", len ); + } else + STRNCATF( buf, "0x%" PRIX32, ea->disp, len ); + + } else if ( format == xml_syntax ){ + + if ( ea->base.name[0]) { + STRNCAT (buf, "\t\t\t<base>\n", len); + + get_operand_regtype_str (ea->base.type, str, + sizeof str); + STRNCAT (buf, "\t\t\t\t<register ", len); + STRNCATF (buf, "name=\"%s\" ", ea->base.name, len); + STRNCATF (buf, "type=\"%s\" ", str, len); + STRNCATF (buf, "size=%d/>\n", ea->base.size, len); + + STRNCAT (buf, "\t\t\t</base>\n", len); + } + + if ( ea->index.name[0]) { + STRNCAT (buf, "\t\t\t<index>\n", len); + + get_operand_regtype_str (ea->index.type, str, + sizeof str); + + STRNCAT (buf, "\t\t\t\t<register ", len); + STRNCATF (buf, "name=\"%s\" ", ea->index.name, len); + STRNCATF (buf, "type=\"%s\" ", str, len); + STRNCATF (buf, "size=%d/>\n", ea->index.size, len); + + STRNCAT (buf, "\t\t\t</index>\n", len); + } + + //scale + STRNCAT (buf, "\t\t\t<scale>\n", len); + STRNCAT (buf, "\t\t\t\t<immediate ", len); + STRNCATF (buf, "value=\"%d\"/>\n", ea->scale, len); + STRNCAT (buf, "\t\t\t</scale>\n", len); + + if ( ea->disp_size ) { + + STRNCAT (buf, "\t\t\t<displacement>\n", len); + + if ( 
ea->disp_size > 1 && ! ea->disp_sign ) { + STRNCAT (buf, "\t\t\t\t<address ", len); + STRNCATF (buf, "value=\"0x%" PRIX32 "\"/>\n", ea->disp, + len); + } else { + STRNCAT (buf, "\t\t\t\t<immediate ", len); + STRNCATF (buf, "value=%" PRId32 "/>\n", ea->disp, len); + } + + STRNCAT (buf, "\t\t\t</displacement>\n", len); + } + + } else if ( format == raw_syntax ) { + + PRINT_DISPLACEMENT(ea); + STRNCAT( buf, "(", len ); + + STRNCATF( buf, "%s,", ea->base.name, len ); + STRNCATF( buf, "%s,", ea->index.name, len ); + STRNCATF( buf, "%d", ea->scale, len ); + STRNCAT( buf, ")", len ); + + } else { + + STRNCAT( buf, "[", len ); + + if ( ea->base.name[0] ) { + STRNCAT( buf, ea->base.name, len ); + if ( ea->index.name[0] || + (ea->disp_size && ! ea->disp_sign) ) { + STRNCAT( buf, "+", len ); + } + } + if ( ea->index.name[0] ) { + STRNCAT( buf, ea->index.name, len ); + if ( ea->scale > 1 ) + { + STRNCATF( buf, "*%" PRId32, ea->scale, len ); + } + if ( ea->disp_size && ! ea->disp_sign ) + { + STRNCAT( buf, "+", len ); + } + } + + if ( ea->disp_size || (! ea->index.name[0] && + ! ea->base.name[0] ) ) + { + PRINT_DISPLACEMENT(ea); + } + + STRNCAT( buf, "]", len ); + } + + return( strlen(buf) ); +} + +static int format_seg( x86_op_t *op, char *buf, int len, + enum x86_asm_format format ) { + int len_orig = len; + const char *reg = ""; + + if (! op || ! buf || ! len || ! op->flags) { + return(0); + } + if ( op->type != op_offset && op->type != op_expression ){ + return(0); + } + if (! ((int) op->flags & 0xF00) ) { + return(0); + } + + switch (op->flags & 0xF00) { + case op_es_seg: reg = "es"; break; + case op_cs_seg: reg = "cs"; break; + case op_ss_seg: reg = "ss"; break; + case op_ds_seg: reg = "ds"; break; + case op_fs_seg: reg = "fs"; break; + case op_gs_seg: reg = "gs"; break; + default: + break; + } + + if (! 
reg[0] ) { + return( 0 ); + } + + switch( format ) { + case xml_syntax: + STRNCAT( buf, "\t\t\t<segment ", len ); + STRNCATF( buf, "value=\"%s\"/>\n", reg, len ); + break; + case att_syntax: + STRNCATF( buf, "%%%s:", reg, len ); + break; + + default: + STRNCATF( buf, "%s:", reg, len ); + break; + } + + return( len_orig - len ); /* return length of appended string */ +} + +static const char *get_operand_datatype_str( x86_op_t *op ){ + + static const char *types[] = { + "sbyte", /* 0 */ + "sword", + "sqword", + "sdword", + "sdqword", /* 4 */ + "byte", + "word", + "qword", + "dword", /* 8 */ + "dqword", + "sreal", + "dreal", + "extreal", /* 12 */ + "bcd", + "ssimd", + "dsimd", + "sssimd", /* 16 */ + "sdsimd", + "descr32", + "descr16", + "pdescr32", /* 20 */ + "pdescr16", + "bounds16", + "bounds32", + "fpu_env16", + "fpu_env32", /* 25 */ + "fpu_state16", + "fpu_state32", + "fp_reg_set" + }; + + /* handle signed values first */ + if ( op->flags & op_signed ) { + switch (op->datatype) { + case op_byte: return types[0]; + case op_word: return types[1]; + case op_qword: return types[2]; + case op_dqword: return types[4]; + default: return types[3]; + } + } + + switch (op->datatype) { + case op_byte: return types[5]; + case op_word: return types[6]; + case op_qword: return types[7]; + case op_dqword: return types[9]; + case op_sreal: return types[10]; + case op_dreal: return types[11]; + case op_extreal: return types[12]; + case op_bcd: return types[13]; + case op_ssimd: return types[14]; + case op_dsimd: return types[15]; + case op_sssimd: return types[16]; + case op_sdsimd: return types[17]; + case op_descr32: return types[18]; + case op_descr16: return types[19]; + case op_pdescr32: return types[20]; + case op_pdescr16: return types[21]; + case op_bounds16: return types[22]; + case op_bounds32: return types[23]; + case op_fpustate16: return types[24]; + case op_fpustate32: return types[25]; + case op_fpuenv16: return types[26]; + case op_fpuenv32: return types[27]; + 
case op_fpregset: return types[28]; + default: return types[8]; + } +} + +static int format_insn_eflags_str( enum x86_flag_status flags, char *buf, + int len) { + + static struct { + const char *name; + int value; + } insn_flags[] = { + { "carry_set ", 0x0001 }, + { "zero_set ", 0x0002 }, + { "oflow_set ", 0x0004 }, + { "dir_set ", 0x0008 }, + { "sign_set ", 0x0010 }, + { "parity_set ", 0x0020 }, + { "carry_or_zero_set ", 0x0040 }, + { "zero_set_or_sign_ne_oflow ", 0x0080 }, + { "carry_clear ", 0x0100 }, + { "zero_clear ", 0x0200 }, + { "oflow_clear ", 0x0400 }, + { "dir_clear ", 0x0800 }, + { "sign_clear ", 0x1000 }, + { "parity_clear ", 0x2000 }, + { "sign_eq_oflow ", 0x4000 }, + { "sign_ne_oflow ", 0x8000 }, + { NULL, 0x0000 }, //end + }; + + unsigned int i; + int len_orig = len; + + for (i = 0; insn_flags[i].name; i++) { + if (! (flags & insn_flags[i].value) ) + continue; + + STRNCAT( buf, insn_flags[i].name, len ); + } + + return( len_orig - len ); +} + +static const char *get_insn_group_str( enum x86_insn_group gp ) { + + static const char *types[] = { + "", // 0 + "controlflow",// 1 + "arithmetic", // 2 + "logic", // 3 + "stack", // 4 + "comparison", // 5 + "move", // 6 + "string", // 7 + "bit_manip", // 8 + "flag_manip", // 9 + "fpu", // 10 + "", // 11 + "", // 12 + "interrupt", // 13 + "system", // 14 + "other", // 15 + }; + + if ( gp > sizeof (types)/sizeof(types[0]) ) + return ""; + + return types[gp]; +} + +static const char *get_insn_type_str( enum x86_insn_type type ) { + + static struct { + const char *name; + int value; + } types[] = { + /* insn_controlflow */ + { "jmp", 0x1001 }, + { "jcc", 0x1002 }, + { "call", 0x1003 }, + { "callcc", 0x1004 }, + { "return", 0x1005 }, + { "loop", 0x1006 }, + /* insn_arithmetic */ + { "add", 0x2001 }, + { "sub", 0x2002 }, + { "mul", 0x2003 }, + { "div", 0x2004 }, + { "inc", 0x2005 }, + { "dec", 0x2006 }, + { "shl", 0x2007 }, + { "shr", 0x2008 }, + { "rol", 0x2009 }, + { "ror", 0x200A }, + /* insn_logic */ + { 
"and", 0x3001 }, + { "or", 0x3002 }, + { "xor", 0x3003 }, + { "not", 0x3004 }, + { "neg", 0x3005 }, + /* insn_stack */ + { "push", 0x4001 }, + { "pop", 0x4002 }, + { "pushregs", 0x4003 }, + { "popregs", 0x4004 }, + { "pushflags", 0x4005 }, + { "popflags", 0x4006 }, + { "enter", 0x4007 }, + { "leave", 0x4008 }, + /* insn_comparison */ + { "test", 0x5001 }, + { "cmp", 0x5002 }, + /* insn_move */ + { "mov", 0x6001 }, /* move */ + { "movcc", 0x6002 }, /* conditional move */ + { "xchg", 0x6003 }, /* exchange */ + { "xchgcc", 0x6004 }, /* conditional exchange */ + /* insn_string */ + { "strcmp", 0x7001 }, + { "strload", 0x7002 }, + { "strmov", 0x7003 }, + { "strstore", 0x7004 }, + { "translate", 0x7005 }, /* xlat */ + /* insn_bit_manip */ + { "bittest", 0x8001 }, + { "bitset", 0x8002 }, + { "bitclear", 0x8003 }, + /* insn_flag_manip */ + { "clear_carry", 0x9001 }, + { "clear_zero", 0x9002 }, + { "clear_oflow", 0x9003 }, + { "clear_dir", 0x9004 }, + { "clear_sign", 0x9005 }, + { "clear_parity", 0x9006 }, + { "set_carry", 0x9007 }, + { "set_zero", 0x9008 }, + { "set_oflow", 0x9009 }, + { "set_dir", 0x900A }, + { "set_sign", 0x900B }, + { "set_parity", 0x900C }, + { "tog_carry", 0x9010 }, + { "tog_zero", 0x9020 }, + { "tog_oflow", 0x9030 }, + { "tog_dir", 0x9040 }, + { "tog_sign", 0x9050 }, + { "tog_parity", 0x9060 }, + /* insn_fpu */ + { "fmov", 0xA001 }, + { "fmovcc", 0xA002 }, + { "fneg", 0xA003 }, + { "fabs", 0xA004 }, + { "fadd", 0xA005 }, + { "fsub", 0xA006 }, + { "fmul", 0xA007 }, + { "fdiv", 0xA008 }, + { "fsqrt", 0xA009 }, + { "fcmp", 0xA00A }, + { "fcos", 0xA00C }, + { "fldpi", 0xA00D }, + { "fldz", 0xA00E }, + { "ftan", 0xA00F }, + { "fsine", 0xA010 }, + { "fsys", 0xA020 }, + /* insn_interrupt */ + { "int", 0xD001 }, + { "intcc", 0xD002 }, /* not present in x86 ISA */ + { "iret", 0xD003 }, + { "bound", 0xD004 }, + { "debug", 0xD005 }, + { "trace", 0xD006 }, + { "invalid_op", 0xD007 }, + { "oflow", 0xD008 }, + /* insn_system */ + { "halt", 0xE001 }, + { "in", 
0xE002 }, /* input from port/bus */ + { "out", 0xE003 }, /* output to port/bus */ + { "cpuid", 0xE004 }, + /* insn_other */ + { "nop", 0xF001 }, + { "bcdconv", 0xF002 }, /* convert to or from BCD */ + { "szconv", 0xF003 }, /* change size of operand */ + { NULL, 0 }, //end + }; + + unsigned int i; + + //go thru every type in the enum + for ( i = 0; types[i].name; i++ ) { + if ( types[i].value == type ) + return types[i].name; + } + + return ""; +} + +static const char *get_insn_cpu_str( enum x86_insn_cpu cpu ) { + static const char *intel[] = { + "", // 0 + "8086", // 1 + "80286", // 2 + "80386", // 3 + "80387", // 4 + "80486", // 5 + "Pentium", // 6 + "Pentium Pro", // 7 + "Pentium 2", // 8 + "Pentium 3", // 9 + "Pentium 4" // 10 + }; + + if ( cpu < sizeof(intel)/sizeof(intel[0]) ) { + return intel[cpu]; + } else if ( cpu == 16 ) { + return "K6"; + } else if ( cpu == 32 ) { + return "K7"; + } else if ( cpu == 48 ) { + return "Athlon"; + } + + return ""; +} + +static const char *get_insn_isa_str( enum x86_insn_isa isa ) { + static const char *subset[] = { + NULL, // 0 + "General Purpose", // 1 + "Floating Point", // 2 + "FPU Management", // 3 + "MMX", // 4 + "SSE", // 5 + "SSE2", // 6 + "SSE3", // 7 + "3DNow!", // 8 + "System" // 9 + }; + + if ( isa > sizeof (subset)/sizeof(subset[0]) ) { + return ""; + } + + return subset[isa]; +} + +static int format_operand_att( x86_op_t *op, x86_insn_t *insn, char *buf, + int len){ + + char str[MAX_OP_STRING]; + + memset (str, 0, sizeof str); + + switch ( op->type ) { + case op_register: + STRNCATF( buf, "%%%s", op->data.reg.name, len ); + break; + + case op_immediate: + get_operand_data_str( op, str, sizeof str ); + STRNCATF( buf, "$%s", str, len ); + break; + + case op_relative_near: + STRNCATF( buf, "0x%08X", + (unsigned int)(op->data.sbyte + + insn->addr + insn->size), len ); + break; + + case op_relative_far: + if (op->datatype == op_word) { + STRNCATF( buf, "0x%08X", + (unsigned int)(op->data.sword + + insn->addr + 
insn->size), len ); + } else { + STRNCATF( buf, "0x%08X", + (unsigned int)(op->data.sdword + + insn->addr + insn->size), len ); + } + break; + + case op_absolute: + /* ATT uses the syntax $section, $offset */ + STRNCATF( buf, "$0x%04" PRIX16 ", ", op->data.absolute.segment, + len ); + if (op->datatype == op_descr16) { + STRNCATF( buf, "$0x%04" PRIX16, + op->data.absolute.offset.off16, len ); + } else { + STRNCATF( buf, "$0x%08" PRIX32, + op->data.absolute.offset.off32, len ); + } + break; + case op_offset: + /* ATT requires a '*' before JMP/CALL ops */ + if (insn->type == insn_jmp || insn->type == insn_call) + STRNCAT( buf, "*", len ); + + len -= format_seg( op, buf, len, att_syntax ); + STRNCATF( buf, "0x%08" PRIX32, op->data.sdword, len ); + break; + + case op_expression: + /* ATT requires a '*' before JMP/CALL ops */ + if (insn->type == insn_jmp || insn->type == insn_call) + STRNCAT( buf, "*", len ); + + len -= format_seg( op, buf, len, att_syntax ); + len -= format_expr( &op->data.expression, buf, len, + att_syntax ); + break; + case op_unused: + case op_unknown: + /* return 0-truncated buffer */ + break; + } + + return ( strlen( buf ) ); +} + +static int format_operand_native( x86_op_t *op, x86_insn_t *insn, char *buf, + int len){ + + char str[MAX_OP_STRING]; + + switch (op->type) { + case op_register: + STRNCAT( buf, op->data.reg.name, len ); + break; + + case op_immediate: + get_operand_data_str( op, str, sizeof str ); + STRNCAT( buf, str, len ); + break; + + case op_relative_near: + STRNCATF( buf, "0x%08" PRIX32, + (unsigned int)(op->data.sbyte + + insn->addr + insn->size), len ); + break; + + case op_relative_far: + if ( op->datatype == op_word ) { + STRNCATF( buf, "0x%08" PRIX32, + (unsigned int)(op->data.sword + + insn->addr + insn->size), len ); + break; + } else { + STRNCATF( buf, "0x%08" PRIX32, op->data.sdword + + insn->addr + insn->size, len ); + } + break; + + case op_absolute: + STRNCATF( buf, "$0x%04" PRIX16 ":", op->data.absolute.segment, + len 
); + if (op->datatype == op_descr16) { + STRNCATF( buf, "0x%04" PRIX16, + op->data.absolute.offset.off16, len ); + } else { + STRNCATF( buf, "0x%08" PRIX32, + op->data.absolute.offset.off32, len ); + } + break; + + case op_offset: + len -= format_seg( op, buf, len, native_syntax ); + STRNCATF( buf, "[0x%08" PRIX32 "]", op->data.sdword, len ); + break; + + case op_expression: + len -= format_seg( op, buf, len, native_syntax ); + len -= format_expr( &op->data.expression, buf, len, + native_syntax ); + break; + case op_unused: + case op_unknown: + /* return 0-truncated buffer */ + break; + } + + return( strlen( buf ) ); +} + +static int format_operand_xml( x86_op_t *op, x86_insn_t *insn, char *buf, + int len){ + + char str[MAX_OP_STRING] = "\0"; + + switch (op->type) { + case op_register: + + get_operand_regtype_str( op->data.reg.type, str, + sizeof str ); + + STRNCAT( buf, "\t\t<register ", len ); + STRNCATF( buf, "name=\"%s\" ", op->data.reg.name, len ); + STRNCATF( buf, "type=\"%s\" ", str, len ); + STRNCATF( buf, "size=%d/>\n", op->data.reg.size, len ); + break; + + case op_immediate: + + get_operand_data_str( op, str, sizeof str ); + + STRNCAT( buf, "\t\t<immediate ", len ); + STRNCATF( buf, "type=\"%s\" ", + get_operand_datatype_str (op), len ); + STRNCATF( buf, "value=\"%s\"/>\n", str, len ); + break; + + case op_relative_near: + STRNCAT( buf, "\t\t<relative_offset ", len ); + + STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n", + (unsigned int)(op->data.sbyte + + insn->addr + insn->size), len ); + break; + + case op_relative_far: + STRNCAT( buf, "\t\t<relative_offset ", len ); + + if (op->datatype == op_word) { + STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n", + (unsigned int)(op->data.sword + + insn->addr + insn->size), len); + break; + } else { + + STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n", + op->data.sdword + insn->addr + insn->size, + len ); + } + break; + + case op_absolute: + + STRNCATF( buf, + "\t\t<absolute_address segment=\"0x%04" PRIX16 "\"", + 
op->data.absolute.segment, len ); + + if (op->datatype == op_descr16) { + STRNCATF( buf, "offset=\"0x%04" PRIX16 "\">", + op->data.absolute.offset.off16, len ); + } else { + STRNCATF( buf, "offset=\"0x%08" PRIX32 "\">", + op->data.absolute.offset.off32, len ); + } + + STRNCAT( buf, "\t\t</absolute_address>\n", len ); + break; + + case op_expression: + + + STRNCAT( buf, "\t\t<address_expression>\n", len ); + + len -= format_seg( op, buf, len, xml_syntax ); + len -= format_expr( &op->data.expression, buf, len, + xml_syntax ); + + STRNCAT( buf, "\t\t</address_expression>\n", len ); + break; + + case op_offset: + + STRNCAT( buf, "\t\t<segment_offset>\n", len ); + + len -= format_seg( op, buf, len, xml_syntax ); + + STRNCAT( buf, "\t\t\t<address ", len); + STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n", + op->data.sdword, len ); + STRNCAT( buf, "\t\t</segment_offset>\n", len ); + break; + + case op_unused: + case op_unknown: + /* return 0-truncated buffer */ + break; + } + + return( strlen( buf ) ); +} + +static int format_operand_raw( x86_op_t *op, x86_insn_t *insn, char *buf, + int len){ + + char str[MAX_OP_RAW_STRING]; + const char *datatype = get_operand_datatype_str(op); + + switch (op->type) { + case op_register: + + get_operand_regtype_str( op->data.reg.type, str, + sizeof str ); + + STRNCAT( buf, "reg|", len ); + STRNCATF( buf, "%s|", datatype, len ); + STRNCATF( buf, "%s:", op->data.reg.name, len ); + STRNCATF( buf, "%s:", str, len ); + STRNCATF( buf, "%d|", op->data.reg.size, len ); + break; + + case op_immediate: + + get_operand_data_str( op, str, sizeof str ); + + STRNCAT( buf, "immediate|", len ); + STRNCATF( buf, "%s|", datatype, len ); + STRNCATF( buf, "%s|", str, len ); + break; + + case op_relative_near: + /* NOTE: in raw format, we print the + * relative offset, not the actual + * address of the jump target */ + + STRNCAT( buf, "relative|", len ); + STRNCATF( buf, "%s|", datatype, len ); + STRNCATF( buf, "%" PRId8 "|", op->data.sbyte, len ); + break; + 
+ case op_relative_far: + + STRNCAT( buf, "relative|", len ); + STRNCATF( buf, "%s|", datatype, len ); + + if (op->datatype == op_word) { + STRNCATF( buf, "%" PRId16 "|", op->data.sword, len); + break; + } else { + STRNCATF( buf, "%" PRId32 "|", op->data.sdword, len ); + } + break; + + case op_absolute: + + STRNCAT( buf, "absolute_address|", len ); + STRNCATF( buf, "%s|", datatype, len ); + + STRNCATF( buf, "$0x%04" PRIX16 ":", op->data.absolute.segment, + len ); + if (op->datatype == op_descr16) { + STRNCATF( buf, "0x%04" PRIX16 "|", + op->data.absolute.offset.off16, len ); + } else { + STRNCATF( buf, "0x%08" PRIX32 "|", + op->data.absolute.offset.off32, len ); + } + + break; + + case op_expression: + + STRNCAT( buf, "address_expression|", len ); + STRNCATF( buf, "%s|", datatype, len ); + + len -= format_seg( op, buf, len, native_syntax ); + len -= format_expr( &op->data.expression, buf, len, + raw_syntax ); + + STRNCAT( buf, "|", len ); + break; + + case op_offset: + + STRNCAT( buf, "segment_offset|", len ); + STRNCATF( buf, "%s|", datatype, len ); + + len -= format_seg( op, buf, len, xml_syntax ); + + STRNCATF( buf, "%08" PRIX32 "|", op->data.sdword, len ); + break; + + case op_unused: + case op_unknown: + /* return 0-truncated buffer */ + break; + } + + return( strlen( buf ) ); +} + +int x86_format_operand( x86_op_t *op, char *buf, int len, + enum x86_asm_format format ){ + x86_insn_t *insn; + + if ( ! op || ! 
buf || len < 1 ) { + return(0); + } + + /* insn is stored in x86_op_t since .21-pre3 */ + insn = (x86_insn_t *) op->insn; + + memset( buf, 0, len ); + + switch ( format ) { + case att_syntax: + return format_operand_att( op, insn, buf, len ); + case xml_syntax: + return format_operand_xml( op, insn, buf, len ); + case raw_syntax: + return format_operand_raw( op, insn, buf, len ); + case native_syntax: + case intel_syntax: + default: + return format_operand_native( op, insn, buf, len ); + } +} + +#define is_imm_jmp(op) (op->type == op_absolute || \ + op->type == op_immediate || \ + op->type == op_offset) +#define is_memory_op(op) (op->type == op_absolute || \ + op->type == op_expression || \ + op->type == op_offset) + +static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) { + int size = 0; + const char *suffix; + + if (! insn || ! buf || ! len ) + return(0); + + memset( buf, 0, len ); + + /* do long jump/call prefix */ + if ( insn->type == insn_jmp || insn->type == insn_call ) { + if (! 
is_imm_jmp( x86_operand_1st(insn) ) || + (x86_operand_1st(insn))->datatype != op_byte ) { + /* far jump/call, use "l" prefix */ + STRNCAT( buf, "l", len ); + } + STRNCAT( buf, insn->mnemonic, len ); + + return ( strlen( buf ) ); + } + + /* do mnemonic */ + STRNCAT( buf, insn->mnemonic, len ); + + /* do suffixes for memory operands */ + if (!(insn->note & insn_note_nosuffix) && + (insn->group == insn_arithmetic || + insn->group == insn_logic || + insn->group == insn_move || + insn->group == insn_stack || + insn->group == insn_string || + insn->group == insn_comparison || + insn->type == insn_in || + insn->type == insn_out + )) { + if ( x86_operand_count( insn, op_explicit ) > 0 && + is_memory_op( x86_operand_1st(insn) ) ){ + size = x86_operand_size( x86_operand_1st( insn ) ); + } else if ( x86_operand_count( insn, op_explicit ) > 1 && + is_memory_op( x86_operand_2nd(insn) ) ){ + size = x86_operand_size( x86_operand_2nd( insn ) ); + } + } + + if ( size == 1 ) suffix = "b"; + else if ( size == 2 ) suffix = "w"; + else if ( size == 4 ) suffix = "l"; + else if ( size == 8 ) suffix = "q"; + else suffix = ""; + + STRNCAT( buf, suffix, len ); + return ( strlen( buf ) ); +} + +int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len, + enum x86_asm_format format){ + char str[MAX_OP_STRING]; + + memset( buf, 0, len ); + STRNCAT( buf, insn->prefix_string, len ); + if ( format == att_syntax ) { + format_att_mnemonic( insn, str, sizeof str ); + STRNCAT( buf, str, len ); + } else { + STRNCAT( buf, insn->mnemonic, len ); + } + + return( strlen( buf ) ); +} + +struct op_string { char *buf; size_t len; }; + +static void format_op_raw( x86_op_t *op, x86_insn_t *insn, void *arg ) { + struct op_string * opstr = (struct op_string *) arg; + + format_operand_raw(op, insn, opstr->buf, opstr->len); +} + +static int format_insn_note(x86_insn_t *insn, char *buf, int len){ + char note[32] = {0}; + int len_orig = len, note_len = 32; + + if ( insn->note & insn_note_ring0 ) { + STRNCATF( 
note, "%s", "Ring0 ", note_len ); + } + if ( insn->note & insn_note_smm ) { + STRNCATF( note, "%s", "SMM ", note_len ); + } + if ( insn->note & insn_note_serial ) { + STRNCATF(note, "%s", "Serialize ", note_len ); + } + STRNCATF( buf, "%s|", note, len ); + + return( len_orig - len ); +} + +static int format_raw_insn( x86_insn_t *insn, char *buf, int len ){ + struct op_string opstr = { buf, len }; + int i; + + /* RAW style: + * ADDRESS|OFFSET|SIZE|BYTES| + * PREFIX|PREFIX_STRING|GROUP|TYPE|NOTES| + * MNEMONIC|CPU|ISA|FLAGS_SET|FLAGS_TESTED| + * STACK_MOD|STACK_MOD_VAL + * [|OP_TYPE|OP_DATATYPE|OP_ACCESS|OP_FLAGS|OP]* + * + * Register values are encoded as: + * NAME:TYPE:SIZE + * + * Effective addresses are encoded as: + * disp(base_reg,index_reg,scale) + */ + STRNCATF( buf, "0x%08" PRIX32 "|", insn->addr , len ); + STRNCATF( buf, "0x%08" PRIX32 "|", insn->offset, len ); + STRNCATF( buf, "%d|" , insn->size , len ); + + /* print bytes */ + for ( i = 0; i < insn->size; i++ ) { + STRNCATF( buf, "%02X ", insn->bytes[i], len ); + } + STRNCAT( buf, "|", len ); + + len -= format_insn_prefix_str( insn->prefix, buf, len ); + STRNCATF( buf, "|%s|", insn->prefix_string , len ); + STRNCATF( buf, "%s|", get_insn_group_str( insn->group ), len ); + STRNCATF( buf, "%s|", get_insn_type_str( insn->type ) , len ); + STRNCATF( buf, "%s|", insn->mnemonic , len ); + STRNCATF( buf, "%s|", get_insn_cpu_str( insn->cpu ) , len ); + STRNCATF( buf, "%s|", get_insn_isa_str( insn->isa ) , len ); + + /* insn note */ + len -= format_insn_note( insn, buf, len ); + + len -= format_insn_eflags_str( insn->flags_set, buf, len ); + STRNCAT( buf, "|", len ); + len -= format_insn_eflags_str( insn->flags_tested, buf, len ); + STRNCAT( buf, "|", len ); + STRNCATF( buf, "%d|", insn->stack_mod, len ); + STRNCATF( buf, "%" PRId32 "|", insn->stack_mod_val, len ); + + opstr.len = len; + x86_operand_foreach( insn, format_op_raw, &opstr, op_any ); + + return( strlen (buf) ); +} + +static int format_xml_insn( 
x86_insn_t *insn, char *buf, int len ) { + char str[MAX_OP_XML_STRING]; + int i; + + STRNCAT( buf, "<x86_insn>\n", len ); + + STRNCATF( buf, "\t<address rva=\"0x%08" PRIX32 "\" ", insn->addr, len ); + STRNCATF( buf, "offset=\"0x%08" PRIX32 "\" ", insn->offset, len ); + STRNCATF( buf, "size=%d bytes=\"", insn->size, len ); + + for ( i = 0; i < insn->size; i++ ) { + STRNCATF( buf, "%02X ", insn->bytes[i], len ); + } + STRNCAT( buf, "\"/>\n", len ); + + STRNCAT( buf, "\t<prefix type=\"", len ); + len -= format_insn_prefix_str( insn->prefix, buf, len ); + STRNCATF( buf, "\" string=\"%s\"/>\n", insn->prefix_string, len ); + + STRNCATF( buf, "\t<mnemonic group=\"%s\" ", + get_insn_group_str (insn->group), len ); + STRNCATF( buf, "type=\"%s\" ", get_insn_type_str (insn->type), len ); + STRNCATF( buf, "string=\"%s\"/>\n", insn->mnemonic, len ); + + STRNCAT( buf, "\t<flags type=set>\n", len ); + STRNCAT( buf, "\t\t<flag name=\"", len ); + len -= format_insn_eflags_str( insn->flags_set, buf, len ); + STRNCAT( buf, "\"/>\n\t</flags>\n", len ); + + + STRNCAT( buf, "\t<flags type=tested>\n", len ); + STRNCAT( buf, "\t\t<flag name=\"", len ); + len -= format_insn_eflags_str( insn->flags_tested, buf, len ); + STRNCAT( buf, "\"/>\n\t</flags>\n", len ); + + if ( x86_operand_1st( insn ) ) { + x86_format_operand( x86_operand_1st(insn), str, + sizeof str, xml_syntax); + STRNCAT( buf, "\t<operand name=dest>\n", len ); + STRNCAT( buf, str, len ); + STRNCAT( buf, "\t</operand>\n", len ); + } + + if ( x86_operand_2nd( insn ) ) { + x86_format_operand( x86_operand_2nd( insn ), str, + sizeof str, xml_syntax); + STRNCAT( buf, "\t<operand name=src>\n", len ); + STRNCAT( buf, str, len ); + STRNCAT( buf, "\t</operand>\n", len ); + } + + if ( x86_operand_3rd( insn ) ) { + x86_format_operand( x86_operand_3rd(insn), str, + sizeof str, xml_syntax); + STRNCAT( buf, "\t<operand name=imm>\n", len ); + STRNCAT( buf, str, len ); + STRNCAT( buf, "\t</operand>\n", len ); + } + + STRNCAT( buf, 
"</x86_insn>\n", len ); + + return strlen (buf); +} + +int x86_format_header( char *buf, int len, enum x86_asm_format format ) { + switch (format) { + case att_syntax: + snprintf( buf, len, "MNEMONIC\tSRC, DEST, IMM" ); + break; + case intel_syntax: + snprintf( buf, len, "MNEMONIC\tDEST, SRC, IMM" ); + break; + case native_syntax: + snprintf( buf, len, "ADDRESS\tBYTES\tMNEMONIC\t" + "DEST\tSRC\tIMM" ); + break; + case raw_syntax: + snprintf( buf, len, "ADDRESS|OFFSET|SIZE|BYTES|" + "PREFIX|PREFIX_STRING|GROUP|TYPE|NOTES|" + "MNEMONIC|CPU|ISA|FLAGS_SET|FLAGS_TESTED|" + "STACK_MOD|STACK_MOD_VAL" + "[|OP_TYPE|OP_DATATYPE|OP_ACCESS|OP_FLAGS|OP]*" + ); + break; + case xml_syntax: + snprintf( buf, len, + "<x86_insn>" + "<address rva= offset= size= bytes=/>" + "<prefix type= string=/>" + "<mnemonic group= type= string= " + "cpu= isa= note= />" + "<flags type=set>" + "<flag name=>" + "</flags>" + "<stack_mod val= >" + "<flags type=tested>" + "<flag name=>" + "</flags>" + "<operand name=>" + "<register name= type= size=/>" + "<immediate type= value=/>" + "<relative_offset value=/>" + "<absolute_address value=>" + "<segment value=/>" + "</absolute_address>" + "<address_expression>" + "<segment value=/>" + "<base>" + "<register name= type= size=/>" + "</base>" + "<index>" + "<register name= type= size=/>" + "</index>" + "<scale>" + "<immediate value=/>" + "</scale>" + "<displacement>" + "<immediate value=/>" + "<address value=/>" + "</displacement>" + "</address_expression>" + "<segment_offset>" + "<address value=/>" + "</segment_offset>" + "</operand>" + "</x86_insn>" + ); + break; + case unknown_syntax: + if ( len ) { + buf[0] = '\0'; + } + break; + } + + return( strlen(buf) ); +} + +int x86_format_insn( x86_insn_t *insn, char *buf, int len, + enum x86_asm_format format ){ + char str[MAX_OP_STRING]; + x86_op_t *src, *dst; + int i; + + memset(buf, 0, len); + if ( format == intel_syntax ) { + /* INTEL STYLE: mnemonic dest, src, imm */ + STRNCAT( buf, insn->prefix_string, len 
); + STRNCAT( buf, insn->mnemonic, len ); + STRNCAT( buf, "\t", len ); + + /* dest */ + if ( (dst = x86_operand_1st( insn )) && !(dst->flags & op_implied) ) { + x86_format_operand( dst, str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + } + + /* src */ + if ( (src = x86_operand_2nd( insn )) ) { + if ( !(dst->flags & op_implied) ) { + STRNCAT( buf, ", ", len ); + } + x86_format_operand( src, str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + } + + /* imm */ + if ( x86_operand_3rd( insn )) { + STRNCAT( buf, ", ", len ); + x86_format_operand( x86_operand_3rd( insn ), + str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + } + + } else if ( format == att_syntax ) { + /* ATT STYLE: mnemonic src, dest, imm */ + STRNCAT( buf, insn->prefix_string, len ); + format_att_mnemonic(insn, str, MAX_OP_STRING); + STRNCATF( buf, "%s\t", str, len); + + + /* not sure which is correct? sometimes GNU as requires + * an imm as the first operand, sometimes as the third... */ + /* imm */ + if ( x86_operand_3rd( insn ) ) { + x86_format_operand(x86_operand_3rd( insn ), + str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + /* there is always 'dest' operand if there is 'src' */ + STRNCAT( buf, ", ", len ); + } + + if ( (insn->note & insn_note_nonswap ) == 0 ) { + /* regular AT&T style swap */ + src = x86_operand_2nd( insn ); + dst = x86_operand_1st( insn ); + } + else { + /* special-case instructions */ + src = x86_operand_1st( insn ); + dst = x86_operand_2nd( insn ); + } + + /* src */ + if ( src ) { + x86_format_operand(src, str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + /* there is always 'dest' operand if there is 'src' */ + if ( dst && !(dst->flags & op_implied) ) { + STRNCAT( buf, ", ", len ); + } + } + + /* dest */ + if ( dst && !(dst->flags & op_implied) ) { + x86_format_operand( dst, str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + } + + + } else if ( format == raw_syntax ) { + format_raw_insn( insn, buf, len ); + } else if ( format == 
xml_syntax ) { + format_xml_insn( insn, buf, len ); + } else { /* default to native */ + /* NATIVE style: RVA\tBYTES\tMNEMONIC\tOPERANDS */ + /* print address */ + STRNCATF( buf, "%08" PRIX32 "\t", insn->addr, len ); + + /* print bytes */ + for ( i = 0; i < insn->size; i++ ) { + STRNCATF( buf, "%02X ", insn->bytes[i], len ); + } + + STRNCAT( buf, "\t", len ); + + /* print mnemonic */ + STRNCAT( buf, insn->prefix_string, len ); + STRNCAT( buf, insn->mnemonic, len ); + STRNCAT( buf, "\t", len ); + + /* print operands */ + /* dest */ + if ( x86_operand_1st( insn ) ) { + x86_format_operand( x86_operand_1st( insn ), + str, MAX_OP_STRING, format); + STRNCATF( buf, "%s\t", str, len ); + } + + /* src */ + if ( x86_operand_2nd( insn ) ) { + x86_format_operand(x86_operand_2nd( insn ), + str, MAX_OP_STRING, format); + STRNCATF( buf, "%s\t", str, len ); + } + + /* imm */ + if ( x86_operand_3rd( insn )) { + x86_format_operand( x86_operand_3rd( insn ), + str, MAX_OP_STRING, format); + STRNCAT( buf, str, len ); + } + } + + return( strlen( buf ) ); +} + |