You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

357 lines
13 KiB

/*
* FCML - Free Code Manipulation Library.
* Copyright (C) 2010-2019 Slawomir Wojtasiak
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/** @file fcml_disassembler.h
* Structures and functions declarations related to FCML disassembler.
* @copyright Copyright (C) 2010-2015 Slawomir Wojtasiak. All rights reserved.
* This project is released under the GNU Lesser General Public License.
*/
#ifndef FCML_DISASSEMBLER_H_
#define FCML_DISASSEMBLER_H_
#include "fcml_lib_export.h"
#include "fcml_instructions.h"
#include "fcml_types.h"
#include "fcml_errors.h"
#include "fcml_common.h"
#include "fcml_dialect.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Maximum number of instruction prefixes. Used in this header as the
* capacity of the 'prefixes' array in fcml_st_prefixes_details. */
#define FCML_DASM_PREFIXES_COUNT 12
/** First group of conditional suffixes (see the FCML manual).
* NOTE(review): presumably meant as a value for
* fcml_st_disassembler_conf::conditional_group - confirm with the manual. */
#define FCML_DASM_CONDITIONAL_GROUP_1 0x00
/** Second group of conditional suffixes (see the FCML manual).
* NOTE(review): presumably meant as a value for
* fcml_st_disassembler_conf::conditional_group - confirm with the manual. */
#define FCML_DASM_CONDITIONAL_GROUP_2 0x01
/** This structure and type declaration represents an abstract disassembler.
* The structure is only forward-declared in this header, so it is handled
* through pointers; see fcml_fn_disassembler_init() and
* fcml_fn_disassembler_free(). */
typedef struct fcml_st_disassembler fcml_st_disassembler;
/** Disassembler configuration. Embedded by value in
* fcml_st_disassembler_context. */
typedef struct fcml_st_disassembler_conf {
/** Set to true in order to make the disassembler increment the IP address
* by the length of the disassembled instruction. */
fcml_bool increment_ip;
/** True if optional error and warning messages should be collected
* during processing. */
fcml_bool enable_error_messages;
/** True if suffixes for the carry flag have to be used by the
* disassembler. */
fcml_bool carry_flag_conditional_suffix;
/** There are two groups of suffixes for conditional instructions; this
* field chooses which one should be used (see
* FCML_DASM_CONDITIONAL_GROUP_1 and FCML_DASM_CONDITIONAL_GROUP_2). */
fcml_uint8_t conditional_group;
/** Set to true in order to use short forms.
* For instance 'cmpsb' will be used instead of
* 'cmps byte ptr [si],byte ptr [di]'.
*/
fcml_bool short_forms;
/** True if displacement should be sign extended to effective
* address size; otherwise false. */
fcml_bool extend_disp_to_asa;
/** If set to true, the disassembler will return the
* FCML_CEH_GEC_UNKNOWN_INSTRUCTION error code if the instruction is not
* known.
*/
fcml_bool fail_if_unknown_instruction;
} fcml_st_disassembler_conf;
/** Disassembler context. Groups the disassembler instance, its
* configuration, the entry point and the code buffer that are passed
* together to fcml_fn_disassemble(). */
typedef struct fcml_st_disassembler_context {
/** Disassembler used to decode instructions. */
fcml_st_disassembler *disassembler;
/** Disassembler configuration. */
fcml_st_disassembler_conf configuration;
/** Instruction entry point configuration. */
fcml_st_entry_point entry_point;
/** Pointer to the buffer holding the encoded instruction. */
fcml_ptr code;
/** Size of the code in the buffer pointed to by 'code'. */
fcml_usize code_length;
} fcml_st_disassembler_context;
/* Prefixes */
/** Available types of instruction prefixes. For more information see the
* Intel/AMD Architecture Manuals.
* Only the first two enumerators carry explicit values; the remaining ones
* are numbered consecutively, from FCML_PT_GROUP_2 == 2 up to
* FCML_PT_EVEX == 8. */
typedef enum fcml_en_prefix_types {
FCML_PT_GROUP_UNKNOWN = 0,
FCML_PT_GROUP_1 = 1,
FCML_PT_GROUP_2,
FCML_PT_GROUP_3,
FCML_PT_GROUP_4,
FCML_PT_REX,
FCML_PT_VEX,
FCML_PT_XOP,
FCML_PT_EVEX
} fcml_en_prefix_types;
/** Describes one decoded prefix. Instances are stored in the 'prefixes'
* array of fcml_st_prefixes_details. */
typedef struct fcml_st_instruction_prefix {
/** Prefix itself as a raw byte. */
fcml_uint8_t prefix;
/** Type of the prefix. */
fcml_en_prefix_types prefix_type;
/** FCML_TRUE if the prefix is treated as a mandatory one. */
fcml_bool mandatory_prefix;
/** Place for additional bytes of a VEX/EVEX/XOP prefix.
* @since 1.2.0
*/
fcml_uint8_t avx_bytes[3];
} fcml_st_instruction_prefix;
/**
* Contains some additional information about all decoded
* instruction prefixes: the decoded prefixes themselves, flags telling
* which kinds of prefixes were found and the individual fields of the
* REX/VEX/XOP/EVEX prefixes.
*/
typedef struct fcml_st_prefixes_details {
/** Array with decoded prefixes (at most FCML_DASM_PREFIXES_COUNT). */
fcml_st_instruction_prefix prefixes[FCML_DASM_PREFIXES_COUNT];
/** Number of decoded prefixes. */
fcml_int prefixes_count;
/** Number of bytes used by all decoded prefixes. */
fcml_int prefixes_bytes_count;
/** FCML_TRUE if branch prefix exists. */
fcml_bool is_branch;
/** FCML_TRUE if nobranch prefix exists. */
fcml_bool is_nobranch;
/** FCML_TRUE if lock explicit prefix exists. */
fcml_bool is_lock;
/** FCML_TRUE if rep explicit prefix exists. */
fcml_bool is_rep;
/** FCML_TRUE if repne explicit prefix exists. */
fcml_bool is_repne;
/** FCML_TRUE if xrelease explicit prefix exists. */
fcml_bool is_xrelease;
/** FCML_TRUE if xacquire explicit prefix exists. */
fcml_bool is_xacquire;
/** FCML_TRUE if VEX prefix exists. */
fcml_bool is_vex;
/** FCML_TRUE if EVEX prefix exists. */
fcml_bool is_evex;
/** FCML_TRUE if XOP prefix exists. */
fcml_bool is_xop;
/** FCML_TRUE if it is an AVX instruction (VEX/XOP/EVEX). */
fcml_bool is_avx;
/** FCML_TRUE if REX prefix exists. */
fcml_bool is_rex;
/** First byte of AVX prefix. */
fcml_uint8_t avx_first_byte;
/** R field of REX, XOP or VEX prefix. */
fcml_uint8_t R;
/** EVEX R' field: High-16 register specifier modifier. */
fcml_uint8_t R_prim;
/** X field of REX, XOP or VEX prefix. */
fcml_uint8_t X;
/** B field of REX, XOP or VEX prefix. */
fcml_uint8_t B;
/** b field of EVEX prefix. */
fcml_uint8_t b;
/** W field of REX, XOP or VEX/EVEX prefix. */
fcml_uint8_t W;
/** L field of XOP or VEX prefix. */
fcml_uint8_t L;
/** L' field of EVEX prefix. */
fcml_uint8_t L_prim;
/** m-mmmm field of XOP or VEX prefix. */
fcml_uint8_t mmmm;
/** vvvv field of XOP or VEX prefix. */
fcml_uint8_t vvvv;
/** pp field of XOP or VEX/EVEX prefix. */
fcml_uint8_t pp;
/** z field of EVEX prefix. */
fcml_uint8_t z;
/** V' field of EVEX prefix. */
fcml_uint8_t V_prim;
/** Embedded opmask register specifier (EVEX aaa field). */
fcml_uint8_t aaa;
} fcml_st_prefixes_details;
/** Some additional disassembler specific information about a decoded
* operand. One instance per operand is kept in the 'operand_details' array
* of fcml_st_instruction_details. */
typedef struct fcml_st_operand_details {
/** Instruction operand access mode: READ, WRITE or both. */
fcml_en_access_mode access_mode;
} fcml_st_operand_details;
/**
* Displacement in raw form, i.e. as it was encoded in the instruction.
* Stored in fcml_st_decoded_modrm_details.
*/
typedef struct fcml_st_raw_displacement {
/** Displacement as encoded in disp8/disp16/disp32/disp8*N. */
fcml_st_integer displacement;
/** Scaling factor N in the EVEX specific compressed disp8*N. */
fcml_nuint32_t N;
} fcml_st_raw_displacement;
/** Some basic information about decoded ModR/M and SIB bytes. */
typedef struct fcml_st_decoded_modrm_details {
/** ModR/M byte if it exists; see 'is_modrm'. */
fcml_uint8_t modrm;
/** SIB byte if it exists.
* NOTE(review): fcml_nuint8_t is declared outside this header; presumably
* the type itself records whether the value is present - confirm in
* fcml_types.h. */
fcml_nuint8_t sib;
/** True if RIP encoding is used by the decoded instruction. This flag is
* used only in 64 bit mode. */
fcml_bool is_rip;
/** True if ModR/M exists. */
fcml_bool is_modrm;
/** Raw displacement. */
fcml_st_raw_displacement displacement;
} fcml_st_decoded_modrm_details;
/** Additional instruction details provided by the disassembler alongside
* the generic instruction model (GIM); see fcml_st_disassembler_result. */
typedef struct fcml_st_instruction_details {
/** True if this is a shortcut.
* A good example of such an instruction is 'cmpsb' as opposed to
* 'cmps byte ptr [si],byte ptr [di]'. It is very important to take this
* information into consideration when instruction models are analyzed
* because there are no operands in the GIM for shortcuts.
*/
fcml_bool is_shortcut;
/** True if the given instruction is a short form of a pseudo-op
* instruction. See 'vcmpunordsd' for instance. */
fcml_bool is_pseudo_op;
/** Code of the disassembled instruction. */
fcml_uint8_t instruction_code[FCML_INSTRUCTION_SIZE];
/** Instruction size in bytes. */
fcml_usize instruction_size;
/** Some additional information about decoded instruction prefixes. */
fcml_st_prefixes_details prefixes_details;
/** All disassembler specific information about operands goes here. */
fcml_st_operand_details operand_details[FCML_OPERANDS_COUNT];
/** Details about decoded ModR/M and SIB bytes. */
fcml_st_decoded_modrm_details modrm_details;
/** Opcode field 's'.
* This is set for informational purposes only and you should not
* use it for any critical functionality.
*/
fcml_bool opcode_field_s_bit;
/** Opcode field 'w'.
* This is set for informational purposes only and you should not
* use it for any critical functionality.
*/
fcml_bool opcode_field_w_bit;
/** Instruction code/number. @see fcml_instructions.h header file. */
fcml_en_instruction instruction;
/** Pseudo operation code. */
fcml_en_pseudo_operations pseudo_op;
/** Code of the instruction form/addressing mode of the instruction
* above. */
fcml_uint16_t addr_mode;
/** Instruction group. */
fcml_uint64_t instruction_group;
/** AVX-512 tuple type. */
fcml_uint8_t tuple_type;
} fcml_st_instruction_details;
/** Reusable disassembler result holder. It is allocated by the user,
* prepared with fcml_fn_disassembler_result_prepare() and cleaned with
* fcml_fn_disassembler_result_free(). */
typedef struct fcml_st_disassembler_result {
/** All error and warning messages go here. */
fcml_st_ceh_error_container errors;
/** Additional disassembler specific information about the decoded
* instruction. */
fcml_st_instruction_details instruction_details;
/** Decoded instruction in its generic form. */
fcml_st_instruction instruction;
} fcml_st_disassembler_result;
/**
* Initializes disassembler instance.
* Initializes a disassembler instance for the given dialect. A disassembler
* initialized in such a way is dialect dependent and generates generic
* instruction models compliant with the syntax supported by the dialect
* (Intel, AT&T). Every disassembler instance has to be freed using the
* fcml_fn_disassembler_free() function as soon as it is not needed anymore.
*
* @param dialect Dialect for the newly created disassembler.
* @param[out] disassembler Initialized disassembler instance.
* @return Error code or FCML_CEH_GEC_NO_ERROR.
* @see fcml_fn_disassembler_free
*/
LIB_EXPORT fcml_ceh_error LIB_CALL fcml_fn_disassembler_init(
const fcml_st_dialect *dialect, fcml_st_disassembler **disassembler);
/**
* Disassembles one instruction from the provided code buffer.
* Disassembles the first instruction available in the provided code buffer
* using the disassembler instance, configuration and entry point accessible
* through the disassembler context. The disassembled instruction model as
* well as potential errors are returned in the reusable result holder given
* in the second parameter. The result holder has to be allocated by the user
* and appropriately prepared using the fcml_fn_disassembler_result_prepare()
* function. As long as the disassembler context and the result holder are
* not shared across multiple function calls, the disassembling process is
* thread safe.
*
* @param context Disassembler context.
* @param result Appropriately prepared result holder.
* @return Error code or FCML_CEH_GEC_NO_ERROR.
* @see fcml_fn_disassembler_result_prepare
* @see fcml_fn_disassembler_result_free
*/
LIB_EXPORT fcml_ceh_error LIB_CALL fcml_fn_disassemble(
fcml_st_disassembler_context *context,
fcml_st_disassembler_result *result);
/**
* Prepares reusable result holder for disassembler.
* Every instance of the fcml_st_disassembler_result structure is reusable
* from the disassembler's point of view, so it has to be prepared in the
* right way in order to allow the disassembler to reuse it correctly. It is
* up to the library user to allocate space for the holder itself. This
* function is only responsible for cleaning the structure correctly and
* preparing it for the first disassembling process. Notice that the
* disassembler has to clean the result holder at the beginning, so you
* cannot pass an uninitialized memory block because it can even cause a
* crash due to illegal memory access.
*
* @param result Result holder instance to be prepared.
* @see fcml_fn_disassembler_result_free
*/
LIB_EXPORT void LIB_CALL fcml_fn_disassembler_result_prepare(
fcml_st_disassembler_result *result);
/**
* Cleans result holder.
* Frees all memory blocks allocated by the disassembler and held inside
* the result holder (instructions, errors etc.). Notice that the result
* holder itself is not freed and can even be safely reused after calling
* this function. In fact this function is also called internally by the
* disassembler in order to clean the result holder before reusing it.
*
* @param result Result holder to clean.
*/
LIB_EXPORT void LIB_CALL fcml_fn_disassembler_result_free(
fcml_st_disassembler_result *result);
/**
* Frees disassembler instance.
* Every disassembler instance manages some resources internally and as
* such it has to be deallocated as soon as it is not needed anymore.
*
* @param disassembler Disassembler to be freed.
* @see fcml_fn_disassembler_init
*/
LIB_EXPORT void LIB_CALL fcml_fn_disassembler_free(
fcml_st_disassembler *disassembler);
#ifdef __cplusplus
}
#endif
#endif /* FCML_DISASSEMBLER_H_ */