You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

357 lines
13 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
* FCML - Free Code Manipulation Library.
* Copyright (C) 2010-2019 Slawomir Wojtasiak
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/** @file fcml_disassembler.h
* Structure and function declarations related to the FCML disassembler.
* @copyright Copyright (C) 2010-2015 Slawomir Wojtasiak. All rights reserved.
* This project is released under the GNU Lesser General Public License.
*/
#ifndef FCML_DISASSEMBLER_H_
#define FCML_DISASSEMBLER_H_
#include "fcml_lib_export.h"
#include "fcml_instructions.h"
#include "fcml_types.h"
#include "fcml_errors.h"
#include "fcml_common.h"
#include "fcml_dialect.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Maximum number of instruction prefixes. Used in this header as the size
* of the prefixes array in fcml_st_prefixes_details. */
#define FCML_DASM_PREFIXES_COUNT 12
/** First group of conditional suffixes (see FCML manual).
* NOTE(review): presumably a value for
* fcml_st_disassembler_conf.conditional_group - confirm. */
#define FCML_DASM_CONDITIONAL_GROUP_1 0x00
/** Second group of conditional suffixes (see FCML manual).
* NOTE(review): presumably a value for
* fcml_st_disassembler_conf.conditional_group - confirm. */
#define FCML_DASM_CONDITIONAL_GROUP_2 0x01
/** Abstract disassembler.
* The structure is only forward-declared in this header, so it is used here
* exclusively through pointers (see fcml_fn_disassembler_init() and
* fcml_fn_disassembler_free()). */
typedef struct fcml_st_disassembler fcml_st_disassembler;
/** Disassembler configuration.
* Embedded by value in fcml_st_disassembler_context. */
typedef struct fcml_st_disassembler_conf {
/** Set to true to make the disassembler increment the IP address by the
* length of the disassembled instruction. */
fcml_bool increment_ip;
/** True if optional error and warning messages should be collected
* during processing. */
fcml_bool enable_error_messages;
/** True if suffixes for the carry flag have to be used by the
* disassembler. */
fcml_bool carry_flag_conditional_suffix;
/** There are two groups of suffixes for conditional instructions; this
* field chooses which one should be used.
* NOTE(review): presumably one of FCML_DASM_CONDITIONAL_GROUP_1 or
* FCML_DASM_CONDITIONAL_GROUP_2 - confirm. */
fcml_uint8_t conditional_group;
/** Set to true in order to use short forms.
* For instance 'cmpsb' will be used instead of
* 'cmps byte ptr [si],byte ptr [di]'.
*/
fcml_bool short_forms;
/** True if the displacement should be sign extended to the effective
* address size; otherwise false. */
fcml_bool extend_disp_to_asa;
/** If set to true, the disassembler will return the
* FCML_CEH_GEC_UNKNOWN_INSTRUCTION error code if the instruction is not
* known.
*/
fcml_bool fail_if_unknown_instruction;
} fcml_st_disassembler_conf;
/** Disassembler context.
* Bundles everything passed to fcml_fn_disassemble() besides the result
* holder: the disassembler instance, its configuration, the entry point and
* the code buffer to decode. */
typedef struct fcml_st_disassembler_context {
/** Disassembler used to decode instructions. */
fcml_st_disassembler *disassembler;
/** Disassembler configuration (stored by value). */
fcml_st_disassembler_conf configuration;
/** Instruction entry point configuration. */
fcml_st_entry_point entry_point;
/** Pointer to the encoded instruction. */
fcml_ptr code;
/** Size of the code in the buffer pointed to by 'code'. */
fcml_usize code_length;
} fcml_st_disassembler_context;
/* Prefixes */
/** Available types of instruction prefixes. For more information see the
* Intel/AMD Architecture Manual.
* FCML_PT_GROUP_UNKNOWN is explicitly 0 and FCML_PT_GROUP_1 is explicitly 1;
* the remaining enumerators take consecutive values after that. */
typedef enum fcml_en_prefix_types {
FCML_PT_GROUP_UNKNOWN = 0,
FCML_PT_GROUP_1 = 1,
FCML_PT_GROUP_2,
FCML_PT_GROUP_3,
FCML_PT_GROUP_4,
FCML_PT_REX,
FCML_PT_VEX,
FCML_PT_XOP,
FCML_PT_EVEX
} fcml_en_prefix_types;
/** Describes one decoded prefix.
* Instances are stored in the prefixes array of fcml_st_prefixes_details. */
typedef struct fcml_st_instruction_prefix {
/** Prefix itself as a raw byte. */
fcml_uint8_t prefix;
/** Type of the prefix. */
fcml_en_prefix_types prefix_type;
/** FCML_TRUE if the prefix is treated as a mandatory one. */
fcml_bool mandatory_prefix;
/** Place for additional bytes of the VEX/EVEX/XOP prefix.
* @since 1.2.0
*/
fcml_uint8_t avx_bytes[3];
} fcml_st_instruction_prefix;
/**
* Contains some additional information about all decoded
* instruction prefixes: the decoded prefixes themselves, a set of flags
* telling which kinds of prefixes are present, and the individual bit
* fields extracted from the REX/VEX/XOP/EVEX prefixes.
*/
typedef struct fcml_st_prefixes_details {
/** Array with decoded prefixes (at most FCML_DASM_PREFIXES_COUNT). */
fcml_st_instruction_prefix prefixes[FCML_DASM_PREFIXES_COUNT];
/** Number of decoded prefixes. */
fcml_int prefixes_count;
/** Number of bytes used by all decoded prefixes. */
fcml_int prefixes_bytes_count;
/** FCML_TRUE if branch prefix exists. */
fcml_bool is_branch;
/** FCML_TRUE if nobranch prefix exists. */
fcml_bool is_nobranch;
/** FCML_TRUE if lock explicit prefix exists. */
fcml_bool is_lock;
/** FCML_TRUE if rep explicit prefix exists. */
fcml_bool is_rep;
/** FCML_TRUE if repne explicit prefix exists. */
fcml_bool is_repne;
/** FCML_TRUE if xrelease explicit prefix exists. */
fcml_bool is_xrelease;
/** FCML_TRUE if xacquire explicit prefix exists. */
fcml_bool is_xacquire;
/** FCML_TRUE if VEX prefix exists. */
fcml_bool is_vex;
/** FCML_TRUE if EVEX prefix exists. */
fcml_bool is_evex;
/** FCML_TRUE if XOP prefix exists. */
fcml_bool is_xop;
/** FCML_TRUE if it is an AVX instruction (VEX/XOP/EVEX). */
fcml_bool is_avx;
/** FCML_TRUE if REX prefix exists. */
fcml_bool is_rex;
/** First byte of AVX prefix. */
fcml_uint8_t avx_first_byte;
/** R field of REX, XOP or VEX prefix. */
fcml_uint8_t R;
/** R' field of EVEX prefix (high-16 register specifier modifier). */
fcml_uint8_t R_prim;
/** X field of REX, XOP or VEX prefix. */
fcml_uint8_t X;
/** B field of REX, XOP or VEX prefix. */
fcml_uint8_t B;
/** b field of EVEX prefix. */
fcml_uint8_t b;
/** W field of REX, XOP or VEX/EVEX prefix. */
fcml_uint8_t W;
/** L field of XOP or VEX prefix. */
fcml_uint8_t L;
/** L' field of EVEX prefix. */
fcml_uint8_t L_prim;
/** m-mmmm field of XOP or VEX prefix. */
fcml_uint8_t mmmm;
/** vvvv field of XOP or VEX prefix. */
fcml_uint8_t vvvv;
/** pp field of XOP or VEX/EVEX prefix. */
fcml_uint8_t pp;
/** z field of EVEX prefix. */
fcml_uint8_t z;
/** V' field of EVEX prefix. */
fcml_uint8_t V_prim;
/** Embedded opmask register specifier (aaa field). */
fcml_uint8_t aaa;
} fcml_st_prefixes_details;
/** Additional disassembler-specific information about one decoded operand.
* fcml_st_instruction_details holds an array of these, one entry per
* operand slot. */
typedef struct fcml_st_operand_details {
/** Instruction operand access mode: READ, WRITE or both. */
fcml_en_access_mode access_mode;
} fcml_st_operand_details;
/**
* Displacement in raw form, i.e. as it was encoded in the instruction,
* together with the EVEX compressed-displacement scaling factor.
*/
typedef struct fcml_st_raw_displacement {
/** Displacement as encoded in disp8/disp16/disp32/disp8*N. */
fcml_st_integer displacement;
/** Scaling factor N in EVEX specific compressed disp8*N.
* NOTE(review): the fcml_nuint32_t type suggests the value can be
* absent - confirm against fcml_types.h. */
fcml_nuint32_t N;
} fcml_st_raw_displacement;
/** Some basic information about decoded ModR/M and SIB bytes. */
typedef struct fcml_st_decoded_modrm_details {
/** ModR/M byte, if it exists (see is_modrm). */
fcml_uint8_t modrm;
/** SIB byte, if it exists.
* NOTE(review): the fcml_nuint8_t type suggests absence is encoded in
* the value itself - confirm against fcml_types.h. */
fcml_nuint8_t sib;
/** True if RIP encoding is used by the decoded instruction. This flag is
* used only in 64 bit mode. */
fcml_bool is_rip;
/** True if ModR/M exists. */
fcml_bool is_modrm;
/** Raw displacement. */
fcml_st_raw_displacement displacement;
} fcml_st_decoded_modrm_details;
/** Additional instruction details provided by the disassembler.
* Returned as part of fcml_st_disassembler_result alongside the generic
* instruction model. */
typedef struct fcml_st_instruction_details {
/** True if this is a shortcut.
* A good example of such an instruction is 'cmpsb' as opposed to
* 'cmps byte ptr [si],byte ptr [di]'. It is very important to take this
* information into consideration when instruction models are analyzed
* because there are no operands in the GIM for shortcuts.
*/
fcml_bool is_shortcut;
/** True if the given instruction is a short form of a pseudo-op
* instruction. See 'vcmpunordsd' for instance. */
fcml_bool is_pseudo_op;
/** Code of the disassembled instruction. */
fcml_uint8_t instruction_code[FCML_INSTRUCTION_SIZE];
/** Instruction size in bytes. */
fcml_usize instruction_size;
/** Some additional information about decoded instruction prefixes. */
fcml_st_prefixes_details prefixes_details;
/** All disassembler-specific information about operands goes here. */
fcml_st_operand_details operand_details[FCML_OPERANDS_COUNT];
/** Details about decoded ModR/M and SIB bytes. */
fcml_st_decoded_modrm_details modrm_details;
/** Opcode field 's'.
* This is set for informational purposes only and you should not
* use it for any critical functionality.
*/
fcml_bool opcode_field_s_bit;
/** Opcode field 'w'.
* This is set for informational purposes only and you should not
* use it for any critical functionality.
*/
fcml_bool opcode_field_w_bit;
/** Instruction code/number. @see fcml_instructions.h header file. */
fcml_en_instruction instruction;
/** Pseudo operation code. */
fcml_en_pseudo_operations pseudo_op;
/** Code of the instruction form/addressing mode of the instruction
* above. */
fcml_uint16_t addr_mode;
/** Instruction group. */
fcml_uint64_t instruction_group;
/** AVX-512 tuple type. */
fcml_uint8_t tuple_type;
} fcml_st_instruction_details;
/** Reusable disassembler result holder.
* Has to be prepared with fcml_fn_disassembler_result_prepare() before the
* first use and cleaned with fcml_fn_disassembler_result_free(). */
typedef struct fcml_st_disassembler_result {
/** All error and warning messages go here. */
fcml_st_ceh_error_container errors;
/** Additional disassembler-specific information about the decoded
* instruction. */
fcml_st_instruction_details instruction_details;
/** Decoded instruction in its generic form. */
fcml_st_instruction instruction;
} fcml_st_disassembler_result;
/**
* Initializes disassembler instance.
* Initializes a disassembler instance for the given dialect. A disassembler
* initialized in such a way is dialect dependent and generates generic
* instruction models compliant with the syntax supported by the dialect
* (Intel, AT&T). Every disassembler instance has to be freed using the
* fcml_fn_disassembler_free() function as soon as it is not needed anymore.
*
* @param dialect Dialect for the newly created disassembler.
* @param[out] disassembler Initialized disassembler instance.
* @return Error code or FCML_CEH_GEC_NO_ERROR.
* @see fcml_fn_disassembler_free
*/
LIB_EXPORT fcml_ceh_error LIB_CALL fcml_fn_disassembler_init(
const fcml_st_dialect *dialect, fcml_st_disassembler **disassembler);
/**
* Disassembles one instruction from the provided code buffer.
* Disassembles the first instruction available in the provided code buffer
* using the disassembler instance, configuration and entry point accessible
* through the disassembler context. The disassembled instruction model as
* well as potential errors are returned in the reusable result holder given
* in the second parameter. The result holder has to be allocated by the user
* and appropriately prepared using the fcml_fn_disassembler_result_prepare()
* function. As long as the disassembler context and the result holder are
* not shared across multiple function calls, the disassembling process is
* thread safe.
*
* @param context Disassembler context.
* @param result Appropriately prepared result holder.
* @return Error code or FCML_CEH_GEC_NO_ERROR.
* @see fcml_fn_disassembler_result_prepare
* @see fcml_fn_disassembler_result_free
*/
LIB_EXPORT fcml_ceh_error LIB_CALL fcml_fn_disassemble(
fcml_st_disassembler_context *context,
fcml_st_disassembler_result *result);
/**
* Prepares reusable result holder for disassembler.
* Every instance of the fcml_st_disassembler_result structure is reusable
* from the disassembler's point of view, so it has to be prepared in the
* right way in order to allow the disassembler to reuse it correctly. It is
* up to the library user to allocate space for the holder itself. This
* function is only responsible for cleaning the structure correctly and
* preparing it for the first disassembling process. Notice that the
* disassembler has to clean the result holder at the beginning, so you
* cannot pass an uninitialized memory block because it can even cause a
* crash due to illegal memory access.
*
* @param result Result holder instance to be prepared.
* @see fcml_fn_disassembler_result_free
*/
LIB_EXPORT void LIB_CALL fcml_fn_disassembler_result_prepare(
fcml_st_disassembler_result *result);
/**
* Cleans result holder.
* Frees all memory blocks allocated by the disassembler and held inside
* the result holder (instructions, errors etc.). Notice that the result
* holder itself is not freed and can even be safely reused after calling
* this function. In fact this function is also called internally by the
* disassembler in order to clean the result holder before reusing it.
*
* @param result Result holder to clean.
*/
LIB_EXPORT void LIB_CALL fcml_fn_disassembler_result_free(
fcml_st_disassembler_result *result);
/**
* Frees disassembler instance.
* Every disassembler instance manages some resources internally and as
* such it has to be deallocated as soon as it is not needed anymore.
*
* @param disassembler Disassembler to be freed.
* @see fcml_fn_disassembler_init
*/
LIB_EXPORT void LIB_CALL fcml_fn_disassembler_free(
fcml_st_disassembler *disassembler);
#ifdef __cplusplus
}
#endif
#endif /* FCML_DISASSEMBLER_H_ */