From dca70322fe36bf71bb55248913c185730038faa3 Mon Sep 17 00:00:00 2001 From: _xeroxz Date: Sun, 29 May 2022 16:20:07 -0700 Subject: [PATCH] removed the code that splits functions up. this code should be a pass, not part of the framework itself. move this code. :) --- include/decomp/routine.hpp | 8 +- src/decomp/decomp.cpp | 13 ++-- src/decomp/routine.cpp | 152 +++++++------------------------------ 3 files changed, 34 insertions(+), 139 deletions(-) diff --git a/include/decomp/routine.hpp b/include/decomp/routine.hpp index 068cdd4..6ac8124 100644 --- a/include/decomp/routine.hpp +++ b/include/decomp/routine.hpp @@ -66,14 +66,13 @@ class routine_t { explicit routine_t(coff::symbol_t* sym, coff::image_t* img, coff::section_header_t* scn, - std::vector& fn, - sym_type_t dcmp_type); + std::vector& fn); /// /// decompose the function into symbol(s). /// - /// symbol(s) of the function. - std::vector decompose(); + /// symbol of the function. + decomp::symbol_t decompose(); /// /// gets the section header of the section in which the symbol is located in. @@ -93,6 +92,5 @@ class routine_t { std::vector m_data; coff::image_t* m_img; coff::section_header_t* m_scn; - sym_type_t m_dcmp_type; }; } // namespace theo::decomp \ No newline at end of file diff --git a/src/decomp/decomp.cpp b/src/decomp/decomp.cpp index 46995e2..d7fa17c 100644 --- a/src/decomp/decomp.cpp +++ b/src/decomp/decomp.cpp @@ -112,20 +112,17 @@ std::optional decomp_t::decompose( if (sym->has_section()) { if (sym->derived_type == coff::derived_type_id::function) { auto scn = img->get_section(sym->section_index - 1); - auto dcmp_type = - scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME - ? decomp::sym_type_t::instruction - : decomp::sym_type_t::function; - auto fn_size = next_sym(img, scn, sym); auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img) + sym->value; + // extract the bytes the function is composed of... + // std::vector fn(fn_bgn, fn_bgn + fn_size); - decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); + decomp::routine_t rtn(sym, img, scn, fn); - auto syms = rtn.decompose(); - m_syms->put_symbols(syms); + auto fsym = rtn.decompose(); + m_syms->put_symbol(fsym); // else the symbol isnt a function and its public or private (some data // symbols are private)... } else if (sym->storage_class == coff::storage_class_id::public_symbol || diff --git a/src/decomp/routine.cpp b/src/decomp/routine.cpp index 55cdf6f..a8bd9c7 100644 --- a/src/decomp/routine.cpp +++ b/src/decomp/routine.cpp @@ -34,135 +34,35 @@ namespace theo::decomp { routine_t::routine_t(coff::symbol_t* sym, coff::image_t* img, coff::section_header_t* scn, - std::vector& fn, - sym_type_t dcmp_type) - : m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {} - -std::vector routine_t::decompose() { - std::vector result; - - switch (m_dcmp_type) { - case function: { - std::vector relocs; - auto scn_relocs = reinterpret_cast( - m_scn->ptr_relocs + reinterpret_cast(m_img)); - - for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) { - auto scn_reloc = &scn_relocs[idx]; - // if the reloc is in the current function... - if (scn_reloc->virtual_address >= m_sym->value && - scn_reloc->virtual_address < m_sym->value + m_data.size()) { - auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); - auto sym_name = symbol_t::name(m_img, sym_reloc); - auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - relocs.push_back( - recomp::reloc_t(scn_reloc->virtual_address - m_sym->value, - sym_hash, sym_name.data())); - } - } - - result.push_back(decomp::symbol_t( - m_img, symbol_t::name(m_img, m_sym).data(), m_sym->value, m_data, - m_scn, m_sym, relocs, sym_type_t::function)); - break; - } - case instruction: { - std::uint32_t offset = {}; - xed_error_enum_t err; - - xed_decoded_inst_t instr; - xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b}; - xed_decoded_inst_zero_set_mode(&instr, &istate); - - // keep looping over the section, lower the number of bytes each time... - // - while ((err = xed_decode(&instr, m_data.data() + offset, - m_data.size() - offset)) == XED_ERROR_NONE) { - // symbol name is of the format: symbol@instroffset, I.E: main@11... - // - auto new_sym_name = symbol_t::name(m_img, m_sym); - - // first instruction doesnt need the @offset... - // - if (offset) - new_sym_name.append("@").append(std::to_string(offset)); - - std::vector relocs; - auto scn_relocs = reinterpret_cast( - m_scn->ptr_relocs + reinterpret_cast(m_img)); - - // find if this instruction has a relocation or not... - // if so, return the reloc_t... - // - auto reloc = std::find_if( - scn_relocs, scn_relocs + m_scn->num_relocs, - [&](coff::reloc_t reloc) { - return reloc.virtual_address >= m_sym->value + offset && - reloc.virtual_address < - m_sym->value + offset + - xed_decoded_inst_get_length(&instr); - }); - - // if there is indeed a reloc for this instruction... - // - if (reloc != scn_relocs + m_scn->num_relocs) { - auto sym_reloc = m_img->get_symbol(reloc->symbol_index); - auto sym_name = symbol_t::name(m_img, sym_reloc); - auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - auto reloc_offset = reloc->virtual_address - m_sym->value - offset; - - relocs.push_back( - recomp::reloc_t(reloc_offset, sym_hash, sym_name.data())); - } - - // add a reloc to the next instruction... - // note that the offset is ZERO... comp_t will understand that - // relocs with offset ZERO means the next instructions... - // - auto next_inst_sym = - symbol_t::name(m_img, m_sym) - .append("@") - .append(std::to_string(offset + - xed_decoded_inst_get_length(&instr))); - - relocs.push_back(recomp::reloc_t( - 0, decomp::symbol_t::hash(next_inst_sym), next_inst_sym.data())); - - // get the instructions bytes - // - std::vector inst_bytes( - m_data.data() + offset, - m_data.data() + offset + xed_decoded_inst_get_length(&instr)); - - result.push_back(decomp::symbol_t(m_img, new_sym_name, offset, - inst_bytes, m_scn, m_sym, relocs, - sym_type_t::instruction)); - - // after creating the symbol and dealing with relocs then print the - // information we have concluded... - // - char buff[255]; - offset += xed_decoded_inst_get_length(&instr); - xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL, - NULL, NULL); - - spdlog::info("{}: {}", new_sym_name, buff); - // need to set this so that instr can be used to decode again... - xed_decoded_inst_zero_set_mode(&instr, &istate); - } - - // remove the relocation to the next symbol from the last instruction - // - auto& last_inst = result.back(); - auto& last_inst_relocs = last_inst.relocs(); - last_inst_relocs.erase(last_inst_relocs.end() - 1); - break; + std::vector& fn) + : m_img(img), m_scn(scn), m_data(fn), m_sym(sym) {} + +decomp::symbol_t routine_t::decompose() { + std::vector relocs; + auto scn_relocs = reinterpret_cast( + m_scn->ptr_relocs + reinterpret_cast(m_img)); + + // extract all of the relocations that this function has... + // + for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) { + auto scn_reloc = &scn_relocs[idx]; + // if the reloc is in the current function... + if (scn_reloc->virtual_address >= m_sym->value && + scn_reloc->virtual_address < m_sym->value + m_data.size()) { + auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); + auto sym_name = symbol_t::name(m_img, sym_reloc); + auto sym_hash = decomp::symbol_t::hash(sym_name.data()); + relocs.push_back( + recomp::reloc_t(scn_reloc->virtual_address - m_sym->value, sym_hash, + sym_name.data())); } - default: - break; } - return result; + // return the created symbol_t for this function... + // + return decomp::symbol_t(m_img, symbol_t::name(m_img, m_sym).data(), + m_sym->value, m_data, m_scn, m_sym, relocs, + sym_type_t::function); } coff::section_header_t* routine_t::scn() {