finished decomp of routines... it's time to start on comp...

3.0
_xeroxz 2 years ago
parent 79db6d538c
commit 116eff8a32

@ -21,7 +21,7 @@ class symbol_table_t {
void update(std::size_t hash, decomp::symbol_t& sym);
void update(std::size_t hash, std::uintptr_t allocated_at);
void for_each(std::function<bool(decomp::symbol_t& sym)> fn);
void for_each(std::function<void(decomp::symbol_t& sym)> fn);
std::uint32_t size();
private:

@ -12,18 +12,28 @@ extern "C" {
#include <xed-interface.h>
}
#define INSTR_SPLIT_SECTION_NAME ".obf"
namespace theo::decomp {
// Selects how routine_t::decompose() turns a routine into symbols:
//   none        - the whole routine is emitted as a single symbol.
//   instr_split - every decoded instruction is emitted as its own symbol.
enum decomp_type_t { none, instr_split };
// Holds the raw bytes of one function together with the COFF section header
// it belongs to, and converts it into decomp::symbol_t entries.
class routine_t {
public:
// NOTE(review): this two-argument overload appears next to the five-argument
// one below; it looks like the pre-change constructor - confirm whether it
// still exists in the actual header.
explicit routine_t(coff::section_header_t* scn,
std::vector<std::uint8_t>& fn);
// sym:       COFF symbol of the routine (used for its name and value).
// img:       COFF image used to resolve symbols, strings and relocations.
// scn:       section header of the section that contains the routine.
// fn:        bytes of the routine; copied into m_data.
// dcmp_type: decomposition strategy applied by decompose().
explicit routine_t(coff::symbol_t* sym,
coff::image_t* img,
coff::section_header_t* scn,
std::vector<std::uint8_t>& fn,
decomp_type_t dcmp_type);
// Returns the symbols produced for this routine according to m_dcmp_type.
std::vector<decomp::symbol_t> decompose();
// Accessor for the section header - presumably returns m_scn; the definition
// is not fully visible here, TODO confirm.
coff::section_header_t* scn();
// Accessor for the routine bytes - presumably returns a copy of m_data (the
// return type is by value); TODO confirm against the definition.
std::vector<std::uint8_t> data();
private:
// NOTE: members are initialized in this declaration order, regardless of
// the order written in a constructor's initializer list.
coff::symbol_t* m_sym;
std::vector<std::uint8_t> m_data;
coff::image_t* m_img;
coff::section_header_t* m_scn;
decomp_type_t m_dcmp_type;
};
} // namespace theo::decomp

@ -11,7 +11,7 @@ class symbol_t {
explicit symbol_t(std::string name,
std::uintptr_t offset,
std::vector<std::uint8_t> data,
coff::section_header_t scn_hdr,
coff::section_header_t* scn,
std::vector<comp::reloc_t> relocs);
std::string name() const;
@ -29,7 +29,7 @@ class symbol_t {
std::string m_name;
std::uintptr_t m_offset, m_allocated_at;
std::vector<std::uint8_t> m_data;
coff::section_header_t m_scn_hdr;
coff::section_header_t* m_scn;
std::vector<comp::reloc_t> m_relocs;
};
} // namespace theo::decomp

@ -3,6 +3,6 @@ extern "C" int MessageBox(void* hWnd,
char* lpCaption,
void* uType);
void EntryPoint() {
// Demo entry point. code_seg(".obf") places the function in the ".obf"
// section, which is the section name the decomposer compares against
// (INSTR_SPLIT_SECTION_NAME) to select per-instruction splitting.
__declspec(code_seg(".obf")) void EntryPoint() {
MessageBox(nullptr, "Hello World", "Hello World", nullptr);
}

@ -26,7 +26,7 @@ void symbol_table_t::update(std::size_t hash, std::uintptr_t allocated_at) {
m_table.insert({hash, v});
}
void symbol_table_t::for_each(std::function<bool(decomp::symbol_t& sym)> fn) {
void symbol_table_t::for_each(std::function<void(decomp::symbol_t& sym)> fn) {
std::for_each(m_table.begin(), m_table.end(), [&](auto v) { fn(v.second); });
}

@ -23,9 +23,16 @@ std::optional<comp::symbol_table_t*> decomp_t::decompose() {
for (auto idx = 0u; idx < syms_cnt; ++idx) {
auto sym = img->get_symbol(idx);
// if the symbol is a function then we are going to decompose it...
// data symbols are handled after this...
//
if (sym->has_section() &&
sym->derived_type == coff::derived_type_id::function) {
auto scn = img->get_section(sym->section_index - 1);
auto dcmp_type =
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
? decomp::decomp_type_t::instr_split
: decomp::decomp_type_t::none;
auto fn_size = scn->size_raw_data;
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
@ -33,7 +40,7 @@ std::optional<comp::symbol_table_t*> decomp_t::decompose() {
sym->name.to_string(img->get_strings()), fn_size);
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(scn, fn);
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
auto syms = rtn.decompose();
spdlog::info("decomposed routine into {} symbols...", syms.size());

@ -1,33 +1,118 @@
#include <decomp/routine.hpp>
namespace theo::decomp {
routine_t::routine_t(coff::section_header_t* scn, std::vector<std::uint8_t>& fn)
: m_scn(scn), m_data(fn) {}
// Constructs a routine to be decomposed.
//
// sym:       COFF symbol of the routine.
// img:       COFF image the symbol and section belong to.
// scn:       section header of the section containing the routine.
// fn:        bytes of the routine; copied into m_data.
// dcmp_type: decomposition strategy used by decompose().
//
// The initializers are listed in member declaration order (m_sym, m_data,
// m_img, m_scn, m_dcmp_type): that is the order in which C++ actually
// initializes them, so the list now reads the way it executes and no longer
// triggers a reorder warning.
routine_t::routine_t(coff::symbol_t* sym,
                     coff::image_t* img,
                     coff::section_header_t* scn,
                     std::vector<std::uint8_t>& fn,
                     decomp_type_t dcmp_type)
    : m_sym(sym),
      m_data(fn),
      m_img(img),
      m_scn(scn),
      m_dcmp_type(dcmp_type) {}
std::vector<decomp::symbol_t> routine_t::decompose() {
std::uint32_t offset = 0u;
xed_error_enum_t err;
xed_decoded_inst_t instr;
std::vector<xed_decoded_inst_t> instrs;
xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
xed_decoded_inst_zero_set_mode(&instr, &istate);
// keep looping over the section, lower the number of bytes each time...
//
while ((err = xed_decode(&instr, m_data.data() + offset,
m_data.size() - offset)) == XED_ERROR_NONE) {
char buff[255];
offset += xed_decoded_inst_get_length(&instr);
xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, 0, 0, 0);
spdlog::info("{}", buff);
instrs.push_back(instr);
// need to set this so that instr can be used to decode again...
xed_decoded_inst_zero_set_mode(&instr, &istate);
std::vector<decomp::symbol_t> result;
switch (m_dcmp_type) {
case none: {
std::vector<comp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
auto scn_reloc = &scn_relocs[idx];
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
spdlog::info("{} reloc to: {} at offset: {}",
m_sym->name.to_string(m_img->get_strings()), sym_name,
scn_reloc->virtual_address);
relocs.push_back(comp::reloc_t(scn_reloc->virtual_address, sym_hash));
}
result.push_back(
decomp::symbol_t(m_sym->name.to_string(m_img->get_strings()).data(),
m_sym->value, m_data, m_scn, relocs));
break;
}
case instr_split: {
std::uint32_t offset = 0u;
xed_error_enum_t err;
xed_decoded_inst_t instr;
xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
xed_decoded_inst_zero_set_mode(&instr, &istate);
// keep looping over the section, lower the number of bytes each time...
//
while ((err = xed_decode(&instr, m_data.data() + offset,
m_data.size() - offset)) == XED_ERROR_NONE) {
// symbol name is of the format: symbol@instroffset, e.g. main@11...
//
auto new_sym_name =
std::string(m_sym->name.to_string(m_img->get_strings()));
// first instruction doesn't need the @offset...
//
if (offset)
new_sym_name.append("@").append(std::to_string(offset));
std::vector<comp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
// find if this instruction has a relocation or not...
// if so, return the reloc_t...
//
auto reloc = std::find_if(
scn_relocs, scn_relocs + m_scn->num_relocs,
[&](coff::reloc_t reloc) {
return reloc.virtual_address >= offset &&
reloc.virtual_address <
offset + xed_decoded_inst_get_length(&instr);
});
// if there is indeed a reloc for this instruction...
//
if (reloc != scn_relocs + m_scn->num_relocs) {
auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
auto reloc_offset = reloc->virtual_address - offset;
spdlog::info("{} reloc to: {} at offset: {}", new_sym_name, sym_name,
reloc_offset);
relocs.push_back(comp::reloc_t(reloc_offset, sym_hash));
}
std::vector<std::uint8_t> inst_bytes(
m_data.data() + offset,
m_data.data() + offset + xed_decoded_inst_get_length(&instr));
result.push_back(
decomp::symbol_t(new_sym_name, offset, inst_bytes, m_scn, relocs));
// after creating the symbol and dealing with relocs then print the
// information we have concluded...
//
char buff[255];
offset += xed_decoded_inst_get_length(&instr);
xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL,
NULL, NULL);
spdlog::info("{}: {}", new_sym_name, buff);
// need to set this so that instr can be used to decode again...
xed_decoded_inst_zero_set_mode(&instr, &istate);
}
break;
}
default:
break;
}
return {};
return result;
}
coff::section_header_t* routine_t::scn() {

@ -4,12 +4,12 @@ namespace theo::decomp {
symbol_t::symbol_t(std::string name,
std::uintptr_t offset,
std::vector<std::uint8_t> data,
coff::section_header_t scn_hdr,
coff::section_header_t* scn,
std::vector<comp::reloc_t> relocs)
: m_name(name),
m_offset(offset),
m_data(data),
m_scn_hdr(scn_hdr),
m_scn(scn),
m_relocs(relocs),
m_allocated_at(0) {}
@ -41,8 +41,7 @@ std::size_t symbol_t::hash() {
return hash(m_name);
}
std::size_t symbol_t::hash(const std::string& sym)
{
// Computes the hash of a symbol name using the standard library string
// hasher.
std::size_t symbol_t::hash(const std::string& sym) {
  const std::hash<std::string> hasher{};
  return hasher(sym);
}
} // namespace theo::decomp

@ -17,6 +17,9 @@ std::optional<std::uint32_t> theo_t::decompose() {
}
spdlog::info("decompose successful... {} symbols", res.value()->size());
res.value()->for_each([&](decomp::symbol_t& sym) {
spdlog::info("hash: {:X}, name: {}", sym.hash(), sym.name());
});
return res.value()->size();
}
} // namespace theo
Loading…
Cancel
Save