finished decomp of routines... it's time to start on comp...

3.0
_xeroxz 2 years ago
parent 79db6d538c
commit 116eff8a32

@ -21,7 +21,7 @@ class symbol_table_t {
void update(std::size_t hash, decomp::symbol_t& sym); void update(std::size_t hash, decomp::symbol_t& sym);
void update(std::size_t hash, std::uintptr_t allocated_at); void update(std::size_t hash, std::uintptr_t allocated_at);
void for_each(std::function<bool(decomp::symbol_t& sym)> fn); void for_each(std::function<void(decomp::symbol_t& sym)> fn);
std::uint32_t size(); std::uint32_t size();
private: private:

@ -12,18 +12,28 @@ extern "C" {
#include <xed-interface.h> #include <xed-interface.h>
} }
#define INSTR_SPLIT_SECTION_NAME ".obf"
namespace theo::decomp { namespace theo::decomp {
enum decomp_type_t { none, instr_split };
class routine_t { class routine_t {
public: public:
explicit routine_t(coff::section_header_t* scn, explicit routine_t(coff::symbol_t* sym,
std::vector<std::uint8_t>& fn); coff::image_t* img,
coff::section_header_t* scn,
std::vector<std::uint8_t>& fn,
decomp_type_t dcmp_type);
std::vector<decomp::symbol_t> decompose(); std::vector<decomp::symbol_t> decompose();
coff::section_header_t* scn(); coff::section_header_t* scn();
std::vector<std::uint8_t> data(); std::vector<std::uint8_t> data();
private: private:
coff::symbol_t* m_sym;
std::vector<std::uint8_t> m_data; std::vector<std::uint8_t> m_data;
coff::image_t* m_img;
coff::section_header_t* m_scn; coff::section_header_t* m_scn;
decomp_type_t m_dcmp_type;
}; };
} // namespace theo::decomp } // namespace theo::decomp

@ -11,7 +11,7 @@ class symbol_t {
explicit symbol_t(std::string name, explicit symbol_t(std::string name,
std::uintptr_t offset, std::uintptr_t offset,
std::vector<std::uint8_t> data, std::vector<std::uint8_t> data,
coff::section_header_t scn_hdr, coff::section_header_t* scn,
std::vector<comp::reloc_t> relocs); std::vector<comp::reloc_t> relocs);
std::string name() const; std::string name() const;
@ -29,7 +29,7 @@ class symbol_t {
std::string m_name; std::string m_name;
std::uintptr_t m_offset, m_allocated_at; std::uintptr_t m_offset, m_allocated_at;
std::vector<std::uint8_t> m_data; std::vector<std::uint8_t> m_data;
coff::section_header_t m_scn_hdr; coff::section_header_t* m_scn;
std::vector<comp::reloc_t> m_relocs; std::vector<comp::reloc_t> m_relocs;
}; };
} // namespace theo::decomp } // namespace theo::decomp

@ -3,6 +3,6 @@ extern "C" int MessageBox(void* hWnd,
char* lpCaption, char* lpCaption,
void* uType); void* uType);
void EntryPoint() { __declspec(code_seg(".obf")) void EntryPoint() {
MessageBox(nullptr, "Hello World", "Hello World", nullptr); MessageBox(nullptr, "Hello World", "Hello World", nullptr);
} }

@ -26,7 +26,7 @@ void symbol_table_t::update(std::size_t hash, std::uintptr_t allocated_at) {
m_table.insert({hash, v}); m_table.insert({hash, v});
} }
void symbol_table_t::for_each(std::function<bool(decomp::symbol_t& sym)> fn) { void symbol_table_t::for_each(std::function<void(decomp::symbol_t& sym)> fn) {
std::for_each(m_table.begin(), m_table.end(), [&](auto v) { fn(v.second); }); std::for_each(m_table.begin(), m_table.end(), [&](auto v) { fn(v.second); });
} }

@ -23,9 +23,16 @@ std::optional<comp::symbol_table_t*> decomp_t::decompose() {
for (auto idx = 0u; idx < syms_cnt; ++idx) { for (auto idx = 0u; idx < syms_cnt; ++idx) {
auto sym = img->get_symbol(idx); auto sym = img->get_symbol(idx);
// if the symbol is a function then we are going to decompose it...
// data symbols are handled after this...
//
if (sym->has_section() && if (sym->has_section() &&
sym->derived_type == coff::derived_type_id::function) { sym->derived_type == coff::derived_type_id::function) {
auto scn = img->get_section(sym->section_index - 1); auto scn = img->get_section(sym->section_index - 1);
auto dcmp_type =
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
? decomp::decomp_type_t::instr_split
: decomp::decomp_type_t::none;
auto fn_size = scn->size_raw_data; auto fn_size = scn->size_raw_data;
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img); auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
@ -33,7 +40,7 @@ std::optional<comp::symbol_table_t*> decomp_t::decompose() {
sym->name.to_string(img->get_strings()), fn_size); sym->name.to_string(img->get_strings()), fn_size);
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size); std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(scn, fn); decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
auto syms = rtn.decompose(); auto syms = rtn.decompose();
spdlog::info("decomposed routine into {} symbols...", syms.size()); spdlog::info("decomposed routine into {} symbols...", syms.size());

@ -1,15 +1,45 @@
#include <decomp/routine.hpp> #include <decomp/routine.hpp>
namespace theo::decomp { namespace theo::decomp {
routine_t::routine_t(coff::section_header_t* scn, std::vector<std::uint8_t>& fn) routine_t::routine_t(coff::symbol_t* sym,
: m_scn(scn), m_data(fn) {} coff::image_t* img,
coff::section_header_t* scn,
std::vector<std::uint8_t>& fn,
decomp_type_t dcmp_type)
: m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {}
std::vector<decomp::symbol_t> routine_t::decompose() { std::vector<decomp::symbol_t> routine_t::decompose() {
std::vector<decomp::symbol_t> result;
switch (m_dcmp_type) {
case none: {
std::vector<comp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
auto scn_reloc = &scn_relocs[idx];
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
spdlog::info("{} reloc to: {} at offset: {}",
m_sym->name.to_string(m_img->get_strings()), sym_name,
scn_reloc->virtual_address);
relocs.push_back(comp::reloc_t(scn_reloc->virtual_address, sym_hash));
}
result.push_back(
decomp::symbol_t(m_sym->name.to_string(m_img->get_strings()).data(),
m_sym->value, m_data, m_scn, relocs));
break;
}
case instr_split: {
std::uint32_t offset = 0u; std::uint32_t offset = 0u;
xed_error_enum_t err; xed_error_enum_t err;
xed_decoded_inst_t instr; xed_decoded_inst_t instr;
std::vector<xed_decoded_inst_t> instrs;
xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b}; xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
xed_decoded_inst_zero_set_mode(&instr, &istate); xed_decoded_inst_zero_set_mode(&instr, &istate);
@ -17,17 +47,72 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
// //
while ((err = xed_decode(&instr, m_data.data() + offset, while ((err = xed_decode(&instr, m_data.data() + offset,
m_data.size() - offset)) == XED_ERROR_NONE) { m_data.size() - offset)) == XED_ERROR_NONE) {
// symbol name is of the format: symbol@instroffset, I.E: main@11...
//
auto new_sym_name =
std::string(m_sym->name.to_string(m_img->get_strings()));
// first instruction doesnt need the @offset...
//
if (offset)
new_sym_name.append("@").append(std::to_string(offset));
std::vector<comp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
// find if this instruction has a relocation or not...
// if so, return the reloc_t...
//
auto reloc = std::find_if(
scn_relocs, scn_relocs + m_scn->num_relocs,
[&](coff::reloc_t reloc) {
return reloc.virtual_address >= offset &&
reloc.virtual_address <
offset + xed_decoded_inst_get_length(&instr);
});
// if there is indeed a reloc for this instruction...
//
if (reloc != scn_relocs + m_scn->num_relocs) {
auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
auto reloc_offset = reloc->virtual_address - offset;
spdlog::info("{} reloc to: {} at offset: {}", new_sym_name, sym_name,
reloc_offset);
relocs.push_back(comp::reloc_t(reloc_offset, sym_hash));
}
std::vector<std::uint8_t> inst_bytes(
m_data.data() + offset,
m_data.data() + offset + xed_decoded_inst_get_length(&instr));
result.push_back(
decomp::symbol_t(new_sym_name, offset, inst_bytes, m_scn, relocs));
// after creating the symbol and dealing with relocs then print the
// information we have concluded...
//
char buff[255]; char buff[255];
offset += xed_decoded_inst_get_length(&instr); offset += xed_decoded_inst_get_length(&instr);
xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, 0, 0, 0); xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL,
spdlog::info("{}", buff); NULL, NULL);
instrs.push_back(instr);
spdlog::info("{}: {}", new_sym_name, buff);
// need to set this so that instr can be used to decode again... // need to set this so that instr can be used to decode again...
xed_decoded_inst_zero_set_mode(&instr, &istate); xed_decoded_inst_zero_set_mode(&instr, &istate);
} }
return {}; break;
}
default:
break;
}
return result;
} }
coff::section_header_t* routine_t::scn() { coff::section_header_t* routine_t::scn() {

@ -4,12 +4,12 @@ namespace theo::decomp {
symbol_t::symbol_t(std::string name, symbol_t::symbol_t(std::string name,
std::uintptr_t offset, std::uintptr_t offset,
std::vector<std::uint8_t> data, std::vector<std::uint8_t> data,
coff::section_header_t scn_hdr, coff::section_header_t* scn,
std::vector<comp::reloc_t> relocs) std::vector<comp::reloc_t> relocs)
: m_name(name), : m_name(name),
m_offset(offset), m_offset(offset),
m_data(data), m_data(data),
m_scn_hdr(scn_hdr), m_scn(scn),
m_relocs(relocs), m_relocs(relocs),
m_allocated_at(0) {} m_allocated_at(0) {}
@ -41,8 +41,7 @@ std::size_t symbol_t::hash() {
return hash(m_name); return hash(m_name);
} }
std::size_t symbol_t::hash(const std::string& sym) std::size_t symbol_t::hash(const std::string& sym) {
{
return std::hash<std::string>{}(sym); return std::hash<std::string>{}(sym);
} }
} // namespace theo::decomp } // namespace theo::decomp

@ -17,6 +17,9 @@ std::optional<std::uint32_t> theo_t::decompose() {
} }
spdlog::info("decompose successful... {} symbols", res.value()->size()); spdlog::info("decompose successful... {} symbols", res.value()->size());
res.value()->for_each([&](decomp::symbol_t& sym) {
spdlog::info("hash: {:X}, name: {}", sym.hash(), sym.name());
});
return res.value()->size(); return res.value()->size();
} }
} // namespace theo } // namespace theo
Loading…
Cancel
Save