Removed the code that splits functions up into per-instruction symbols.

This code should be a pass, not part of the framework itself. Move this code. :)
master
_xeroxz 3 years ago
parent 9b2fb43fb9
commit dca70322fe

@ -66,14 +66,13 @@ class routine_t {
explicit routine_t(coff::symbol_t* sym,
coff::image_t* img,
coff::section_header_t* scn,
std::vector<std::uint8_t>& fn,
sym_type_t dcmp_type);
std::vector<std::uint8_t>& fn);
/// <summary>
/// decompose the function into symbol(s).
/// </summary>
/// <returns>symbol(s) of the function.</returns>
std::vector<decomp::symbol_t> decompose();
/// <returns>symbol of the function.</returns>
decomp::symbol_t decompose();
/// <summary>
/// gets the section header of the section in which the symbol is located in.
@ -93,6 +92,5 @@ class routine_t {
std::vector<std::uint8_t> m_data;
coff::image_t* m_img;
coff::section_header_t* m_scn;
sym_type_t m_dcmp_type;
};
} // namespace theo::decomp

@ -112,20 +112,17 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
if (sym->has_section()) {
if (sym->derived_type == coff::derived_type_id::function) {
auto scn = img->get_section(sym->section_index - 1);
auto dcmp_type =
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
? decomp::sym_type_t::instruction
: decomp::sym_type_t::function;
auto fn_size = next_sym(img, scn, sym);
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img) +
sym->value;
// extract the bytes the function is composed of...
//
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
decomp::routine_t rtn(sym, img, scn, fn);
auto syms = rtn.decompose();
m_syms->put_symbols(syms);
auto fsym = rtn.decompose();
m_syms->put_symbol(fsym);
// else the symbol isn't a function and it's public or private (some data
// symbols are private)...
} else if (sym->storage_class == coff::storage_class_id::public_symbol ||

@ -34,135 +34,35 @@ namespace theo::decomp {
routine_t::routine_t(coff::symbol_t* sym,
coff::image_t* img,
coff::section_header_t* scn,
std::vector<std::uint8_t>& fn,
sym_type_t dcmp_type)
: m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {}
std::vector<decomp::symbol_t> routine_t::decompose() {
std::vector<decomp::symbol_t> result;
switch (m_dcmp_type) {
case function: {
std::vector<recomp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
auto scn_reloc = &scn_relocs[idx];
// if the reloc is in the current function...
if (scn_reloc->virtual_address >= m_sym->value &&
scn_reloc->virtual_address < m_sym->value + m_data.size()) {
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
auto sym_name = symbol_t::name(m_img, sym_reloc);
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
relocs.push_back(
recomp::reloc_t(scn_reloc->virtual_address - m_sym->value,
sym_hash, sym_name.data()));
}
}
result.push_back(decomp::symbol_t(
m_img, symbol_t::name(m_img, m_sym).data(), m_sym->value, m_data,
m_scn, m_sym, relocs, sym_type_t::function));
break;
}
case instruction: {
std::uint32_t offset = {};
xed_error_enum_t err;
xed_decoded_inst_t instr;
xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
xed_decoded_inst_zero_set_mode(&instr, &istate);
// keep looping over the section, lower the number of bytes each time...
//
while ((err = xed_decode(&instr, m_data.data() + offset,
m_data.size() - offset)) == XED_ERROR_NONE) {
// symbol name is of the format: symbol@instroffset, I.E: main@11...
//
auto new_sym_name = symbol_t::name(m_img, m_sym);
// first instruction doesnt need the @offset...
//
if (offset)
new_sym_name.append("@").append(std::to_string(offset));
std::vector<recomp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
// find if this instruction has a relocation or not...
// if so, return the reloc_t...
//
auto reloc = std::find_if(
scn_relocs, scn_relocs + m_scn->num_relocs,
[&](coff::reloc_t reloc) {
return reloc.virtual_address >= m_sym->value + offset &&
reloc.virtual_address <
m_sym->value + offset +
xed_decoded_inst_get_length(&instr);
});
// if there is indeed a reloc for this instruction...
//
if (reloc != scn_relocs + m_scn->num_relocs) {
auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
auto sym_name = symbol_t::name(m_img, sym_reloc);
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
auto reloc_offset = reloc->virtual_address - m_sym->value - offset;
relocs.push_back(
recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));
}
// add a reloc to the next instruction...
// note that the offset is ZERO... comp_t will understand that
// relocs with offset ZERO means the next instructions...
//
auto next_inst_sym =
symbol_t::name(m_img, m_sym)
.append("@")
.append(std::to_string(offset +
xed_decoded_inst_get_length(&instr)));
relocs.push_back(recomp::reloc_t(
0, decomp::symbol_t::hash(next_inst_sym), next_inst_sym.data()));
// get the instructions bytes
//
std::vector<std::uint8_t> inst_bytes(
m_data.data() + offset,
m_data.data() + offset + xed_decoded_inst_get_length(&instr));
result.push_back(decomp::symbol_t(m_img, new_sym_name, offset,
inst_bytes, m_scn, m_sym, relocs,
sym_type_t::instruction));
// after creating the symbol and dealing with relocs then print the
// information we have concluded...
//
char buff[255];
offset += xed_decoded_inst_get_length(&instr);
xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL,
NULL, NULL);
spdlog::info("{}: {}", new_sym_name, buff);
// need to set this so that instr can be used to decode again...
xed_decoded_inst_zero_set_mode(&instr, &istate);
}
// remove the relocation to the next symbol from the last instruction
//
auto& last_inst = result.back();
auto& last_inst_relocs = last_inst.relocs();
last_inst_relocs.erase(last_inst_relocs.end() - 1);
break;
std::vector<std::uint8_t>& fn)
: m_img(img), m_scn(scn), m_data(fn), m_sym(sym) {}
decomp::symbol_t routine_t::decompose() {
std::vector<recomp::reloc_t> relocs;
auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
// extract all of the relocations that this function has...
//
for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
auto scn_reloc = &scn_relocs[idx];
// if the reloc is in the current function...
if (scn_reloc->virtual_address >= m_sym->value &&
scn_reloc->virtual_address < m_sym->value + m_data.size()) {
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
auto sym_name = symbol_t::name(m_img, sym_reloc);
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
relocs.push_back(
recomp::reloc_t(scn_reloc->virtual_address - m_sym->value, sym_hash,
sym_name.data()));
}
default:
break;
}
return result;
// return the created symbol_t for this function...
//
return decomp::symbol_t(m_img, symbol_t::name(m_img, m_sym).data(),
m_sym->value, m_data, m_scn, m_sym, relocs,
sym_type_t::function);
}
coff::section_header_t* routine_t::scn() {

Loading…
Cancel
Save