If the obj has more symbols with data than the max section size, then the

compiler will start putting multiple symbols into a single section. I
fixed my code so that it can handle decomposing functions and
relocations when a section contains more than one symbol.

However, there is still an issue with private symbols. Private symbols
with no name use the section name as their symbol name, which means
there can be duplicate symbol names. If the symbol is private, its
base type is none, and it has a section, then we need to create a symbol
name for it.

TODO: create a function that handles both situations, something like
std::string symbol_t::get_symbol_name(coff::symbol_t* sym).
3.0
_xeroxz 2 years ago
parent a0d35c1d5e
commit 367a62d8de

@ -13,7 +13,7 @@
#include <coff/image.hpp>
namespace theo::decomp {
using sym_data_t = std::pair<coff::image_t*, coff::symbol_t*>;
using sym_data_t = std::tuple<coff::image_t*, coff::symbol_t*, std::uint32_t>;
class decomp_t {
public:
explicit decomp_t(std::vector<std::uint8_t>& lib,
@ -30,6 +30,9 @@ class decomp_t {
private:
std::uint32_t ext_used_syms(const std::string&& entry_sym);
std::optional<sym_data_t> get_symbol(const std::string_view& name);
std::uint32_t next_sym(coff::image_t* img,
coff::section_header_t* hdr,
coff::symbol_t* s);
const std::vector<std::uint8_t> m_lib;
std::vector<coff::image_t*> m_objs;

@ -6,15 +6,15 @@
namespace theo::recomp {
class reloc_t {
public:
explicit reloc_t(std::uint16_t offset,
explicit reloc_t(std::uint32_t offset,
std::size_t hash,
const std::string&& sym_name)
: m_offset(offset), m_hash(hash), m_sym_name(sym_name) {}
std::size_t hash() { return m_hash; }
std::string name() { return m_sym_name; }
std::uint16_t offset() { return m_offset; }
void offset(std::uint16_t offset) { m_offset = offset; }
std::uint32_t offset() { return m_offset; }
void offset(std::uint32_t offset) { m_offset = offset; }
void add_transform(
std::pair<obf::transform::transform_t*, std::uint32_t> entry) {
@ -31,6 +31,6 @@ class reloc_t {
m_transforms;
std::string m_sym_name;
std::size_t m_hash;
std::uint16_t m_offset;
std::uint32_t m_offset;
};
} // namespace theo::recomp

@ -27,7 +27,12 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
auto sym_name = sym->name.to_string(img->get_strings());
if (sym_name.length()) {
auto sym_hash = symbol_t::hash(sym_name.data());
m_lookup_tbl[sym_hash].push_back({img, sym});
auto sym_size =
sym->has_section()
? next_sym(img, img->get_section(sym->section_index - 1), sym)
: 0u;
m_lookup_tbl[sym_hash].push_back({img, sym, sym_size});
}
}
});
@ -41,7 +46,7 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
// extracted from the archive file...
//
std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) {
auto [img, sym] = data;
auto [img, sym, size] = data;
// populate section hash table with sections for the img of this
// symbol... only populate the hash table if its not been populated for
// this obj before...
@ -73,8 +78,10 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
? decomp::sym_type_t::instruction
: decomp::sym_type_t::function;
auto fn_size = scn->size_raw_data;
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
auto fn_size = next_sym(img, scn, sym);
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img) +
sym->value;
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
@ -97,10 +104,16 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
.append("!")
.append(std::to_string(img->file_header.timedate_stamp));
std::vector<std::uint8_t> scn_data(
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
scn->size_raw_data);
std::vector<std::uint8_t> scn_data;
if (scn->characteristics.cnt_uninit_data) {
scn_data.insert(scn_data.begin(), scn->size_raw_data, 0);
} else {
scn_data.insert(
scn_data.begin(),
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
scn->size_raw_data);
}
decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {},
{}, sym_type_t::section);
@ -137,30 +150,44 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
return m_syms;
}
// Computes the size, in bytes, of symbol `s` inside its section.
//
// A section may hold more than one symbol, so the symbol is assumed to
// extend from its own offset (`s->value`) up to the closest function symbol
// that starts after it in the same section... if there is no such symbol
// then it extends to the end of the section's raw data...
//
// @param img  object file whose symbol table is scanned.
// @param hdr  header of the section that contains `s`.
// @param s    symbol to measure.
// @return     number of bytes from `s->value` to the next function symbol
//             (or the end of the section), 0 if `s` starts at or beyond the
//             end of the section's raw data.
//
// NOTE: callers add the result to `s->value` / to a pointer that already
// includes `s->value`, so this must be a length and not the section
// relative offset of the next symbol...
std::uint32_t decomp_t::next_sym(coff::image_t* img,
                                 coff::section_header_t* hdr,
                                 coff::symbol_t* s) {
  const auto sym_bgn = static_cast<std::uint32_t>(s->value);
  std::uint32_t sym_end = hdr->size_raw_data;

  // symbol does not start inside of the section data... there is nothing to
  // measure and subtracting below would wrap around...
  if (sym_bgn >= sym_end)
    return 0u;

  // loop over all symbols in this object and find the closest function
  // symbol that begins after `s` inside of the same section...
  for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) {
    const auto other = img->get_symbol(idx);
    if (other->derived_type != coff::derived_type_id::function ||
        other->section_index != s->section_index)
      continue;

    const auto other_bgn = static_cast<std::uint32_t>(other->value);
    if (other_bgn > sym_bgn && other_bgn < sym_end)
      sym_end = other_bgn;
  }

  // length of the symbol, not the offset of whatever follows it...
  return sym_end - sym_bgn;
}
std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) {
std::optional<std::pair<coff::image_t*, coff::symbol_t*>> entry;
std::optional<sym_data_t> entry;
if (!(entry = get_symbol(entry_sym.data())).has_value())
return 0u;
std::set<coff::symbol_t*> cache;
const auto finding_syms = [&]() -> bool {
for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) {
auto [img, sym] = *itr;
if (sym->has_section() && !cache.count(sym)) {
auto [img, sym, size] = *itr;
if (sym->has_section() && !cache.count(sym) && size) {
auto scn = img->get_section(sym->section_index - 1);
auto num_relocs = scn->num_relocs;
auto relocs = reinterpret_cast<coff::reloc_t*>(
scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img));
for (auto idx = 0u; idx < num_relocs; ++idx) {
auto reloc_sym = img->get_symbol(relocs[idx].symbol_index);
// if the symbol is defined in the current obj then we dont need to go
// looking for where its actually defined...
if (img->get_symbol(relocs[idx].symbol_index)->has_section()) {
sym_data_t sym_data = {img, reloc_sym};
if (m_used_syms.emplace(sym_data).second)
return true;
} else {
auto reloc = &relocs[idx];
// if the reloc is inside of the current symbol...
if (reloc->virtual_address >= sym->value &&
reloc->virtual_address < sym->value + size) {
auto reloc_sym = img->get_symbol(reloc->symbol_index);
auto sym_name = reloc_sym->name.to_string(img->get_strings());
entry = get_symbol(sym_name);
if (m_used_syms.emplace(entry.value()).second)
@ -183,15 +210,17 @@ std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) {
std::optional<sym_data_t> decomp_t::get_symbol(const std::string_view& name) {
coff::image_t* img = {};
coff::symbol_t* sym = {};
std::uint32_t size = {};
auto& syms = m_lookup_tbl[symbol_t::hash(name.data())];
for (auto idx = 0u; idx < syms.size(); ++idx) {
img = syms[idx].first;
sym = syms[idx].second;
img = std::get<0>(syms[idx]);
sym = std::get<1>(syms[idx]);
size = std::get<2>(syms[idx]);
if (sym->has_section())
return {{img, sym}};
return {{img, sym, size}};
}
return {{img, sym}};
return {{img, sym, size}};
}
std::vector<routine_t> decomp_t::rtns() {

@ -19,12 +19,16 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
auto scn_reloc = &scn_relocs[idx];
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
relocs.push_back(recomp::reloc_t(scn_reloc->virtual_address, sym_hash,
sym_name.data()));
// if the reloc is in the current function...
if (scn_reloc->virtual_address >= m_sym->value &&
scn_reloc->virtual_address < m_sym->value + m_data.size()) {
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
relocs.push_back(
recomp::reloc_t(scn_reloc->virtual_address - m_sym->value,
sym_hash, sym_name.data()));
}
}
result.push_back(decomp::symbol_t(
@ -33,7 +37,7 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
break;
}
case instruction: {
std::uint32_t offset = 0u;
std::uint32_t offset = {};
xed_error_enum_t err;
xed_decoded_inst_t instr;
@ -64,9 +68,10 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
auto reloc = std::find_if(
scn_relocs, scn_relocs + m_scn->num_relocs,
[&](coff::reloc_t reloc) {
return reloc.virtual_address >= offset &&
return reloc.virtual_address >= m_sym->value + offset &&
reloc.virtual_address <
offset + xed_decoded_inst_get_length(&instr);
m_sym->value + offset +
xed_decoded_inst_get_length(&instr);
});
// if there is indeed a reloc for this instruction...
@ -75,7 +80,7 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
auto reloc_offset = reloc->virtual_address - offset;
auto reloc_offset = reloc->virtual_address - m_sym->value - offset;
relocs.push_back(
recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));

Loading…
Cancel
Save