|
|
@ -4,7 +4,8 @@ namespace theo::decomp {
|
|
|
|
/// Constructs a decomposer from the bytes of an archive and a symbol table.
///
/// @param lib  bytes of the archive file; stored in m_lib (decompose() later
///             reads it through m_lib.data() / m_lib.size()).
/// @param syms symbol table pointer stored in m_syms; decompose() adds the
///             generated symbols to it and returns it.
decomp_t::decomp_t(std::vector<std::uint8_t>& lib,
                   recomp::symbol_table_t* syms)
    : m_lib(lib), m_syms(syms) {}
|
|
|
|
|
|
|
|
|
|
|
|
std::optional<recomp::symbol_table_t*> decomp_t::decompose() {
|
|
|
|
std::optional<recomp::symbol_table_t*> decomp_t::decompose(
|
|
|
|
|
|
|
|
const std::string&& entry_sym) {
|
|
|
|
// extract obj files from the archive file...
|
|
|
|
// extract obj files from the archive file...
|
|
|
|
//
|
|
|
|
//
|
|
|
|
ar::view<false> lib(m_lib.data(), m_lib.size());
|
|
|
|
ar::view<false> lib(m_lib.data(), m_lib.size());
|
|
|
@ -20,115 +21,114 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) {
|
|
|
|
|
|
|
|
for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) {
|
|
|
|
|
|
|
|
auto sym = img->get_symbol(idx);
|
|
|
|
|
|
|
|
auto sym_name = sym->name.to_string(img->get_strings());
|
|
|
|
|
|
|
|
if (sym_name.length()) {
|
|
|
|
|
|
|
|
auto sym_hash = symbol_t::hash(sym_name.data());
|
|
|
|
|
|
|
|
m_lookup_tbl[sym_hash].push_back({img, sym});
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// extract used symbols from objs and create a nice little set of them so that
|
|
|
|
|
|
|
|
// we can easily decompose them... no need deal with every single symbol...
|
|
|
|
|
|
|
|
spdlog::info("extracted {} symbols being used...",
|
|
|
|
|
|
|
|
ext_used_syms(entry_sym.data()));
|
|
|
|
|
|
|
|
|
|
|
|
// generate symbols, populate section hash table, for each object file
|
|
|
|
// generate symbols, populate section hash table, for each object file
|
|
|
|
// extracted from the archive file...
|
|
|
|
// extracted from the archive file...
|
|
|
|
//
|
|
|
|
//
|
|
|
|
std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) {
|
|
|
|
std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) {
|
|
|
|
// populate section hash table...
|
|
|
|
auto [img, sym] = data;
|
|
|
|
|
|
|
|
// populate section hash table with sections for the img of this
|
|
|
|
|
|
|
|
// symbol... only populate the hash table if its not been populated for
|
|
|
|
|
|
|
|
// this obj before...
|
|
|
|
//
|
|
|
|
//
|
|
|
|
for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) {
|
|
|
|
if (m_processed_objs.emplace(img).second) {
|
|
|
|
auto scn = img->get_section(idx);
|
|
|
|
for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) {
|
|
|
|
auto scn_sym_name =
|
|
|
|
auto scn = img->get_section(idx);
|
|
|
|
std::string(scn->name.to_string(img->get_strings()))
|
|
|
|
auto scn_sym_name =
|
|
|
|
.append("#")
|
|
|
|
std::string(scn->name.to_string(img->get_strings()))
|
|
|
|
.append(std::to_string(idx))
|
|
|
|
.append("#")
|
|
|
|
.append("!")
|
|
|
|
.append(std::to_string(idx))
|
|
|
|
.append(std::to_string(img->file_header.timedate_stamp));
|
|
|
|
.append("!")
|
|
|
|
|
|
|
|
.append(std::to_string(img->file_header.timedate_stamp));
|
|
|
|
// hash the name of the section + the index + the timestamp of the obj
|
|
|
|
|
|
|
|
// file it is in...
|
|
|
|
// hash the name of the section + the index + the timestamp of the
|
|
|
|
//
|
|
|
|
// obj file it is in...
|
|
|
|
m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)});
|
|
|
|
//
|
|
|
|
|
|
|
|
m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)});
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
auto syms_cnt = img->file_header.num_symbols;
|
|
|
|
// if the symbol is a function then we are going to decompose it...
|
|
|
|
for (auto idx = 0u; idx < syms_cnt; ++idx) {
|
|
|
|
// data symbols are handled after this...
|
|
|
|
auto sym = img->get_symbol(idx);
|
|
|
|
//
|
|
|
|
|
|
|
|
if (sym->has_section()) {
|
|
|
|
// if the symbol is a function then we are going to decompose it...
|
|
|
|
if (sym->derived_type == coff::derived_type_id::function) {
|
|
|
|
// data symbols are handled after this...
|
|
|
|
auto scn = img->get_section(sym->section_index - 1);
|
|
|
|
//
|
|
|
|
auto dcmp_type =
|
|
|
|
if (sym->has_section()) {
|
|
|
|
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
|
|
|
|
if (sym->derived_type == coff::derived_type_id::function) {
|
|
|
|
? decomp::sym_type_t::instruction
|
|
|
|
auto scn = img->get_section(sym->section_index - 1);
|
|
|
|
: decomp::sym_type_t::function;
|
|
|
|
auto dcmp_type = scn->name.to_string(img->get_strings()) ==
|
|
|
|
auto fn_size = scn->size_raw_data;
|
|
|
|
INSTR_SPLIT_SECTION_NAME
|
|
|
|
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
|
|
|
|
? decomp::sym_type_t::instruction
|
|
|
|
|
|
|
|
: decomp::sym_type_t::function;
|
|
|
|
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
|
|
|
|
auto fn_size = scn->size_raw_data;
|
|
|
|
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
|
|
|
|
auto fn_bgn =
|
|
|
|
|
|
|
|
scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
|
|
|
|
auto syms = rtn.decompose();
|
|
|
|
|
|
|
|
m_syms->add_symbols(syms);
|
|
|
|
spdlog::info("decomposing function: {} size: {}",
|
|
|
|
} else if (sym->storage_class == coff::storage_class_id::public_symbol ||
|
|
|
|
sym->name.to_string(img->get_strings()), fn_size);
|
|
|
|
sym->storage_class == coff::storage_class_id::private_symbol) {
|
|
|
|
|
|
|
|
auto scn = img->get_section(sym->section_index - 1);
|
|
|
|
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
|
|
|
|
auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]);
|
|
|
|
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
|
|
|
|
|
|
|
|
|
|
|
|
// if the section doesnt have a symbol then make one and put it into
|
|
|
|
auto syms = rtn.decompose();
|
|
|
|
// the symbol table...
|
|
|
|
spdlog::info("decomposed routine into {} symbols...", syms.size());
|
|
|
|
//
|
|
|
|
m_syms->add_symbols(syms);
|
|
|
|
if (!scn_sym.has_value()) {
|
|
|
|
} else if (sym->storage_class ==
|
|
|
|
auto scn_sym_name =
|
|
|
|
coff::storage_class_id::public_symbol) {
|
|
|
|
std::string(scn->name.to_string(img->get_strings()))
|
|
|
|
auto scn = img->get_section(sym->section_index - 1);
|
|
|
|
.append("#")
|
|
|
|
auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]);
|
|
|
|
.append(std::to_string(sym->section_index - 1))
|
|
|
|
|
|
|
|
.append("!")
|
|
|
|
// if the section doesnt have a symbol then make one and put it into
|
|
|
|
.append(std::to_string(img->file_header.timedate_stamp));
|
|
|
|
// the symbol table...
|
|
|
|
|
|
|
|
//
|
|
|
|
std::vector<std::uint8_t> scn_data(
|
|
|
|
if (!scn_sym.has_value()) {
|
|
|
|
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
|
|
|
|
auto scn_sym_name =
|
|
|
|
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
|
|
|
|
std::string(scn->name.to_string(img->get_strings()))
|
|
|
|
scn->size_raw_data);
|
|
|
|
.append("#")
|
|
|
|
|
|
|
|
.append(std::to_string(sym->section_index - 1))
|
|
|
|
decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {},
|
|
|
|
.append("!")
|
|
|
|
{}, sym_type_t::section);
|
|
|
|
.append(std::to_string(img->file_header.timedate_stamp));
|
|
|
|
|
|
|
|
|
|
|
|
m_syms->add_symbol(new_scn_sym);
|
|
|
|
std::vector<std::uint8_t> scn_data(
|
|
|
|
}
|
|
|
|
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
|
|
|
|
|
|
|
|
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
|
|
|
|
|
|
|
|
scn->size_raw_data);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn,
|
|
|
|
|
|
|
|
{}, {}, sym_type_t::section);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
spdlog::info(
|
|
|
|
|
|
|
|
"generating symbol for section: {} sym name: {} hash: {:X} "
|
|
|
|
|
|
|
|
"section size: {}",
|
|
|
|
|
|
|
|
scn->name.to_string(img->get_strings()), new_scn_sym.name(),
|
|
|
|
|
|
|
|
new_scn_sym.hash(), scn->size_raw_data);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
m_syms->add_symbol(new_scn_sym);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// create a symbol for the data...
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
decomp::symbol_t new_sym(
|
|
|
|
|
|
|
|
img, sym->name.to_string(img->get_strings()).data(), sym->value,
|
|
|
|
|
|
|
|
{}, scn, sym, {}, sym_type_t::data);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
spdlog::info("adding data symbol: {} located inside of section: {}",
|
|
|
|
// create a symbol for the data...
|
|
|
|
new_sym.name(),
|
|
|
|
//
|
|
|
|
m_syms->sym_from_hash(m_scn_hash_tbl[new_sym.scn()])
|
|
|
|
decomp::symbol_t new_sym(
|
|
|
|
.value()
|
|
|
|
img, sym->name.to_string(img->get_strings()).data(), sym->value, {},
|
|
|
|
.name());
|
|
|
|
scn, sym, {}, sym_type_t::data);
|
|
|
|
|
|
|
|
|
|
|
|
m_syms->add_symbol(new_sym);
|
|
|
|
m_syms->add_symbol(new_sym);
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (sym->storage_class ==
|
|
|
|
|
|
|
|
coff::storage_class_id::
|
|
|
|
|
|
|
|
external_definition) { // else if the symbol has no
|
|
|
|
|
|
|
|
// section... these symbols require
|
|
|
|
|
|
|
|
// the linker to allocate space for
|
|
|
|
|
|
|
|
// them...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::uint8_t> data(sym->value, 0);
|
|
|
|
|
|
|
|
decomp::symbol_t bss_sym(img,
|
|
|
|
|
|
|
|
sym->name.to_string(img->get_strings()).data(),
|
|
|
|
|
|
|
|
{}, data, {}, sym, {}, sym_type_t::data);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
m_syms->add_symbol(bss_sym);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (sym->storage_class ==
|
|
|
|
|
|
|
|
coff::storage_class_id::
|
|
|
|
|
|
|
|
external_definition) { // else if the symbol has no
|
|
|
|
|
|
|
|
// section... these symbols
|
|
|
|
|
|
|
|
// require the linker to allocate
|
|
|
|
|
|
|
|
// space for them...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::uint8_t> data(sym->value, 0);
|
|
|
|
|
|
|
|
decomp::symbol_t bss_sym(img,
|
|
|
|
|
|
|
|
sym->name.to_string(img->get_strings()).data(),
|
|
|
|
|
|
|
|
{}, data, {}, sym, {}, sym_type_t::data);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
m_syms->add_symbol(bss_sym);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
@ -137,6 +137,63 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose() {
|
|
|
|
return m_syms;
|
|
|
|
return m_syms;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) {
|
|
|
|
|
|
|
|
std::optional<std::pair<coff::image_t*, coff::symbol_t*>> entry;
|
|
|
|
|
|
|
|
if (!(entry = get_symbol(entry_sym.data())).has_value())
|
|
|
|
|
|
|
|
return 0u;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::set<coff::symbol_t*> cache;
|
|
|
|
|
|
|
|
const auto finding_syms = [&]() -> bool {
|
|
|
|
|
|
|
|
for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) {
|
|
|
|
|
|
|
|
auto [img, sym] = *itr;
|
|
|
|
|
|
|
|
if (sym->has_section() && !cache.count(sym)) {
|
|
|
|
|
|
|
|
auto scn = img->get_section(sym->section_index - 1);
|
|
|
|
|
|
|
|
auto num_relocs = scn->num_relocs;
|
|
|
|
|
|
|
|
auto relocs = reinterpret_cast<coff::reloc_t*>(
|
|
|
|
|
|
|
|
scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (auto idx = 0u; idx < num_relocs; ++idx) {
|
|
|
|
|
|
|
|
auto reloc_sym = img->get_symbol(relocs[idx].symbol_index);
|
|
|
|
|
|
|
|
// if the symbol is defined in the current obj then we dont need to go
|
|
|
|
|
|
|
|
// looking for where its actually defined...
|
|
|
|
|
|
|
|
if (img->get_symbol(relocs[idx].symbol_index)->has_section()) {
|
|
|
|
|
|
|
|
sym_data_t sym_data = {img, reloc_sym};
|
|
|
|
|
|
|
|
if (m_used_syms.emplace(sym_data).second)
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
auto sym_name = reloc_sym->name.to_string(img->get_strings());
|
|
|
|
|
|
|
|
entry = get_symbol(sym_name);
|
|
|
|
|
|
|
|
if (m_used_syms.emplace(entry.value()).second)
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
cache.emplace(sym);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
m_used_syms.emplace(entry.value());
|
|
|
|
|
|
|
|
for (m_used_syms.emplace(entry.value()); finding_syms();)
|
|
|
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return m_used_syms.size();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::optional<sym_data_t> decomp_t::get_symbol(const std::string_view& name) {
|
|
|
|
|
|
|
|
coff::image_t* img = {};
|
|
|
|
|
|
|
|
coff::symbol_t* sym = {};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto& syms = m_lookup_tbl[symbol_t::hash(name.data())];
|
|
|
|
|
|
|
|
for (auto idx = 0u; idx < syms.size(); ++idx) {
|
|
|
|
|
|
|
|
img = syms[idx].first;
|
|
|
|
|
|
|
|
sym = syms[idx].second;
|
|
|
|
|
|
|
|
if (sym->has_section())
|
|
|
|
|
|
|
|
return {{img, sym}};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return {{img, sym}};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<routine_t> decomp_t::rtns() {
|
|
|
|
std::vector<routine_t> decomp_t::rtns() {
|
|
|
|
return m_rtns;
|
|
|
|
return m_rtns;
|
|
|
|
}
|
|
|
|
}
|
|
|
|