wrote a bunch of code optimizations, wrote an algo to get all the

symbols that the code uses...
3.0
_xeroxz 2 years ago
parent 17b2371910
commit 1c51f37f60

@ -3,6 +3,7 @@
#include <cstdint>
#include <linuxpe>
#include <optional>
#include <set>
#include <vector>
#include <decomp/routine.hpp>
@ -12,6 +13,7 @@
#include <coff/image.hpp>
namespace theo::decomp {
using sym_data_t = std::pair<coff::image_t*, coff::symbol_t*>;
class decomp_t {
public:
explicit decomp_t(std::vector<std::uint8_t>& lib,
@ -22,13 +24,20 @@ class decomp_t {
std::vector<coff::image_t*> objs();
recomp::symbol_table_t* syms();
std::map<coff::section_header_t*, std::size_t>& scn_hash_tbl();
std::optional<recomp::symbol_table_t*> decompose();
std::optional<recomp::symbol_table_t*> decompose(
const std::string&& entry_sym);
private:
std::uint32_t ext_used_syms(const std::string&& entry_sym);
std::optional<sym_data_t> get_symbol(const std::string_view& name);
const std::vector<std::uint8_t> m_lib;
std::vector<coff::image_t*> m_objs;
std::vector<routine_t> m_rtns;
std::set<sym_data_t> m_used_syms;
std::set<coff::image_t*> m_processed_objs;
std::map<coff::section_header_t*, std::size_t> m_scn_hash_tbl;
std::map<std::size_t, std::vector<sym_data_t>> m_lookup_tbl;
recomp::symbol_table_t* m_syms;
};
} // namespace theo::decomp

@ -16,8 +16,8 @@ class symbol_table_t {
void add_symbol(decomp::symbol_t& sym);
void add_symbols(std::vector<decomp::symbol_t>& syms);
std::optional<decomp::symbol_t> sym_from_hash(std::size_t hash);
std::optional<decomp::symbol_t> sym_from_alloc(std::uintptr_t allocated_at);
std::optional<decomp::symbol_t*> sym_from_hash(std::size_t hash);
std::optional<decomp::symbol_t*> sym_from_alloc(std::uintptr_t allocated_at);
void update(std::size_t hash, decomp::symbol_t& sym);
void update(std::size_t hash, std::uintptr_t allocated_at);

@ -25,13 +25,16 @@ using lnk_fns_t =
class theo_t {
public:
explicit theo_t(std::vector<std::uint8_t>& lib, lnk_fns_t lnkr_fns);
explicit theo_t(std::vector<std::uint8_t>& lib,
lnk_fns_t lnkr_fns,
const std::string&& entry_sym);
std::optional<std::uint32_t> decompose();
std::uintptr_t compose(const std::string&& entry_sym);
std::uintptr_t compose();
std::uintptr_t resolve(const std::string&& sym);
private:
std::string m_entry_sym;
decomp::decomp_t m_dcmp;
recomp::recomp_t m_recmp;
recomp::symbol_table_t m_sym_tbl;

@ -1,4 +1,6 @@
#include <Windows.h>
#include <psapi.h>
#include <filesystem>
#include <fstream>
@ -18,6 +20,9 @@ int main(int argc, char* argv[]) {
fdata.resize(fsize);
f.read((char*)fdata.data(), fsize);
LoadLibraryA("user32.dll");
LoadLibraryA("win32u.dll");
theo::recomp::allocator_t allocator =
[&](std::uint32_t size,
coff::section_characteristics_t section_type) -> std::uintptr_t {
@ -32,11 +37,26 @@ int main(int argc, char* argv[]) {
};
theo::recomp::resolver_t resolver = [&](std::string sym) -> std::uintptr_t {
return reinterpret_cast<std::uintptr_t>(
GetProcAddress(LoadLibraryA("user32.dll"), sym.data()));
auto loaded_modules = std::make_unique<HMODULE[]>(64);
std::uintptr_t result = 0u, loaded_module_sz = 0u;
if (!EnumProcessModules(GetCurrentProcess(), loaded_modules.get(), 512,
(PDWORD)&loaded_module_sz))
return {};
for (auto i = 0u; i < loaded_module_sz / 8u; i++) {
wchar_t file_name[MAX_PATH] = L"";
if (!GetModuleFileNameExW(GetCurrentProcess(), loaded_modules.get()[i],
file_name, _countof(file_name)))
continue;
if ((result = reinterpret_cast<std::uintptr_t>(
GetProcAddress(LoadLibraryW(file_name), sym.c_str()))))
break;
}
return result;
};
theo::theo_t t(fdata, {allocator, copier, resolver});
theo::theo_t t(fdata, {allocator, copier, resolver}, "main");
auto res = t.decompose();
if (!res.has_value()) {
@ -45,7 +65,7 @@ int main(int argc, char* argv[]) {
}
spdlog::info("decomposed {} symbols...", res.value());
auto entry_pnt = t.compose("EntryPoint");
auto entry_pnt = t.compose();
spdlog::info("entry point address: {:X}", entry_pnt);
std::getchar();
reinterpret_cast<void (*)()>(entry_pnt)();

@ -4,7 +4,8 @@ namespace theo::decomp {
// Builds a decomposer over the archive bytes in `lib`...
//
// `lib` is copied into the const member m_lib, so the caller's vector does not
// need to outlive this object... `syms` is stored as a raw pointer (not
// copied) and is the symbol table that decompose() fills in and returns...
//
decomp_t::decomp_t(std::vector<std::uint8_t>& lib, recomp::symbol_table_t* syms)
: m_lib(lib), m_syms(syms) {}
std::optional<recomp::symbol_table_t*> decomp_t::decompose() {
std::optional<recomp::symbol_table_t*> decomp_t::decompose(
const std::string&& entry_sym) {
// extract obj files from the archive file...
//
ar::view<false> lib(m_lib.data(), m_lib.size());
@ -20,115 +21,114 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose() {
}
});
std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) {
for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) {
auto sym = img->get_symbol(idx);
auto sym_name = sym->name.to_string(img->get_strings());
if (sym_name.length()) {
auto sym_hash = symbol_t::hash(sym_name.data());
m_lookup_tbl[sym_hash].push_back({img, sym});
}
}
});
// extract the used symbols from the objs and collect them into a set so that
// we can easily decompose them... no need to deal with every single symbol...
spdlog::info("extracted {} symbols being used...",
ext_used_syms(entry_sym.data()));
// generate symbols, populate section hash table, for each object file
// extracted from the archive file...
//
std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) {
// populate section hash table...
std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) {
auto [img, sym] = data;
// populate section hash table with sections for the img of this
// symbol... only populate the hash table if its not been populated for
// this obj before...
//
for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) {
auto scn = img->get_section(idx);
auto scn_sym_name =
std::string(scn->name.to_string(img->get_strings()))
.append("#")
.append(std::to_string(idx))
.append("!")
.append(std::to_string(img->file_header.timedate_stamp));
// hash the name of the section + the index + the timestamp of the obj
// file it is in...
//
m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)});
if (m_processed_objs.emplace(img).second) {
for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) {
auto scn = img->get_section(idx);
auto scn_sym_name =
std::string(scn->name.to_string(img->get_strings()))
.append("#")
.append(std::to_string(idx))
.append("!")
.append(std::to_string(img->file_header.timedate_stamp));
// hash the name of the section + the index + the timestamp of the
// obj file it is in...
//
m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)});
}
}
auto syms_cnt = img->file_header.num_symbols;
for (auto idx = 0u; idx < syms_cnt; ++idx) {
auto sym = img->get_symbol(idx);
// if the symbol is a function then we are going to decompose it...
// data symbols are handled after this...
//
if (sym->has_section()) {
if (sym->derived_type == coff::derived_type_id::function) {
auto scn = img->get_section(sym->section_index - 1);
auto dcmp_type = scn->name.to_string(img->get_strings()) ==
INSTR_SPLIT_SECTION_NAME
? decomp::sym_type_t::instruction
: decomp::sym_type_t::function;
auto fn_size = scn->size_raw_data;
auto fn_bgn =
scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
spdlog::info("decomposing function: {} size: {}",
sym->name.to_string(img->get_strings()), fn_size);
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
auto syms = rtn.decompose();
spdlog::info("decomposed routine into {} symbols...", syms.size());
m_syms->add_symbols(syms);
} else if (sym->storage_class ==
coff::storage_class_id::public_symbol) {
auto scn = img->get_section(sym->section_index - 1);
auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]);
// if the section doesnt have a symbol then make one and put it into
// the symbol table...
//
if (!scn_sym.has_value()) {
auto scn_sym_name =
std::string(scn->name.to_string(img->get_strings()))
.append("#")
.append(std::to_string(sym->section_index - 1))
.append("!")
.append(std::to_string(img->file_header.timedate_stamp));
std::vector<std::uint8_t> scn_data(
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
scn->size_raw_data);
decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn,
{}, {}, sym_type_t::section);
spdlog::info(
"generating symbol for section: {} sym name: {} hash: {:X} "
"section size: {}",
scn->name.to_string(img->get_strings()), new_scn_sym.name(),
new_scn_sym.hash(), scn->size_raw_data);
m_syms->add_symbol(new_scn_sym);
}
// create a symbol for the data...
//
decomp::symbol_t new_sym(
img, sym->name.to_string(img->get_strings()).data(), sym->value,
{}, scn, sym, {}, sym_type_t::data);
// if the symbol is a function then we are going to decompose it...
// data symbols are handled after this...
//
if (sym->has_section()) {
if (sym->derived_type == coff::derived_type_id::function) {
auto scn = img->get_section(sym->section_index - 1);
auto dcmp_type =
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
? decomp::sym_type_t::instruction
: decomp::sym_type_t::function;
auto fn_size = scn->size_raw_data;
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img);
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
auto syms = rtn.decompose();
m_syms->add_symbols(syms);
} else if (sym->storage_class == coff::storage_class_id::public_symbol ||
sym->storage_class == coff::storage_class_id::private_symbol) {
auto scn = img->get_section(sym->section_index - 1);
auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]);
// if the section doesnt have a symbol then make one and put it into
// the symbol table...
//
if (!scn_sym.has_value()) {
auto scn_sym_name =
std::string(scn->name.to_string(img->get_strings()))
.append("#")
.append(std::to_string(sym->section_index - 1))
.append("!")
.append(std::to_string(img->file_header.timedate_stamp));
std::vector<std::uint8_t> scn_data(
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
scn->size_raw_data);
decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {},
{}, sym_type_t::section);
m_syms->add_symbol(new_scn_sym);
}
spdlog::info("adding data symbol: {} located inside of section: {}",
new_sym.name(),
m_syms->sym_from_hash(m_scn_hash_tbl[new_sym.scn()])
.value()
.name());
// create a symbol for the data...
//
decomp::symbol_t new_sym(
img, sym->name.to_string(img->get_strings()).data(), sym->value, {},
scn, sym, {}, sym_type_t::data);
m_syms->add_symbol(new_sym);
}
} else if (sym->storage_class ==
coff::storage_class_id::
external_definition) { // else if the symbol has no
// section... these symbols require
// the linker to allocate space for
// them...
std::vector<std::uint8_t> data(sym->value, 0);
decomp::symbol_t bss_sym(img,
sym->name.to_string(img->get_strings()).data(),
{}, data, {}, sym, {}, sym_type_t::data);
m_syms->add_symbol(bss_sym);
m_syms->add_symbol(new_sym);
}
} else if (sym->storage_class ==
coff::storage_class_id::
external_definition) { // else if the symbol has no
// section... these symbols
// require the linker to allocate
// space for them...
std::vector<std::uint8_t> data(sym->value, 0);
decomp::symbol_t bss_sym(img,
sym->name.to_string(img->get_strings()).data(),
{}, data, {}, sym, {}, sym_type_t::data);
m_syms->add_symbol(bss_sym);
}
});
@ -137,6 +137,63 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose() {
return m_syms;
}
std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) {
std::optional<std::pair<coff::image_t*, coff::symbol_t*>> entry;
if (!(entry = get_symbol(entry_sym.data())).has_value())
return 0u;
std::set<coff::symbol_t*> cache;
const auto finding_syms = [&]() -> bool {
for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) {
auto [img, sym] = *itr;
if (sym->has_section() && !cache.count(sym)) {
auto scn = img->get_section(sym->section_index - 1);
auto num_relocs = scn->num_relocs;
auto relocs = reinterpret_cast<coff::reloc_t*>(
scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img));
for (auto idx = 0u; idx < num_relocs; ++idx) {
auto reloc_sym = img->get_symbol(relocs[idx].symbol_index);
// if the symbol is defined in the current obj then we dont need to go
// looking for where its actually defined...
if (img->get_symbol(relocs[idx].symbol_index)->has_section()) {
sym_data_t sym_data = {img, reloc_sym};
if (m_used_syms.emplace(sym_data).second)
return true;
} else {
auto sym_name = reloc_sym->name.to_string(img->get_strings());
entry = get_symbol(sym_name);
if (m_used_syms.emplace(entry.value()).second)
return true;
}
}
cache.emplace(sym);
}
}
return false;
};
m_used_syms.emplace(entry.value());
for (m_used_syms.emplace(entry.value()); finding_syms();)
;
return m_used_syms.size();
}
std::optional<sym_data_t> decomp_t::get_symbol(const std::string_view& name) {
coff::image_t* img = {};
coff::symbol_t* sym = {};
auto& syms = m_lookup_tbl[symbol_t::hash(name.data())];
for (auto idx = 0u; idx < syms.size(); ++idx) {
img = syms[idx].first;
sym = syms[idx].second;
if (sym->has_section())
return {{img, sym}};
}
return {{img, sym}};
}
// Hands back a copy of the routine list stored in m_rtns... the caller gets
// its own vector, so changing it does not touch this object...
//
std::vector<routine_t> decomp_t::rtns() {
  std::vector<routine_t> snapshot(m_rtns);
  return snapshot;
}

@ -23,10 +23,6 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
spdlog::info("{} reloc to: {} hash: {:X} at offset: {}",
m_sym->name.to_string(m_img->get_strings()), sym_name,
sym_hash, scn_reloc->virtual_address);
relocs.push_back(recomp::reloc_t(scn_reloc->virtual_address, sym_hash,
sym_name.data()));
}
@ -81,9 +77,6 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
auto sym_hash = decomp::symbol_t::hash(sym_name.data());
auto reloc_offset = reloc->virtual_address - offset;
spdlog::info("{} reloc to: {} at offset: {}", new_sym_name, sym_name,
reloc_offset);
relocs.push_back(
recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));
}

@ -42,7 +42,7 @@ void recomp_t::allocate() {
assert(scn_sym.has_value());
}
sym.allocated_at(scn_sym.value().allocated_at() + sym.offset());
sym.allocated_at(scn_sym.value()->allocated_at() + sym.offset());
} else { // else if there is no section then we allocate based upon the
// size of the symbol... this is only done for symbols that are
// bss...
@ -64,7 +64,8 @@ void recomp_t::resolve() {
// resolve relocations in all symbols...
//
m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) {
std::for_each(sym.relocs().begin(), sym.relocs().end(), [&](reloc_t reloc) {
auto& relocs = sym.relocs();
std::for_each(relocs.begin(), relocs.end(), [&](reloc_t& reloc) {
if (reloc.offset() > sym.data().size()) {
spdlog::error(
"invalid relocation... writing outside of symbol length... offset: "
@ -79,7 +80,7 @@ void recomp_t::resolve() {
//
auto reloc_sym = m_dcmp->syms()->sym_from_hash(reloc.hash());
auto allocated_at = reloc_sym.has_value()
? reloc_sym.value().allocated_at()
? reloc_sym.value()->allocated_at()
: m_resolver(reloc.name());
if (!allocated_at) {
@ -136,6 +137,6 @@ void recomp_t::resolver(resolver_t resolve) {
std::uintptr_t recomp_t::resolve(const std::string&& sym) {
auto res = m_dcmp->syms()->sym_from_hash(decomp::symbol_t::hash(sym));
return res.has_value() ? res->allocated_at() : 0;
return res.has_value() ? res.value()->allocated_at() : 0;
}
} // namespace theo::recomp

@ -31,13 +31,13 @@ void symbol_table_t::for_each(std::function<void(decomp::symbol_t& sym)> fn) {
fn(itr->second);
}
std::optional<decomp::symbol_t> symbol_table_t::sym_from_hash(
std::optional<decomp::symbol_t*> symbol_table_t::sym_from_hash(
std::size_t hash) {
return m_table.count(hash) ? m_table.at(hash)
: std::optional<decomp::symbol_t>{};
return m_table.count(hash) ? &m_table.at(hash)
: std::optional<decomp::symbol_t*>{};
}
std::optional<decomp::symbol_t> symbol_table_t::sym_from_alloc(
std::optional<decomp::symbol_t*> symbol_table_t::sym_from_alloc(
std::uintptr_t allocated_at) {
auto res =
std::find_if(m_table.begin(), m_table.end(),
@ -45,10 +45,11 @@ std::optional<decomp::symbol_t> symbol_table_t::sym_from_alloc(
return itr.second.allocated_at() == allocated_at;
});
return res != m_table.end() ? res->second : std::optional<decomp::symbol_t>{};
return res != m_table.end() ? &res->second
: std::optional<decomp::symbol_t*>{};
}
// Reports how many entries m_table currently holds... the container's count
// is converted to the 32-bit return type...
//
std::uint32_t symbol_table_t::size() {
  const auto entry_count = m_table.size();
  return static_cast<std::uint32_t>(entry_count);
}
} // namespace theo::comp
} // namespace theo::recomp

@ -1,8 +1,10 @@
#include <theo.hpp>
namespace theo {
theo_t::theo_t(std::vector<std::uint8_t>& lib, lnk_fns_t lnkr_fns)
: m_dcmp(lib, &m_sym_tbl), m_recmp(&m_dcmp) {
theo_t::theo_t(std::vector<std::uint8_t>& lib,
lnk_fns_t lnkr_fns,
const std::string&& entry_sym)
: m_dcmp(lib, &m_sym_tbl), m_recmp(&m_dcmp), m_entry_sym(entry_sym) {
// init enc/dec tables only once... add obfuscation passes to the engine...
//
if (static std::atomic_bool v = true; v.exchange(false)) {
@ -23,32 +25,25 @@ theo_t::theo_t(std::vector<std::uint8_t>& lib, lnk_fns_t lnkr_fns)
}
std::optional<std::uint32_t> theo_t::decompose() {
auto res = m_dcmp.decompose();
auto res = m_dcmp.decompose(m_entry_sym.data());
if (!res.has_value()) {
spdlog::error("failed to decompose...\n");
return {};
}
spdlog::info("decompose successful... {} symbols", res.value()->size());
res.value()->for_each([&](decomp::symbol_t& sym) {
spdlog::info("hash: {:X}, name: {}", sym.hash(), sym.name());
});
return res.value()->size();
}
std::uintptr_t theo_t::compose(const std::string&& entry_sym) {
std::uintptr_t theo_t::compose() {
// run obfuscation engine on all symbols...
//
auto engine = obf::engine_t::get();
m_sym_tbl.for_each([&](decomp::symbol_t& sym) { engine->run(&sym); });
m_recmp.allocate();
m_sym_tbl.for_each([&](decomp::symbol_t& sym) {
spdlog::info("{} allocated at {:X}", sym.name(), sym.allocated_at());
});
m_recmp.resolve();
m_recmp.copy_syms();
return m_recmp.resolve(entry_sym.data());
return m_recmp.resolve(m_entry_sym.data());
}
} // namespace theo
Loading…
Cancel
Save