Theodosius v3.0
Jit linker, symbol mapper, and obfuscator
Public Member Functions
theo::decomp::decomp_t Class Reference

the main decomposition class, which is responsible for breaking down a lib file into coff files and extracting the used symbols from those coff files. More...

#include "decomp.hpp"

Public Member Functions

 decomp_t (std::vector< std::uint8_t > &lib, recomp::symbol_table_t *syms)
 the explicit constructor for decomp_t More...
 
std::vector< routine_t > rtns ()
 gets all of the routine objects. More...
 
std::vector< std::uint8_t > lib ()
 gets a vector of bytes consisting of the lib file. More...
 
std::vector< std::vector< std::uint8_t > > objs ()
 gets all the obj files as a vector of a vector of bytes. More...
 
recomp::symbol_table_t * syms ()
 gets the symbol table. More...
 
std::map< coff::section_header_t *, std::size_t > & scn_hash_tbl ()
 gets the section hash table, which maps each section header pointer to the hash generated for that section. More...
 
std::optional< recomp::symbol_table_t * > decompose (std::string &entry_sym)
 decomposes (extracts) the symbols used. this function determines all used symbols given the entry point. More...
 

Detailed Description

the main decomposition class, which is responsible for breaking down a lib file into coff files and extracting the used symbols from those coff files.

Definition at line 61 of file decomp.hpp.

Constructor & Destructor Documentation

◆ decomp_t()

theo::decomp::decomp_t::decomp_t ( std::vector< std::uint8_t > &  lib,
recomp::symbol_table_t * syms
)
explicit

the explicit constructor for decomp_t

Parameters
lib: vector of bytes containing the lib file.
syms: symbol table that gets populated and managed by this class.

Definition at line 34 of file decomp.cpp.

35 : m_lib(lib), m_syms(syms) {}

Member Function Documentation

◆ decompose()

std::optional< recomp::symbol_table_t * > theo::decomp::decomp_t::decompose ( std::string &  entry_sym)

decomposes (extracts) the symbols used. this function determines all used symbols given the entry point.

Parameters
entry_sym: the entry point symbol name.
Returns
returns an optional pointer to the symbol table. no value in the optional object on failure.

Definition at line 37 of file decomp.cpp.

38 {
39 // extract obj files from the archive file...
40 //
41 ar::view<false> lib(m_lib.data(), m_lib.size());
42 std::for_each(
43 lib.begin(), lib.end(),
44 [&](std::pair<std::string_view, ar::entry_t&> itr) {
45 // if the entry isnt the symbol table or the string table
46 // then we know its an obj file...
47 //
48 if (!itr.second.is_symbol_table() && !itr.second.is_string_table()) {
49 spdlog::info("extracted obj from archive: {}", itr.first);
50 std::vector<std::uint8_t> data(itr.second.begin(), itr.second.end());
51 m_objs.push_back(data);
52 }
53 });
54
55 std::for_each(
56 m_objs.begin(), m_objs.end(), [&](std::vector<std::uint8_t>& img_data) {
57 auto img = reinterpret_cast<coff::image_t*>(img_data.data());
58 for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) {
59 auto sym = img->get_symbol(idx);
60 if (sym->section_index - 1 > img->file_header.num_sections)
61 continue;
62
63 auto sym_name = symbol_t::name(img, sym);
64 if (sym_name.length()) {
65 auto sym_hash = symbol_t::hash(sym_name.data());
66 auto sym_size =
67 sym->has_section()
68 ? next_sym(img, img->get_section(sym->section_index - 1),
69 sym)
70 : 0u;
71
72 m_lookup_tbl[sym_hash].emplace_back(img, sym, sym_size);
73 }
74 }
75 });
76
77 // extract used symbols from objs and create a nice little set of them so that
78 // we can easily decompose them... no need deal with every single symbol...
79 spdlog::info("extracted {} symbols being used...",
80 ext_used_syms(entry_sym.data()));
81
82 // generate symbols, populate section hash table, for each object file
83 // extracted from the archive file...
84 //
85 std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) {
86 auto [img, sym, size] = data;
87
88 // populate section hash table with sections for the img of this
89 // symbol... only populate the hash table if its not been populated for
90 // this obj before...
91 //
92 if (m_processed_objs.emplace(img).second) {
93 for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) {
94 auto scn = img->get_section(idx);
95 auto scn_sym_name =
96 std::string(scn->name.to_string(img->get_strings()))
97 .append("#")
98 .append(std::to_string(idx))
99 .append("!")
100 .append(std::to_string(img->file_header.timedate_stamp));
101
102 // hash the name of the section + the index + the timestamp of the
103 // obj file it is in...
104 //
105 m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)});
106 }
107 }
108
109 // if the symbol is a function then we are going to decompose it...
110 // data symbols are handled after this...
111 //
112 if (sym->has_section()) {
113 if (sym->derived_type == coff::derived_type_id::function) {
114 auto scn = img->get_section(sym->section_index - 1);
115 auto dcmp_type =
116 scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
117 ? decomp::sym_type_t::instruction
118 : decomp::sym_type_t::function;
119
120 auto fn_size = next_sym(img, scn, sym);
121 auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img) +
122 sym->value;
123
124 std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
125 decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
126
127 auto syms = rtn.decompose();
128 m_syms->put_symbols(syms);
129 } else if (sym->storage_class == coff::storage_class_id::public_symbol ||
130 sym->storage_class == coff::storage_class_id::private_symbol) {
131 auto scn = img->get_section(sym->section_index - 1);
132 auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]);
133
134 // if the section doesnt have a symbol then make one and put it into
135 // the symbol table...
136 //
137 if (!scn_sym.has_value()) {
138 auto scn_sym_name =
139 std::string(scn->name.to_string(img->get_strings()))
140 .append("#")
141 .append(std::to_string(sym->section_index - 1))
142 .append("!")
143 .append(std::to_string(img->file_header.timedate_stamp));
144
145 std::vector<std::uint8_t> scn_data(scn->size_raw_data);
146 if (scn->characteristics.cnt_uninit_data) {
147 scn_data.insert(scn_data.begin(), scn->size_raw_data, 0);
148 } else {
149 scn_data.insert(
150 scn_data.begin(),
151 reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
152 reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
153 scn->size_raw_data);
154 }
155
156 std::vector<recomp::reloc_t> relocs;
157 auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
158 scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img));
159
160 for (auto idx = 0u; idx < scn->num_relocs; ++idx) {
161 auto scn_reloc = &scn_relocs[idx];
162 auto sym_reloc = img->get_symbol(scn_relocs[idx].symbol_index);
163 auto sym_name = symbol_t::name(img, sym_reloc);
164 auto sym_hash = decomp::symbol_t::hash(sym_name.data());
165 relocs.push_back(
166 recomp::reloc_t(scn_reloc->virtual_address - sym->value,
167 sym_hash, sym_name.data()));
168 }
169
170 decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {},
171 relocs, sym_type_t::section);
172
173 m_syms->put_symbol(new_scn_sym);
174 }
175
176 // create a symbol for the data...
177 //
178 decomp::symbol_t new_sym(img, symbol_t::name(img, sym).data(),
179 sym->value, {}, scn, sym, {},
180 sym_type_t::data);
181
182 m_syms->put_symbol(new_sym);
183 }
184 } else if (sym->storage_class ==
185 coff::storage_class_id::
186 external_definition) { // else if the symbol has no
187 // section... these symbols
188 // require the linker to allocate
189 // space for them...
190
191 std::vector<std::uint8_t> data(sym->value, 0);
192 decomp::symbol_t bss_sym(img, symbol_t::name(img, sym).data(), {}, data,
193 {}, sym, {}, sym_type_t::data);
194
195 m_syms->put_symbol(bss_sym);
196 }
197 });
198
199 // return the extract symbols to the caller...
200 //
201 return m_syms;
202}

References lib().

Referenced by theo::theo_t::decompose().

◆ lib()

std::vector< std::uint8_t > theo::decomp::decomp_t::lib ( )

gets a vector of bytes consisting of the lib file.

Returns
a vector of bytes consisting of the lib file.

Definition at line 285 of file decomp.cpp.

285 {
286 return m_lib;
287}

Referenced by decompose().

◆ objs()

std::vector< std::vector< std::uint8_t > > theo::decomp::decomp_t::objs ( )

gets all the obj files as a vector of a vector of bytes.

Returns
all the obj files as a vector of a vector of bytes.

Definition at line 289 of file decomp.cpp.

289 {
290 return m_objs;
291}

◆ rtns()

std::vector< routine_t > theo::decomp::decomp_t::rtns ( )

gets all of the routine objects.

Returns
vector of routine objects.

Definition at line 281 of file decomp.cpp.

281 {
282 return m_rtns;
283}

◆ scn_hash_tbl()

std::map< coff::section_header_t *, std::size_t > & theo::decomp::decomp_t::scn_hash_tbl ( )

gets the section hash table, which maps each section header pointer to the hash generated for that section.

Returns
the section hash table, which maps each section header pointer to the hash generated for that section.

Definition at line 297 of file decomp.cpp.

297 {
298 return m_scn_hash_tbl;
299}

◆ syms()

recomp::symbol_table_t * theo::decomp::decomp_t::syms ( )

gets the symbol table.

Returns
the symbol table.

Definition at line 293 of file decomp.cpp.

293 {
294 return m_syms;
295}

Referenced by theo::recomp::recomp_t::allocate().


The documentation for this class was generated from the following files: