Theodosius  v3.0
Jit linker, mapper, obfuscator, and mutator
Public Member Functions | List of all members
theo::decomp::decomp_t Class Reference

the main decomposition class, which is responsible for breaking down a lib file into coff files and extracting the used symbols from those coff files. More...

#include <decomp.hpp>

Public Member Functions

 decomp_t (std::vector< std::uint8_t > &lib, recomp::symbol_table_t *syms)
 the explicit constructor for decomp_t More...
 
std::vector< routine_t > rtns ()
 gets all of the routine objects. More...
 
std::vector< std::uint8_t > lib ()
 gets a vector of bytes consisting of the lib file. More...
 
std::vector< std::vector< std::uint8_t > > objs ()
 gets all the obj files as a vector of a vector of bytes. More...
 
recomp::symbol_table_t * syms ()
 gets the symbol table. More...
 
std::map< coff::section_header_t *, std::size_t > & scn_hash_tbl ()
 gets the section hash table, which maps each section header pointer --> hash of that section's symbol name (section name + "#" + section index + "!" + obj timestamp). More...
 
std::optional< recomp::symbol_table_t * > decompose (std::string &entry_sym)
 decomposes (extracts) the symbols used. this function determines all used symbols given the entry point. More...
 

Detailed Description

the main decomposition class, which is responsible for breaking down a lib file into coff files and extracting the used symbols from those coff files.

Constructor & Destructor Documentation

◆ decomp_t()

theo::decomp::decomp_t::decomp_t ( std::vector< std::uint8_t > &  lib,
recomp::symbol_table_t *  syms 
)
explicit

the explicit constructor for decomp_t

Parameters
lib	vector of bytes containing the lib file.
syms	symbol table that gets populated and managed by this class.
35  : m_lib(lib), m_syms(syms) {}
recomp::symbol_table_t * syms()
gets the symbol table.
Definition: decomp.cpp:293
std::vector< std::uint8_t > lib()
gets a vector of bytes consisting of the lib file.
Definition: decomp.cpp:285

Member Function Documentation

◆ decompose()

std::optional< recomp::symbol_table_t * > theo::decomp::decomp_t::decompose ( std::string &  entry_sym)

decomposes (extracts) the symbols used. this function determines all used symbols given the entry point.

Parameters
entry_sym	the entry point symbol name.
Returns
returns an optional pointer to the symbol table. no value in the optional object on failure.
38  {
39  // extract obj files from the archive file...
40  //
41  ar::view<false> lib(m_lib.data(), m_lib.size());
42  std::for_each(
43  lib.begin(), lib.end(),
44  [&](std::pair<std::string_view, ar::entry_t&> itr) {
45  // if the entry isnt the symbol table or the string table
46  // then we know its an obj file...
47  //
48  if (!itr.second.is_symbol_table() && !itr.second.is_string_table()) {
49  spdlog::info("extracted obj from archive: {}", itr.first);
50  std::vector<std::uint8_t> data(itr.second.begin(), itr.second.end());
51  m_objs.push_back(data);
52  }
53  });
54 
55  std::for_each(
56  m_objs.begin(), m_objs.end(), [&](std::vector<std::uint8_t>& img_data) {
57  auto img = reinterpret_cast<coff::image_t*>(img_data.data());
58  for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) {
59  auto sym = img->get_symbol(idx);
60  if (sym->section_index - 1 > img->file_header.num_sections)
61  continue;
62 
63  auto sym_name = symbol_t::name(img, sym);
64  if (sym_name.length()) {
65  auto sym_hash = symbol_t::hash(sym_name.data());
66  auto sym_size =
67  sym->has_section()
68  ? next_sym(img, img->get_section(sym->section_index - 1),
69  sym)
70  : 0u;
71 
72  m_lookup_tbl[sym_hash].emplace_back(img, sym, sym_size);
73  }
74  }
75  });
76 
77  // extract used symbols from objs and create a nice little set of them so that
78  // we can easily decompose them... no need deal with every single symbol...
79  spdlog::info("extracted {} symbols being used...",
80  ext_used_syms(entry_sym.data()));
81 
82  // generate symbols, populate section hash table, for each object file
83  // extracted from the archive file...
84  //
85  std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) {
86  auto [img, sym, size] = data;
87 
88  // populate section hash table with sections for the img of this
89  // symbol... only populate the hash table if its not been populated for
90  // this obj before...
91  //
92  if (m_processed_objs.emplace(img).second) {
93  for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) {
94  auto scn = img->get_section(idx);
95  auto scn_sym_name =
96  std::string(scn->name.to_string(img->get_strings()))
97  .append("#")
98  .append(std::to_string(idx))
99  .append("!")
100  .append(std::to_string(img->file_header.timedate_stamp));
101 
102  // hash the name of the section + the index + the timestamp of the
103  // obj file it is in...
104  //
105  m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)});
106  }
107  }
108 
109  // if the symbol is a function then we are going to decompose it...
110  // data symbols are handled after this...
111  //
112  if (sym->has_section()) {
113  if (sym->derived_type == coff::derived_type_id::function) {
114  auto scn = img->get_section(sym->section_index - 1);
115  auto dcmp_type =
116  scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
117  ? decomp::sym_type_t::instruction
118  : decomp::sym_type_t::function;
119 
120  auto fn_size = next_sym(img, scn, sym);
121  auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img) +
122  sym->value;
123 
124  std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
125  decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
126 
127  auto syms = rtn.decompose();
128  m_syms->put_symbols(syms);
129  } else if (sym->storage_class == coff::storage_class_id::public_symbol ||
130  sym->storage_class == coff::storage_class_id::private_symbol) {
131  auto scn = img->get_section(sym->section_index - 1);
132  auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]);
133 
134  // if the section doesnt have a symbol then make one and put it into
135  // the symbol table...
136  //
137  if (!scn_sym.has_value()) {
138  auto scn_sym_name =
139  std::string(scn->name.to_string(img->get_strings()))
140  .append("#")
141  .append(std::to_string(sym->section_index - 1))
142  .append("!")
143  .append(std::to_string(img->file_header.timedate_stamp));
144 
145  std::vector<std::uint8_t> scn_data(scn->size_raw_data);
146  if (scn->characteristics.cnt_uninit_data) {
147  scn_data.insert(scn_data.begin(), scn->size_raw_data, 0);
148  } else {
149  scn_data.insert(
150  scn_data.begin(),
151  reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
152  reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
153  scn->size_raw_data);
154  }
155 
156  std::vector<recomp::reloc_t> relocs;
157  auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
158  scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img));
159 
160  for (auto idx = 0u; idx < scn->num_relocs; ++idx) {
161  auto scn_reloc = &scn_relocs[idx];
162  auto sym_reloc = img->get_symbol(scn_relocs[idx].symbol_index);
163  auto sym_name = symbol_t::name(img, sym_reloc);
164  auto sym_hash = decomp::symbol_t::hash(sym_name.data());
165  relocs.push_back(
166  recomp::reloc_t(scn_reloc->virtual_address - sym->value,
167  sym_hash, sym_name.data()));
168  }
169 
170  decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {},
171  relocs, sym_type_t::section);
172 
173  m_syms->put_symbol(new_scn_sym);
174  }
175 
176  // create a symbol for the data...
177  //
178  decomp::symbol_t new_sym(img, symbol_t::name(img, sym).data(),
179  sym->value, {}, scn, sym, {},
180  sym_type_t::data);
181 
182  m_syms->put_symbol(new_sym);
183  }
184  } else if (sym->storage_class ==
185  coff::storage_class_id::
186  external_definition) { // else if the symbol has no
187  // section... these symbols
188  // require the linker to allocate
189  // space for them...
190 
191  std::vector<std::uint8_t> data(sym->value, 0);
192  decomp::symbol_t bss_sym(img, symbol_t::name(img, sym).data(), {}, data,
193  {}, sym, {}, sym_type_t::data);
194 
195  m_syms->put_symbol(bss_sym);
196  }
197  });
198 
199  // return the extract symbols to the caller...
200  //
201  return m_syms;
202 }
std::string name() const
gets the name of the symbol.
Definition: symbol.cpp:52
void put_symbol(decomp::symbol_t &sym)
add symbol to m_table
Definition: symbol_table.cpp:40
std::tuple< coff::image_t *, coff::symbol_t *, std::uint32_t > sym_data_t
meta symbol data. consists of the coff image which contains the coff symbol, the coff symbol itself,...
Definition: decomp.hpp:55
@ data
Definition: symbol.hpp:46

◆ lib()

std::vector< std::uint8_t > theo::decomp::decomp_t::lib ( )

gets a vector of bytes consisting of the lib file.

Returns
a vector of bytes consisting of the lib file.
285  {
286  return m_lib;
287 }

◆ objs()

std::vector< std::vector< std::uint8_t > > theo::decomp::decomp_t::objs ( )

gets all the obj files as a vector of a vector of bytes.

Returns
all the obj files as a vector of a vector of bytes.
289  {
290  return m_objs;
291 }

◆ rtns()

std::vector< routine_t > theo::decomp::decomp_t::rtns ( )

gets all of the routine objects.

Returns
vector of routine objects.
281  {
282  return m_rtns;
283 }

◆ scn_hash_tbl()

std::map< coff::section_header_t *, std::size_t > & theo::decomp::decomp_t::scn_hash_tbl ( )

gets the section hash table, which maps each section header pointer --> hash of that section's symbol name (section name + "#" + section index + "!" + obj timestamp).

Returns
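a reference to the section hash table, which maps each section header pointer --> hash of that section's symbol name (section name + "#" + section index + "!" + obj timestamp).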
297  {
298  return m_scn_hash_tbl;
299 }

◆ syms()

recomp::symbol_table_t * theo::decomp::decomp_t::syms ( )

gets the symbol table.

Returns
the symbol table.
293  {
294  return m_syms;
295 }

The documentation for this class was generated from the following files: