Theodosius  v3.0
JIT linker, mapper, obfuscator, and mutator
routine.cpp
Go to the documentation of this file.
1 // Copyright (c) 2022, _xeroxz
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // 1. Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 //
10 // 2. Redistributions in binary form must reproduce the above copyright notice,
11 // this list of conditions and the following disclaimer in the documentation
12 // and/or other materials provided with the distribution.
13 //
14 // 3. Neither the name of the copyright holder nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 // POSSIBILITY OF SUCH DAMAGE.
29 //
30 
31 #include <decomp/routine.hpp>
32 
33 namespace theo::decomp {
34 routine_t::routine_t(coff::symbol_t* sym,
35  coff::image_t* img,
36  coff::section_header_t* scn,
37  std::vector<std::uint8_t>& fn,
38  sym_type_t dcmp_type)
39  : m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {}
40 
41 std::vector<decomp::symbol_t> routine_t::decompose() {
42  std::vector<decomp::symbol_t> result;
43 
44  switch (m_dcmp_type) {
45  case function: {
46  std::vector<recomp::reloc_t> relocs;
47  auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
48  m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
49 
50  for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
51  auto scn_reloc = &scn_relocs[idx];
52  // if the reloc is in the current function...
53  if (scn_reloc->virtual_address >= m_sym->value &&
54  scn_reloc->virtual_address < m_sym->value + m_data.size()) {
55  auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
56  auto sym_name = symbol_t::name(m_img, sym_reloc);
57  auto sym_hash = decomp::symbol_t::hash(sym_name.data());
58  relocs.push_back(
59  recomp::reloc_t(scn_reloc->virtual_address - m_sym->value,
60  sym_hash, sym_name.data()));
61  }
62  }
63 
64  result.push_back(decomp::symbol_t(
65  m_img, symbol_t::name(m_img, m_sym).data(), m_sym->value, m_data,
66  m_scn, m_sym, relocs, sym_type_t::function));
67  break;
68  }
69  case instruction: {
70  std::uint32_t offset = {};
71  xed_error_enum_t err;
72 
73  xed_decoded_inst_t instr;
74  xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
75  xed_decoded_inst_zero_set_mode(&instr, &istate);
76 
77  // keep looping over the section, lower the number of bytes each time...
78  //
79  while ((err = xed_decode(&instr, m_data.data() + offset,
80  m_data.size() - offset)) == XED_ERROR_NONE) {
81  // symbol name is of the format: symbol@instroffset, I.E: main@11...
82  //
83  auto new_sym_name = symbol_t::name(m_img, m_sym);
84 
85  // first instruction doesnt need the @offset...
86  //
87  if (offset)
88  new_sym_name.append("@").append(std::to_string(offset));
89 
90  std::vector<recomp::reloc_t> relocs;
91  auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
92  m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
93 
94  // find if this instruction has a relocation or not...
95  // if so, return the reloc_t...
96  //
97  auto reloc = std::find_if(
98  scn_relocs, scn_relocs + m_scn->num_relocs,
99  [&](coff::reloc_t reloc) {
100  return reloc.virtual_address >= m_sym->value + offset &&
101  reloc.virtual_address <
102  m_sym->value + offset +
103  xed_decoded_inst_get_length(&instr);
104  });
105 
106  // if there is indeed a reloc for this instruction...
107  //
108  if (reloc != scn_relocs + m_scn->num_relocs) {
109  auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
110  auto sym_name = symbol_t::name(m_img, sym_reloc);
111  auto sym_hash = decomp::symbol_t::hash(sym_name.data());
112  auto reloc_offset = reloc->virtual_address - m_sym->value - offset;
113 
114  relocs.push_back(
115  recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));
116  }
117 
118  // add a reloc to the next instruction...
119  // note that the offset is ZERO... comp_t will understand that
120  // relocs with offset ZERO means the next instructions...
121  //
122  auto next_inst_sym =
123  symbol_t::name(m_img, m_sym)
124  .append("@")
125  .append(std::to_string(offset +
126  xed_decoded_inst_get_length(&instr)));
127 
128  relocs.push_back(recomp::reloc_t(
129  0, decomp::symbol_t::hash(next_inst_sym), next_inst_sym.data()));
130 
131  // get the instructions bytes
132  //
133  std::vector<std::uint8_t> inst_bytes(
134  m_data.data() + offset,
135  m_data.data() + offset + xed_decoded_inst_get_length(&instr));
136 
137  result.push_back(decomp::symbol_t(m_img, new_sym_name, offset,
138  inst_bytes, m_scn, m_sym, relocs,
140 
141  // after creating the symbol and dealing with relocs then print the
142  // information we have concluded...
143  //
144  char buff[255];
145  offset += xed_decoded_inst_get_length(&instr);
146  xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL,
147  NULL, NULL);
148 
149  spdlog::info("{}: {}", new_sym_name, buff);
150  // need to set this so that instr can be used to decode again...
151  xed_decoded_inst_zero_set_mode(&instr, &istate);
152  }
153 
154  // remove the relocation to the next symbol from the last instruction
155  //
156  auto& last_inst = result.back();
157  auto& last_inst_relocs = last_inst.relocs();
158  last_inst_relocs.erase(last_inst_relocs.end() - 1);
159  break;
160  }
161  default:
162  break;
163  }
164 
165  return result;
166 }
167 
168 coff::section_header_t* routine_t::scn() {
169  return m_scn;
170 }
171 
172 std::vector<std::uint8_t> routine_t::data() {
173  return m_data;
174 }
175 } // namespace theo::decomp
std::vector< decomp::symbol_t > decompose()
decompose the function into symbol(s).
Definition: routine.cpp:41
std::vector< std::uint8_t > data()
gets the function bytes.
Definition: routine.cpp:172
routine_t(coff::symbol_t *sym, coff::image_t *img, coff::section_header_t *scn, std::vector< std::uint8_t > &fn, sym_type_t dcmp_type)
the explicit constructor for routine_t.
Definition: routine.cpp:34
coff::section_header_t * scn()
gets the section header of the section in which the symbol is located.
Definition: routine.cpp:168
symbol_t is an abstraction upon the coff symbol. this allows for easier manipulation of the symbol....
Definition: symbol.hpp:53
std::string name() const
gets the name of the symbol.
Definition: symbol.cpp:52
std::size_t hash()
gets the hash of the symbol name.
Definition: symbol.cpp:88
metadata about a relocation for a symbol
Definition: reloc.hpp:41
the namespace that contains all of the decomposition related code.
Definition: decomp.hpp:49
sym_type_t
meta symbol type. this is an abstraction upon the coff symbol storage/class type.
Definition: symbol.hpp:43