Theodosius v3.0
Jit linker, symbol mapper, and obfuscator
routine.cpp
Go to the documentation of this file.
1// Copyright (c) 2022, _xeroxz
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9//
10// 2. Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13//
14// 3. Neither the name of the copyright holder nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28// POSSIBILITY OF SUCH DAMAGE.
29//
30
31#include <decomp/routine.hpp>
32
33namespace theo::decomp {
34routine_t::routine_t(coff::symbol_t* sym,
35 coff::image_t* img,
36 coff::section_header_t* scn,
37 std::vector<std::uint8_t>& fn,
38 sym_type_t dcmp_type)
39 : m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {}
40
41std::vector<decomp::symbol_t> routine_t::decompose() {
42 std::vector<decomp::symbol_t> result;
43
44 switch (m_dcmp_type) {
45 case function: {
46 std::vector<recomp::reloc_t> relocs;
47 auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
48 m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
49
50 for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
51 auto scn_reloc = &scn_relocs[idx];
52 // if the reloc is in the current function...
53 if (scn_reloc->virtual_address >= m_sym->value &&
54 scn_reloc->virtual_address < m_sym->value + m_data.size()) {
55 auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
56 auto sym_name = symbol_t::name(m_img, sym_reloc);
57 auto sym_hash = decomp::symbol_t::hash(sym_name.data());
58 relocs.push_back(
59 recomp::reloc_t(scn_reloc->virtual_address - m_sym->value,
60 sym_hash, sym_name.data()));
61 }
62 }
63
64 result.push_back(decomp::symbol_t(
65 m_img, symbol_t::name(m_img, m_sym).data(), m_sym->value, m_data,
66 m_scn, m_sym, relocs, sym_type_t::function));
67 break;
68 }
69 case instruction: {
70 std::uint32_t offset = {};
71 xed_error_enum_t err;
72
73 xed_decoded_inst_t instr;
74 xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
75 xed_decoded_inst_zero_set_mode(&instr, &istate);
76
77 // keep looping over the section, lower the number of bytes each time...
78 //
79 while ((err = xed_decode(&instr, m_data.data() + offset,
80 m_data.size() - offset)) == XED_ERROR_NONE) {
81 // symbol name is of the format: symbol@instroffset, I.E: main@11...
82 //
83 auto new_sym_name = symbol_t::name(m_img, m_sym);
84
85 // first instruction doesnt need the @offset...
86 //
87 if (offset)
88 new_sym_name.append("@").append(std::to_string(offset));
89
90 std::vector<recomp::reloc_t> relocs;
91 auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
92 m_scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(m_img));
93
94 // find if this instruction has a relocation or not...
95 // if so, return the reloc_t...
96 //
97 auto reloc = std::find_if(
98 scn_relocs, scn_relocs + m_scn->num_relocs,
99 [&](coff::reloc_t reloc) {
100 return reloc.virtual_address >= m_sym->value + offset &&
101 reloc.virtual_address <
102 m_sym->value + offset +
103 xed_decoded_inst_get_length(&instr);
104 });
105
106 // if there is indeed a reloc for this instruction...
107 //
108 if (reloc != scn_relocs + m_scn->num_relocs) {
109 auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
110 auto sym_name = symbol_t::name(m_img, sym_reloc);
111 auto sym_hash = decomp::symbol_t::hash(sym_name.data());
112 auto reloc_offset = reloc->virtual_address - m_sym->value - offset;
113
114 relocs.push_back(
115 recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));
116 }
117
118 // add a reloc to the next instruction...
119 // note that the offset is ZERO... comp_t will understand that
120 // relocs with offset ZERO means the next instructions...
121 //
122 auto next_inst_sym =
123 symbol_t::name(m_img, m_sym)
124 .append("@")
125 .append(std::to_string(offset +
126 xed_decoded_inst_get_length(&instr)));
127
128 relocs.push_back(recomp::reloc_t(
129 0, decomp::symbol_t::hash(next_inst_sym), next_inst_sym.data()));
130
131 // get the instructions bytes
132 //
133 std::vector<std::uint8_t> inst_bytes(
134 m_data.data() + offset,
135 m_data.data() + offset + xed_decoded_inst_get_length(&instr));
136
137 result.push_back(decomp::symbol_t(m_img, new_sym_name, offset,
138 inst_bytes, m_scn, m_sym, relocs,
140
141 // after creating the symbol and dealing with relocs then print the
142 // information we have concluded...
143 //
144 char buff[255];
145 offset += xed_decoded_inst_get_length(&instr);
146 xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL,
147 NULL, NULL);
148
149 spdlog::info("{}: {}", new_sym_name, buff);
150 // need to set this so that instr can be used to decode again...
151 xed_decoded_inst_zero_set_mode(&instr, &istate);
152 }
153
154 // remove the relocation to the next symbol from the last instruction
155 //
156 auto& last_inst = result.back();
157 auto& last_inst_relocs = last_inst.relocs();
158 last_inst_relocs.erase(last_inst_relocs.end() - 1);
159 break;
160 }
161 default:
162 break;
163 }
164
165 return result;
166}
167
168coff::section_header_t* routine_t::scn() {
169 return m_scn;
170}
171
172std::vector<std::uint8_t> routine_t::data() {
173 return m_data;
174}
175} // namespace theo::decomp