Theodosius v3.0
JIT linker, symbol mapper, and obfuscator
decomp.hpp
Go to the documentation of this file.
// Copyright (c) 2022, _xeroxz
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//

#pragma once
#include <spdlog/spdlog.h>
#include <cstdint>
#include <linuxpe>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>

#include <decomp/routine.hpp>

#include <coff/archive.hpp>
#include <coff/image.hpp>
45
46/// <summary>
47/// the namespace that contains all of the decomposition related code.
48/// </summary>
49namespace theo::decomp {
50
51/// <summary>
52/// meta symbol data. consists of the coff image which contains the coff symbol,
53/// the coff symbol itself, and the size (if any) of the symbol.
54/// </summary>
55using sym_data_t = std::tuple<coff::image_t*, coff::symbol_t*, std::uint32_t>;
56
57/// <summary>
58/// the main decomposition class which is responsible for breaking down lib file
59/// into coff files, and extracted used symbols from the coff files.
60/// </summary>
61class decomp_t {
62 public:
63 /// <summary>
64 /// the explicit constructor for decomp_t
65 /// </summary>
66 /// <param name="lib">vector of bytes containing the lib file.</param>
67 /// <param name="syms">symbol table that gets populated and managed by this
68 /// class.</param>
69 explicit decomp_t(std::vector<std::uint8_t>& lib,
71
72 /// <summary>
73 /// gets all of the routine objects.
74 /// </summary>
75 /// <returns>vector of routine objects.</returns>
76 std::vector<routine_t> rtns();
77
78 /// <summary>
79 /// gets a vector of bytes consisting of the lib file.
80 /// </summary>
81 /// <returns>a vector of bytes consisting of the lib file.</returns>
82 std::vector<std::uint8_t> lib();
83
84 /// <summary>
85 /// gets all the obj files as a vector of a vector of bytes.
86 /// </summary>
87 /// <returns>all the obj files as a vector of a vector of bytes.</returns>
88 std::vector<std::vector<std::uint8_t>> objs();
89
90 /// <summary>
91 /// gets the symbol table.
92 /// </summary>
93 /// <returns>the symbol table.</returns>
95
96 /// <summary>
97 /// gets the section hash table section header --> hash of the section header
98 /// ptr.
99 /// </summary>
100 /// <returns>the section hash table section header --> hash of the section
101 /// header ptr.</returns>
102 std::map<coff::section_header_t*, std::size_t>& scn_hash_tbl();
103
104 /// <summary>
105 /// decomposes (extracts) the symbols used. this function determines all used
106 /// symbols given the entry point.
107 /// </summary>
108 /// <param name="entry_sym">the entry point symbol name.</param>
109 /// <returns>returns an optional pointer to the symbol table. no value in the
110 /// optional object on failure.</returns>
111 std::optional<recomp::symbol_table_t*> decompose(std::string& entry_sym);
112
113 private:
114 /// <summary>
115 /// extracts used symbols from coff files.
116 /// </summary>
117 /// <param name="entry_sym">the entry point symbol name</param>
118 /// <returns>number of symbols used</returns>
119 std::uint32_t ext_used_syms(const std::string&& entry_sym);
120
121 /// <summary>
122 /// get symbol meta data by name.
123 /// </summary>
124 /// <param name="name">symbol name</param>
125 /// <returns>optional symbol meta data if it exists.</returns>
126 std::optional<sym_data_t> get_symbol(const std::string_view& name);
127
128 /// <summary>
129 /// the next symbol in the section.
130 /// </summary>
131 /// <param name="img">coff image that contains the symbol.</param>
132 /// <param name="hdr">coff section header of the section that contains the
133 /// symbol.</param>
134 /// <param name="s">symbol in which to get the next one of.</param>
135 /// <returns>offset into the section where the next symbol is at.</returns>
136 std::uint32_t next_sym(coff::image_t* img,
137 coff::section_header_t* hdr,
138 coff::symbol_t* s);
139
140 const std::vector<std::uint8_t> m_lib;
141 std::vector<std::vector<std::uint8_t>> m_objs;
142 std::vector<routine_t> m_rtns;
143 std::set<sym_data_t> m_used_syms;
144 std::set<coff::image_t*> m_processed_objs;
145 std::map<coff::section_header_t*, std::size_t> m_scn_hash_tbl;
146 std::map<std::size_t, std::vector<sym_data_t>> m_lookup_tbl;
148};
149} // namespace theo::decomp