From 966d1e814c915520d4a676ea340e287f31d602ac Mon Sep 17 00:00:00 2001 From: _xeroxz Date: Thu, 21 Apr 2022 17:49:08 -0700 Subject: [PATCH] almost done writing docs. i need to add license to each header file and create a doxygen... --- include/decomp/symbol.hpp | 11 +++ include/obf/engine.hpp | 25 ++++++- include/obf/pass.hpp | 29 ++++++++ include/obf/passes/jcc_rewrite_pass.hpp | 26 ++++++++ include/obf/passes/next_inst_pass.hpp | 50 ++++++++++++++ include/obf/passes/reloc_transform_pass.hpp | 19 ++++++ include/obf/transform/gen.hpp | 9 +++ include/obf/transform/operation.hpp | 62 ++++++++++++++++- include/obf/transform/transform.hpp | 3 + include/recomp/recomp.hpp | 74 +++++++++++++++++++-- include/recomp/reloc.hpp | 46 +++++++++++++ include/recomp/symbol_table.hpp | 29 +++++--- include/theo.hpp | 12 ++-- src/recomp/recomp.cpp | 1 - src/theo.cpp | 4 +- 15 files changed, 375 insertions(+), 25 deletions(-) diff --git a/include/decomp/symbol.hpp b/include/decomp/symbol.hpp index 069e65c..76b7b87 100644 --- a/include/decomp/symbol.hpp +++ b/include/decomp/symbol.hpp @@ -6,8 +6,19 @@ #include namespace theo::decomp { +/// +/// meta symbol type. this is an abstraction upon the coff symbol storage/class +/// type. +/// enum sym_type_t { function, instruction, data, section }; +/// +/// symbol_t is an abstraction upon the coff symbol. this allows for easier +/// manipulation of the symbol. symbols can be different things, sections, +/// functions, and even instructions (when functions are broken down). +/// +/// this class is used throughout theodosius and is a keystone of the project. +/// class symbol_t { public: explicit symbol_t(coff::image_t* img, diff --git a/include/obf/engine.hpp b/include/obf/engine.hpp index 7c3e590..eb37bfc 100644 --- a/include/obf/engine.hpp +++ b/include/obf/engine.hpp @@ -4,15 +4,38 @@ #include namespace theo::obf { + +/// +/// singleton obfuscation engine class. 
this class is responsible for keeping +/// track of the registered passes and the order in which to execute them. +/// class engine_t { explicit engine_t(){}; public: + /// + /// get the singleton object of this class. + /// + /// the singleton object of this class. static engine_t* get(); + + /// + /// add a pass to the engine. the order in which you call this function + /// matters as the underlying data structure that contains the passes is a + /// vector. + /// + /// a pointer to the pass in which to add to the + /// engine. void add_pass(pass_t* pass); + + /// + /// run all the passes on the symbol. this function will only run a pass if + /// the symbol is the same type as the pass requires. + /// + /// symbol to run all passes on. void run(decomp::symbol_t* sym); private: std::vector passes; }; -} // namespace theo::comp::obf \ No newline at end of file +} // namespace theo::obf \ No newline at end of file diff --git a/include/obf/pass.hpp b/include/obf/pass.hpp index e210f4e..7d8fbe5 100644 --- a/include/obf/pass.hpp +++ b/include/obf/pass.hpp @@ -9,13 +9,42 @@ extern "C" { #include } +/// +/// this is the main namespace for obfuscation related things. +/// namespace theo::obf { + +/// +/// the pass_t class is a base class for all passes made. you must override the +/// pass_t::run virtual function and declare the logic of your pass there. +/// +/// in the constructor of your pass you must call the super constructor (the +/// pass_t constructor) and pass it the type of symbol which you are interested +/// in receiving. +/// class pass_t { public: + /// + /// the explicit constructor of the pass_t base class. + /// + /// the type of symbol in which the pass will run on. + /// every symbol passed to the virtual "run" function will be of this + /// type. explicit pass_t(decomp::sym_type_t sym_type) : m_sym_type(sym_type){}; + + /// + /// virtual method which must be implemented by the pass that inherits this + /// class. 
+ /// + /// a symbol of the same type of m_sym_type. virtual void run(decomp::symbol_t* sym) = 0; + /// + /// gets the pass's symbol type. + /// + /// the pass's symbol type. decomp::sym_type_t sym_type() { return m_sym_type; } + private: decomp::sym_type_t m_sym_type; }; diff --git a/include/obf/passes/jcc_rewrite_pass.hpp b/include/obf/passes/jcc_rewrite_pass.hpp index b8388d0..7a8b60b 100644 --- a/include/obf/passes/jcc_rewrite_pass.hpp +++ b/include/obf/passes/jcc_rewrite_pass.hpp @@ -2,6 +2,32 @@ #include namespace theo::obf { +/// +/// jcc rewrite pass which rewrites rip relative jcc's so that they are position +/// independent. +/// +/// given the following code: +/// +/// jnz label1 +/// ; other code goes here +/// label1: +/// ; more code here +/// +/// the jnz instruction will be rewritten so that the following code is +/// generated: +/// +/// jnz br2 +/// br1: +/// jmp [rip] ; address after this instruction contains the address +/// ; of the instruction after the jcc. +/// br2: +/// jmp [rip] ; address after this instruction contains the address of where +/// ; branch 2 is located. +/// +/// it's important to note that other passes will encrypt (transform) the address +/// of the next instruction. There is actually no jmp [rip] either, push/ret is +/// used. +/// class jcc_rewrite_pass_t : public pass_t { explicit jcc_rewrite_pass_t() : pass_t(decomp::sym_type_t::instruction){}; diff --git a/include/obf/passes/next_inst_pass.hpp b/include/obf/passes/next_inst_pass.hpp index 6fa7456..31fcafd 100644 --- a/include/obf/passes/next_inst_pass.hpp +++ b/include/obf/passes/next_inst_pass.hpp @@ -2,6 +2,56 @@ #include namespace theo::obf { +/// +/// This pass is used to generate transformations and jmp code to change RIP to +/// the next instruction. 
+/// +/// given the following code (get pml4 address from cr3): +/// +/// get_pml4: +/// 0: 48 c7 c0 ff 0f 00 00 mov rax,0xfff +/// 7: 48 f7 d0 not rax +/// a: 0f 20 da mov rdx,cr3 +/// d: 48 21 c2 and rdx,rax +/// 10: b1 00 mov cl,0x0 +/// 12: 48 d3 e2 shl rdx,cl +/// 15: 48 89 d0 mov rax,rdx +/// 18: c3 ret +/// +/// this pass will break up each instruction so that it can be anywhere in a +/// linear virtual address space. this pass will not work on rip relative code, +/// however clang will not generate such code when compiled with +/// "-mcmodel=large" +/// +/// get_pml4@0: +/// mov rax, 0xFFF +/// push [next_inst_addr_enc] +/// xor [rsp], 0x3243342 +/// ; a random number of transformations here... +/// ret +/// next_inst_addr_enc: +/// ; encrypted address of the next instruction goes here. +/// +/// get_pml4@7: +/// not rax +/// push [next_inst_addr_enc] +/// xor [rsp], 0x93983498 +/// ; a random number of transformations here... +/// ret +/// next_inst_addr_enc: +/// ; encrypted address of the next instruction goes here. +/// +/// this process is continued for each instruction in the function. the last +/// instruction "ret" will have no code generated for it as there is no next +/// instruction. +/// +/// +/// this pass also only runs at the instruction level, theodosius internally +/// breaks up functions inside of the ".split" section into individual +/// instruction symbols. this process also creates a pseudo relocation which +/// simply tells this pass that there needs to be a relocation to the next +/// symbol. the offset for these pseudo relocations is zero. 
+/// class next_inst_pass_t : public pass_t { explicit next_inst_pass_t() : pass_t(decomp::sym_type_t::instruction) { xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b}; diff --git a/include/obf/passes/reloc_transform_pass.hpp b/include/obf/passes/reloc_transform_pass.hpp index de68c4e..67a14fd 100644 --- a/include/obf/passes/reloc_transform_pass.hpp +++ b/include/obf/passes/reloc_transform_pass.hpp @@ -4,6 +4,25 @@ #include namespace theo::obf { +/// +/// this pass is like the next_inst_pass, however, relocations are encrypted +/// with transformations instead of the address of the next instruction. this +/// pass only runs at the instruction level and appends transformations into the +/// reloc_t object of the instruction symbol. +/// +/// given the following code: +/// +/// mov rax, &MessageBoxA +/// +/// this pass will generate a random number of transformations to encrypt the +/// address of "MessageBoxA". These transformations will then be applied by +/// theodosius internally when resolving relocations. +/// +/// mov rax, enc_MessageBoxA +/// xor rax, 0x389284324 +/// add rax, 0x345332567 +/// ror rax, 0x5353 +/// class reloc_transform_pass_t : public pass_t { explicit reloc_transform_pass_t() : pass_t(decomp::sym_type_t::instruction){}; diff --git a/include/obf/transform/gen.hpp b/include/obf/transform/gen.hpp index 65b6e23..1c53cd3 100644 --- a/include/obf/transform/gen.hpp +++ b/include/obf/transform/gen.hpp @@ -3,6 +3,15 @@ #include namespace theo::obf::transform { +/// +/// generate a sequence of transformations given an instruction that has a +/// relocation in it. +/// +/// instruction that has a relocation in it. +/// meta data relocation object for the instruction. +/// lowest number of transformations to generate. +/// highest number of transformations to generate. 
+/// inline std::vector generate(xed_decoded_inst_t* inst, recomp::reloc_t* reloc, std::uint8_t low, diff --git a/include/obf/transform/operation.hpp b/include/obf/transform/operation.hpp index 5c73738..dd1e9c2 100644 --- a/include/obf/transform/operation.hpp +++ b/include/obf/transform/operation.hpp @@ -1,10 +1,10 @@ #pragma once #include +#include +#include #include #include #include -#include -#include #define XED_ENCODER extern "C" { @@ -12,14 +12,50 @@ extern "C" { #include } +/// +/// this namespace encompasses the code for transforming relocations. +/// namespace theo::obf::transform { + +/// +/// lambda function which takes in a 64bit value (relocation address) and a +/// 32bit value (random value used in transformation). +/// using transform_t = std::function; +/// +/// operation_t is the base class for all types of transformations. classes that +/// inherit this class are singleton and simply call the super constructor +/// (operation_t::operation_t). +/// class operation_t { public: + /// + /// explicit constructor for operation_t + /// + /// lambda function when executed applies + /// transformations. type of transformation, such + /// as XOR, ADD, SUB, etc... explicit operation_t(transform_t op, xed_iclass_enum_t type) : m_transform(op), m_type(type) {} + /// + /// generates a native transform instruction given an existing instruction. it + /// works like so: + /// + /// mov rax, &MessageBoxA ; original instruction with relocation + /// + /// ; this function takes the first operand and out of the original + /// ; instruction and uses it to generate a transformation. + /// + /// xor rax, 0x39280928 ; this would be an example output for the xor + /// ;operation. + /// + /// + /// instruction with a relocation to generate a + /// transformation for. random 32bit number used in + /// the generate transform. returns the bytes of the native + /// instruction that was encoded. 
std::vector native(const xed_decoded_inst_t* inst, std::uint32_t imm) { std::uint32_t inst_len = {}; @@ -53,10 +89,32 @@ class operation_t { return std::vector(inst_buff, inst_buff + inst_len); } + /// + /// gets the inverse operation of the current operation. + /// + /// the inverse operation of the current operation. xed_iclass_enum_t inverse() { return m_inverse_op[m_type]; } + + /// + /// gets a pointer to the lambda function which contains the transform logic. + /// + /// a pointer to the lambda function which contains the transform + /// logic. transform_t* get_transform() { return &m_transform; } + + /// + /// gets the operation type. such as XED_ICLASS_ADD, XED_ICLASS_SUB, etc... + /// + /// the operation type. such as XED_ICLASS_ADD, XED_ICLASS_SUB, + /// etc... xed_iclass_enum_t type() { return m_type; } + /// + /// generate a random number in a range. + /// + /// lowest value of the range. + /// highest value of the range. + /// a random value in a range. static std::size_t random(std::size_t lowest, std::size_t largest) { std::random_device rd; std::mt19937 gen(rd()); diff --git a/include/obf/transform/transform.hpp b/include/obf/transform/transform.hpp index 6e79d82..656d4c0 100644 --- a/include/obf/transform/transform.hpp +++ b/include/obf/transform/transform.hpp @@ -6,6 +6,9 @@ #include namespace theo::obf::transform { +/// +/// map of all of the operations and their type. +/// inline std::map operations = { {XED_ICLASS_ADD, add_op_t::get()}, {XED_ICLASS_SUB, sub_op_t::get()}, diff --git a/include/recomp/recomp.hpp b/include/recomp/recomp.hpp index 3416509..791fc90 100644 --- a/include/recomp/recomp.hpp +++ b/include/recomp/recomp.hpp @@ -1,37 +1,97 @@ #pragma once +#include #include #include -#include +/// +/// this namespace encompasses all recomposition related code. 
+/// namespace theo::recomp { + +/// +/// a function which is called by recomp_t to resolve external symbols +/// using resolver_t = std::function; + +/// +/// a function which is called by recomp_t to copy symbols into memory. +/// using copier_t = std::function; + +/// +/// a function which is called to allocate space for a symbol. +/// +/// the first param is the size of the symbol, the second param is the +/// characteristics of the section which the symbol is allocated in. +/// using allocator_t = std::function; +/// +/// the main class responsible for recomposition +/// class recomp_t { public: - explicit recomp_t(decomp::decomp_t* dcmp); + /// + /// the explicit constructor for the recomp_t class. + /// + /// pointer to a decomp_t class. + /// lambda function which is used to allocate memory for + /// symbols. lambda function used to copy bytes + /// into allocations. lambda function used to + /// resolve external symbols. explicit recomp_t(decomp::decomp_t* dcmp, - allocator_t alloc, - copier_t copy, - resolver_t resolve); + allocator_t alloc, + copier_t copy, + resolver_t resolve); + /// + /// when called, this function allocates space for every symbol. + /// void allocate(); + + /// + /// when called, this function resolves all relocations in every symbol. + /// void resolve(); + + /// + /// when called, this function copies symbols into allocations. + /// void copy_syms(); + /// + /// setter for the allocator lambda function. + /// + /// lambda function which allocates memory for + /// symbols. void allocator(allocator_t alloc); + + /// + /// setter for the copier lambda function. + /// + /// copier lambda function used to copy bytes into + /// allocations made by the allocator. void copier(copier_t copy); + + /// + /// setter for the resolver lambda function. + /// + /// lambda function to resolve external symbols. void resolver(resolver_t resolve); + + /// + /// resolves the address of a function given its name. 
+ /// + /// the name of the symbol to resolve the location + /// of. the address of the symbol. std::uintptr_t resolve(const std::string&& sym); private: - void gen_reloc_trans(decomp::symbol_t* sym); decomp::decomp_t* m_dcmp; resolver_t m_resolver; copier_t m_copier; allocator_t m_allocator; }; -} // namespace theo::comp \ No newline at end of file +} // namespace theo::recomp \ No newline at end of file diff --git a/include/recomp/reloc.hpp b/include/recomp/reloc.hpp index 977dc22..73a395c 100644 --- a/include/recomp/reloc.hpp +++ b/include/recomp/reloc.hpp @@ -4,23 +4,69 @@ #include namespace theo::recomp { + +/// +/// meta data about a relocation for a symbol +/// class reloc_t { public: + /// + /// explicit constructor for this class. + /// + /// offset into the symbol data where the relocation is + /// at. all relocations are assumed to be linear virtual addresses of the + /// symbol. + /// hash of the symbol to which the relocation is + /// of. the name of the symbol to which the + /// relocation is of. explicit reloc_t(std::uint32_t offset, std::size_t hash, const std::string&& sym_name) : m_offset(offset), m_hash(hash), m_sym_name(sym_name) {} + /// + /// returns the hash of the relocation symbol. + /// + /// hash of the relocation symbol std::size_t hash() { return m_hash; } + + /// + /// returns the name of the relocation symbol. + /// + /// returns the name of the relocation symbol. std::string name() { return m_sym_name; } + + /// + /// returns the offset into the symbol to which the relocation will be + /// applied. the offset is in bytes. zero based. + /// + /// returns the offset into the symbol to which the relocation will + /// be applied. the offset is in bytes. zero based. std::uint32_t offset() { return m_offset; } + + /// + /// sets the offset to which the relocation gets applied to. + /// + /// offset to which the relocation gets applied + /// to. 
void offset(std::uint32_t offset) { m_offset = offset; } + /// + /// adds a transformation to be applied to the relocation prior to writing it + /// into the symbol. + /// + /// a pair containing a lambda function that when executed + /// transforms a relocation. the second value in the pair is a random value + /// which is passed to the lambda. void add_transform( std::pair entry) { m_transforms.push_back(entry); } + /// + /// gets the vector of transformations. + /// + /// returns the vector of transformations. std::vector>& get_transforms() { return m_transforms; diff --git a/include/recomp/symbol_table.hpp b/include/recomp/symbol_table.hpp index 36a6e22..98453aa 100644 --- a/include/recomp/symbol_table.hpp +++ b/include/recomp/symbol_table.hpp @@ -14,7 +14,6 @@ namespace theo::recomp { /// class symbol_table_t { public: - /// /// default constructor. does nothing. /// @@ -33,27 +32,41 @@ class symbol_table_t { void put_symbol(decomp::symbol_t& sym); /// - /// add a vector of symbol to m_table + /// add a vector of symbol to m_table /// /// void put_symbols(std::vector& syms); /// - /// returns an optional pointer to a symbol from the symbol table given the symbols hash (hash of its name) - /// the hash is produced by theo::decomp::symbol_t::hash + /// returns an optional pointer to a symbol from the symbol table given the + /// symbols hash (hash of its name) the hash is produced by + /// theo::decomp::symbol_t::hash /// - /// hashcode of the symbol to get from the symbol table... - /// returns an optional pointer to a theo::decomp::symbol_t + /// hashcode of the symbol to get from the symbol + /// table... returns an optional pointer to a + /// theo::decomp::symbol_t std::optional sym_from_hash(std::size_t hash); /// /// returns an optional pointer to a symbol given its allocation location. /// - /// the address where the symbol is allocated at. 
- /// returns an optional pointer to a theo::decomp::symbol_t + /// the address where the symbol is allocated + /// at. returns an optional pointer to a + /// theo::decomp::symbol_t std::optional sym_from_alloc(std::uintptr_t allocated_at); + /// + /// this function is a wrapper function that allows you to get at each entry + /// in the symbol table by reference. + /// + /// a callback function that will be called for each + /// symbol void for_each(std::function fn); + + /// + /// returns the size of the symbol table. + /// + /// returns the size of the symbol table. std::uint32_t size(); private: diff --git a/include/theo.hpp b/include/theo.hpp index 3711aa9..bb6cd92 100644 --- a/include/theo.hpp +++ b/include/theo.hpp @@ -34,18 +34,18 @@ using lnk_fns_t = /// the main class which encapsulates a symbol table, decomp, and recomp /// objects. This class is a bridge that connects all three: decomp, obf, /// recomp. -/// +/// /// You will create an object of this type when using theo. /// class theo_t { public: - /// /// explicit constructor for theo class. /// /// a vector of bytes consisting of a lib /// - /// the name of the function which will be used as the entry point + /// the name of the function which will be used as the + /// entry point explicit theo_t(std::vector& lib, lnk_fns_t lnkr_fns, const std::string&& entry_sym); @@ -53,11 +53,13 @@ class theo_t { /// /// decomposes the lib file and return the number of symbols that are used. /// - /// optional amount of symbols that are used. no value if decomposition fails. + /// optional amount of symbols that are used. no value if + /// decomposition fails. std::optional decompose(); /// - /// compose the decomposed module. This will run obfuscation passes, the map and resolve symbols to each other. + /// compose the decomposed module. This will run obfuscation passes, the map + /// and resolve symbols to each other. 
/// /// returns the address of the entry point symbol std::uintptr_t compose(); diff --git a/src/recomp/recomp.cpp b/src/recomp/recomp.cpp index a1881be..5548aa5 100644 --- a/src/recomp/recomp.cpp +++ b/src/recomp/recomp.cpp @@ -1,7 +1,6 @@ #include namespace theo::recomp { -recomp_t::recomp_t(decomp::decomp_t* dcmp) : m_dcmp(dcmp) {} recomp_t::recomp_t(decomp::decomp_t* dcmp, allocator_t alloc, copier_t copy, diff --git a/src/theo.cpp b/src/theo.cpp index 59e4f1e..a2eaf39 100644 --- a/src/theo.cpp +++ b/src/theo.cpp @@ -4,7 +4,9 @@ namespace theo { theo_t::theo_t(std::vector& lib, lnk_fns_t lnkr_fns, const std::string&& entry_sym) - : m_dcmp(lib, &m_sym_tbl), m_recmp(&m_dcmp), m_entry_sym(entry_sym) { + : m_dcmp(lib, &m_sym_tbl), + m_recmp(&m_dcmp, {}, {}, {}), + m_entry_sym(entry_sym) { m_recmp.allocator(std::get<0>(lnkr_fns)); m_recmp.copier(std::get<1>(lnkr_fns)); m_recmp.resolver(std::get<2>(lnkr_fns));