OLLVM 之指令替换源码学习

指令替换，Instruction-Substitution（sub），是一种比较简单的混淆方式，会将代码中一些简单的数学运算复杂化，但这种方式容易被代码优化给去除，且目前 OLLVM 只实现对整数运算的混淆。如：

原式子：
a=b+c
混淆后：
a = b - (-c)
a = -(-b + (-c))
r = rand (); a = b + r; a = a + c; a = a - r
r = rand (); a = b - r; a = a + c; a = a + r

原式子：
a = b-c
混淆后：
a = b + (-c)
r = rand (); a = b + r; a = a - c; a = a - r
r = rand (); a = b - r; a = a - c; a = a + r

原指令：
a = b & c
混淆后：
a = (b ^ ~c) & b
r = rand (); a = ~(~b | ~c) & (r | ~r)

原指令：
a = b | c
混淆后：
a = (b & c) | (b ^ c)
r = rand (); a = (((~b & r) | (b & ~r)) ^ ((~c & r) | (c & ~r))) | (~(~b | ~c) & (r | ~r))

原指令：
a = a ^ b
混淆后：
a = (~a & b) | (a & ~b)
a = ((~a & r) | (a & ~r)) ^ ((~b & r) | (b & ~r))

-mllvm -sub : activate instructions substitution
-mllvm -sub_loop=3 : if the pass is activated, applies it 3 times on a function. Default : 1

OLLVM指令替换混淆源码位置：OLLVMCODE\lib\Transforms\Obfuscation\Substitution.cpp

0x01 宏定义与声明

文件开头定义了5个宏，分别指示5种基本运算各有多少种混淆方法。随后声明了继承自FunctionPass的Substitution类，类中声明了5个成员函数指针数组，分别存放Add、Sub、And、Or、Xor运算对应的混淆函数。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43


#define NUMBER_ADD_SUBST 4    // Number of substitution variants for Add (size of funcAdd)
#define NUMBER_SUB_SUBST 3    // Number of substitution variants for Sub (size of funcSub)
#define NUMBER_AND_SUBST 2    // Number of substitution variants for And (size of funcAnd)
#define NUMBER_OR_SUBST 2     // Number of substitution variants for Or (size of funcOr)
#define NUMBER_XOR_SUBST 2    // Number of substitution variants for Xor (size of funcXor)

namespace {

// Function pass that holds, for each of the five supported integer opcodes
// (Add, Sub, And, Or, Xor), a table of member functions implementing the
// available substitution variants.
//
// NOTE: this excerpt only shows the dispatch tables and the constructor; the
// member functions referenced below (addNeg, subRand, ...) as well as
// runOnFunction/substitute are not declared in the lines shown here.
struct Substitution : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  // Dispatch tables: one array of pointers-to-member-function per opcode,
  // sized by the matching NUMBER_*_SUBST macro.
  void (Substitution::*funcAdd[NUMBER_ADD_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcSub[NUMBER_SUB_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcAnd[NUMBER_AND_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcOr[NUMBER_OR_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcXor[NUMBER_XOR_SUBST])(BinaryOperator *bo);

  // Forwarded to toObfuscate() by runOnFunction.
  bool flag;

  // Single constructor serving both `Substitution()` and `Substitution(bool)`.
  // Previously the default constructor left `flag` and every dispatch table
  // uninitialized, so a default-constructed pass would read an indeterminate
  // flag and call through garbage member-function pointers. Defaulting the
  // argument guarantees the tables are always populated.
  Substitution(bool flag = false) : FunctionPass(ID), flag(flag) {
    funcAdd[0] = &Substitution::addNeg;
    funcAdd[1] = &Substitution::addDoubleNeg;
    funcAdd[2] = &Substitution::addRand;
    funcAdd[3] = &Substitution::addRand2;

    funcSub[0] = &Substitution::subNeg;
    funcSub[1] = &Substitution::subRand;
    funcSub[2] = &Substitution::subRand2;

    funcAnd[0] = &Substitution::andSubstitution;
    funcAnd[1] = &Substitution::andSubstitutionRand;

    funcOr[0] = &Substitution::orSubstitution;
    funcOr[1] = &Substitution::orSubstitutionRand;

    funcXor[0] = &Substitution::xorSubstitution;
    funcXor[1] = &Substitution::xorSubstitutionRand;
  }
};
}

0x02 runOnFunction函数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17


bool Substitution::runOnFunction(Function &F) {
  // The repeat count (reported to the user as -sub_loop=x) must be strictly
  // positive; otherwise report the problem and leave the function untouched.
  if (ObfTimes <= 0) {
    errs() << "Substitution application number -sub_loop=x must be x > 0";
    return false;
  }

  // Ask toObfuscate() whether this function is selected for the "sub"
  // obfuscation; if not, nothing is modified.
  if (!toObfuscate(flag, &F, "sub"))
    return false;

  // Rewrite the function's instructions and report the IR as modified.
  substitute(&F);
  return true;
}

在 toObfuscate 函数中，主要做两个判断来决定是否要进行混淆：

判定flag是否为true，也就是编译命令中是否有 -mllvm -sub 参数；
判定当前函数是否有 __attribute__((annotate("sub"))) 的标记。

0x03 Substitution::substitute函数

在这个函数中，会进行具体的指令替换。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44


// Walks every instruction of `f` and, for each integer Add/Sub/And/Or/Xor
// binary operator, invokes one substitution variant chosen from the matching
// dispatch table. The whole walk is repeated ObfTimes times (at least once,
// because of the do-while).
//
// Always returns false; runOnFunction ignores this value and reports the
// modification itself.
bool Substitution::substitute(Function *f) {
  // Number of remaining passes over the function.
  int times = ObfTimes;
  do {
    // Visit every basic block of the function.
    for (Function::iterator bb = f->begin(); bb != f->end(); ++bb) {
      // Visit every instruction of the basic block.
      for (BasicBlock::iterator inst = bb->begin(); inst != bb->end(); ++inst) {
        // Only binary operators are candidates for substitution.
        if (!inst->isBinaryOp())
          continue;

        // The index passed to each table comes from get_range(), called with
        // the table's own size macro so the index range always matches the
        // array bounds (the And/Or/Xor cases used a hard-coded 2 before).
        switch (inst->getOpcode()) {
        case BinaryOperator::Add:
          (this->*funcAdd[llvm::cryptoutils->get_range(NUMBER_ADD_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Add;
          break;
        case BinaryOperator::Sub:
          (this->*funcSub[llvm::cryptoutils->get_range(NUMBER_SUB_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Sub;
          break;
        case Instruction::And:
          (this->*funcAnd[llvm::cryptoutils->get_range(NUMBER_AND_SUBST)])(
              cast<BinaryOperator>(inst));
          ++And;
          break;
        case Instruction::Or:
          (this->*funcOr[llvm::cryptoutils->get_range(NUMBER_OR_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Or;
          break;
        case Instruction::Xor:
          (this->*funcXor[llvm::cryptoutils->get_range(NUMBER_XOR_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Xor;
          break;
        default:
          // Any other binary opcode is left unchanged.
          break;
        }              // End switch
      }                // End for basic block
    }                  // End for function
  } while (--times > 0); // Repeat for the requested number of passes
  return false;
}

可以看到，逻辑非常简单，就是遍历函数内所有指令，如果是二元运算指令（Add、Sub、And、Or、Xor），就随机从对应的混淆函数指针数组中选取一个对指令进行混淆。

然后后面就是各个混淆函数的内容了，这真没什么好说的，是个人都能看懂，看不懂来砍我（×

0x04 魔改思路

SUB的源码比BCF的源码好理解太多了。读完后，可以看到问题还是挺明显的。首先，OLLVM只对加、减、与、或、异或这五种操作做了混淆，乘除浮点数操作这些都没有。而且哪怕是这五种基本的操作也是单调的，总共也只有那么几种混淆函数，且都不是很复杂。

将基本运算的混淆复杂化
添加对乘除的混淆
添加对浮点数的混淆
可以将所有的整数运算混淆成浮点数运算，然后再加一些指令替换，再配合BCF插一些花指令让IDA不能F5。逆向的时候遇到那种全是浮点数各种处理还不能F5的题目，就是极致恶心。