From e878b01d13db707457ad78ede45e4ea9fd0273ab Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 12:50:49 -0800 Subject: [PATCH 1/4] editing initialize.sh --- .../SuperBenchmark/initialize.sh | 23 ++++++-- .../profiles/PERF-GPU-SUPERBENCH.json | 35 ------------ .../profiles/SETUP-NVIDIA-A100.json | 54 +++++++++++++++++++ 3 files changed, 74 insertions(+), 38 deletions(-) create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index cfd7e68560..ca8a053ed8 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -1,10 +1,27 @@ # Ansible will use sudo which needs explicit password input. This command removes that step. echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) -# sb binary might be in this path. This command adds this path to the PATH variable. 
-export PATH=$PATH:/home/$1/.local/bin +# Remove any existing system-installed Ansible to avoid version conflicts +# The old Ansible 2.10 doesn't support modern collections required by SuperBench +sudo apt remove -y ansible || true +sudo pip3 uninstall -y ansible ansible-base ansible-core || true +# Install ansible-core compatible with Python 3.8 (Ubuntu 20.04) +# ansible-core 2.12-2.13 is the highest version compatible with Python 3.8 +python3 -m pip install --user "ansible-core>=2.12,<2.14" +# Ensure the pip user-installed ansible is in PATH and takes precedence +export PATH=/home/$1/.local/bin:$PATH +# Configure Docker to use the data disk at /mnt to avoid filling up root filesystem +sudo mkdir -p /mnt/docker +sudo systemctl stop docker || true +# Backup existing docker data if it exists +if [ -d "/var/lib/docker" ]; then + sudo rsync -aP /var/lib/docker/ /mnt/docker/ || true +fi +# Configure Docker daemon to use new data directory +echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json +sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . -# Command to build sb. +# Command to build sb - this will install Ansible collections make postinstall # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. 
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json index 42ce2f08dd..ead2e4e51d 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json @@ -25,40 +25,5 @@ "ContainerVersion": "superbench/superbench:v0.9.0-cuda12.1" } } - ], - "Dependencies": [ - { - "Type": "NvidiaCudaInstallation", - "Parameters": { - "Scenario": "InstallNvidiaCuda", - "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", - "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", - "Username": "$.Parameters.Username", - "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" - } - }, - { - "Type": "DockerInstallation", - "Parameters": { - "Scenario": "InstallDocker" - } - }, - { - "Type": "NvidiaContainerToolkitInstallation", - "Parameters": { - "Scenario": "InstallNvidiaContainerToolkit" - } - }, - { - "Type": "LinuxPackageInstallation", - "Parameters": { - "Scenario": "InstallLinuxPackages", - "Packages": "sshpass,python3-pip", - "Packages-Apt": "nvidia-common", - "Packages-Dnf": "nvidia-driver", - "Packages-Yum": "nvidia-driver", - "Packages-Zypper": "" - } - } ] } diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json new file mode 100644 index 0000000000..862c4f3c9c --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -0,0 +1,54 @@ +{ + "Description": "AMD GPU Driver Installation Dependency", + "Metadata": { + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "win-x64, linux-x64", + "SupportedOperatingSystems": "Windows, Linux", + "SupportedGpuModelOnWindows": "mi25,v620", + "SupportedLinuxGpuModel": "all", + "SupportedLinuxDistros": 
"Ubuntu", + "SpecialRequirements": "This is AMD GPU Driver dependency. It can only be installed on the system having an AMD GPU card/chip." + }, + "Parameters": { + "ConfigurationFile": "default.yaml", + "Username": "", + "LinuxCudaVersion": "12.0", + "LinuxDriverVersion": "525", + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + }, + "Dependencies": [ + { + "Type": "DockerInstallation", + "Parameters": { + "Scenario": "InstallDocker" + } + }, + { + "Type": "NvidiaCudaInstallation", + "Parameters": { + "Scenario": "InstallNvidiaCuda", + "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", + "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", + "Username": "$.Parameters.Username", + "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" + } + }, + { + "Type": "NvidiaContainerToolkitInstallation", + "Parameters": { + "Scenario": "InstallNvidiaContainerToolkit" + } + }, + { + "Type": "LinuxPackageInstallation", + "Parameters": { + "Scenario": "InstallLinuxPackages", + "Packages": "sshpass,python3-pip", + "Packages-Apt": "nvidia-common", + "Packages-Dnf": "nvidia-driver", + "Packages-Yum": "nvidia-driver", + "Packages-Zypper": "" + } + } + ] +} \ No newline at end of file From 476200afda717b27d81db53398a416abe7a257b6 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 12:56:32 -0800 Subject: [PATCH 2/4] NVIDIA in metadata --- .../SuperBenchmark/initialize.sh | 11 ++--------- .../profiles/SETUP-NVIDIA-A100.json | 13 ++++++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index ca8a053ed8..0cc2d3cb50 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -1,27 +1,20 @@ # Ansible will use sudo which needs explicit 
password input. This command removes that step. echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) # Remove any existing system-installed Ansible to avoid version conflicts -# The old Ansible 2.10 doesn't support modern collections required by SuperBench sudo apt remove -y ansible || true sudo pip3 uninstall -y ansible ansible-base ansible-core || true # Install ansible-core compatible with Python 3.8 (Ubuntu 20.04) -# ansible-core 2.12-2.13 is the highest version compatible with Python 3.8 python3 -m pip install --user "ansible-core>=2.12,<2.14" # Ensure the pip user-installed ansible is in PATH and takes precedence export PATH=/home/$1/.local/bin:$PATH -# Configure Docker to use the data disk at /mnt to avoid filling up root filesystem +# Configure Docker to use the data disk at /mnt sudo mkdir -p /mnt/docker sudo systemctl stop docker || true -# Backup existing docker data if it exists -if [ -d "/var/lib/docker" ]; then - sudo rsync -aP /var/lib/docker/ /mnt/docker/ || true -fi -# Configure Docker daemon to use new data directory echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . -# Command to build sb - this will install Ansible collections +# Command to build sb make postinstall # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. 
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json index 862c4f3c9c..2f42a941e1 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -1,13 +1,12 @@ { - "Description": "AMD GPU Driver Installation Dependency", + "Description": "NVIDIA A100 GPU Driver Installation Dependency", "Metadata": { "RecommendedMinimumExecutionTime": "00:10:00", - "SupportedPlatforms": "win-x64, linux-x64", - "SupportedOperatingSystems": "Windows, Linux", - "SupportedGpuModelOnWindows": "mi25,v620", - "SupportedLinuxGpuModel": "all", - "SupportedLinuxDistros": "Ubuntu", - "SpecialRequirements": "This is AMD GPU Driver dependency. It can only be installed on the system having an AMD GPU card/chip." + "SupportedPlatforms": "linux-x64", + "SupportedOperatingSystems": "Linux", + "SupportedLinuxGpuModel": "NVIDIA A100", + "SupportedLinuxDistros": "Ubuntu20", + "SpecialRequirements": "This is an NVIDIA GPU Driver dependency. It can only be installed on the system having an NVIDIA A100 GPU card/chip." 
}, "Parameters": { "ConfigurationFile": "default.yaml", From 8b94434481519a379a1b5a20dd2de3db444f1eb3 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 13:52:03 -0800 Subject: [PATCH 3/4] minor --- .../VirtualClient.Actions/SuperBenchmark/initialize.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index 0cc2d3cb50..2e814cad01 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -14,7 +14,7 @@ echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . -# Command to build sb +# Command to build sb. make postinstall # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file From 58d380d06b36ec997b0ad3d6a3c011b751c4e3cc Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 15:45:21 -0800 Subject: [PATCH 4/4] Updating test case to use the SETUP-NVIDIA-A100 profile.
--- .../SuperBenchmarkProfileTests.cs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs index 92cabfc783..96ea2a4a99 100644 --- a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs +++ b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs @@ -39,7 +39,7 @@ public void SuperBenchmarkWorkloadProfileParametersAreInlinedCorrectly(string pr } [Test] - [TestCase("PERF-GPU-SUPERBENCH.json")] + [TestCase("SETUP-NVIDIA-A100.json")] public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAndReboot(string profile) { List expectedCommands = new List @@ -74,7 +74,7 @@ public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAn } [Test] - [TestCase("PERF-GPU-SUPERBENCH.json")] + [TestCase("SETUP-NVIDIA-A100.json")] public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAndWorkloadsAfterReboot(string profile) { IEnumerable expectedCommands = this.GetProfileExpectedCommands(PlatformID.Unix); @@ -123,12 +123,7 @@ private IEnumerable GetProfileExpectedCommands(PlatformID platform) $"sudo bash -c \"{setupCommand}\"", $"sudo apt-get update", $"sudo apt-get install -y nvidia-container-toolkit", - $"sudo systemctl restart docker", - $"sudo chmod -R 2777 \"/home/user/tools/VirtualClient\"", - $"sudo git clone -b v0.9.0 https://github.com/microsoft/superbenchmark", - $"sudo bash initialize.sh", - $"sb deploy --host-list localhost -i superbench/superbench:v0.9.0-cuda12.1", - $"sb run --host-list localhost -c default.yaml" + $"sudo systemctl restart docker" }; } }