diff --git a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs index 92cabfc783..96ea2a4a99 100644 --- a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs +++ b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs @@ -39,7 +39,7 @@ public void SuperBenchmarkWorkloadProfileParametersAreInlinedCorrectly(string pr } [Test] - [TestCase("PERF-GPU-SUPERBENCH.json")] + [TestCase("SETUP-NVIDIA-A100.json")] public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAndReboot(string profile) { List expectedCommands = new List @@ -74,7 +74,7 @@ public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAn } [Test] - [TestCase("PERF-GPU-SUPERBENCH.json")] + [TestCase("SETUP-NVIDIA-A100.json")] public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAndWorkloadsAfterReboot(string profile) { IEnumerable expectedCommands = this.GetProfileExpectedCommands(PlatformID.Unix); @@ -123,12 +123,7 @@ private IEnumerable GetProfileExpectedCommands(PlatformID platform) $"sudo bash -c \"{setupCommand}\"", $"sudo apt-get update", $"sudo apt-get install -y nvidia-container-toolkit", - $"sudo systemctl restart docker", - $"sudo chmod -R 2777 \"/home/user/tools/VirtualClient\"", - $"sudo git clone -b v0.9.0 https://github.com/microsoft/superbenchmark", - $"sudo bash initialize.sh", - $"sb deploy --host-list localhost -i superbench/superbench:v0.9.0-cuda12.1", - $"sb run --host-list localhost -c default.yaml" + $"sudo systemctl restart docker" }; } } diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index cfd7e68560..2e814cad01 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ 
b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -1,7 +1,17 @@ # Ansible will use sudo which needs explicit password input. This command removes that step. echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) -# sb binary might be in this path. This command adds this path to the PATH variable. -export PATH=$PATH:/home/$1/.local/bin +# Remove any existing system-installed Ansible to avoid version conflicts +sudo apt remove -y ansible || true +sudo pip3 uninstall -y ansible ansible-base ansible-core || true +# Install ansible-core compatible with Python 3.8 (Ubuntu 20.04) +python3 -m pip install --user "ansible-core>=2.12,<2.14" +# Ensure the pip user-installed ansible is in PATH and takes precedence +export PATH=/home/$1/.local/bin:$PATH +# Configure Docker to use the data disk at /mnt +sudo mkdir -p /mnt/docker +sudo systemctl stop docker || true +echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json +sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . # Command to build sb. 
diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json index 42ce2f08dd..ead2e4e51d 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json @@ -25,40 +25,5 @@ "ContainerVersion": "superbench/superbench:v0.9.0-cuda12.1" } } - ], - "Dependencies": [ - { - "Type": "NvidiaCudaInstallation", - "Parameters": { - "Scenario": "InstallNvidiaCuda", - "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", - "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", - "Username": "$.Parameters.Username", - "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" - } - }, - { - "Type": "DockerInstallation", - "Parameters": { - "Scenario": "InstallDocker" - } - }, - { - "Type": "NvidiaContainerToolkitInstallation", - "Parameters": { - "Scenario": "InstallNvidiaContainerToolkit" - } - }, - { - "Type": "LinuxPackageInstallation", - "Parameters": { - "Scenario": "InstallLinuxPackages", - "Packages": "sshpass,python3-pip", - "Packages-Apt": "nvidia-common", - "Packages-Dnf": "nvidia-driver", - "Packages-Yum": "nvidia-driver", - "Packages-Zypper": "" - } - } ] } diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json new file mode 100644 index 0000000000..2f42a941e1 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -0,0 +1,53 @@ +{ + "Description": "NVIDIA A100 GPU Driver Installation Dependency", + "Metadata": { + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "linux-x64", + "SupportedOperatingSystems": "Linux", + "SupportedLinuxGpuModel": "NVIDIA A100", + "SupportedLinuxDistros": "Ubuntu20", + "SpecialRequirements": "This is an NVIDIA GPU Driver dependency. 
It can only be installed on a system that has an NVIDIA A100 GPU card/chip." + }, + "Parameters": { + "ConfigurationFile": "default.yaml", + "Username": "", + "LinuxCudaVersion": "12.0", + "LinuxDriverVersion": "525", + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + }, + "Dependencies": [ + { + "Type": "DockerInstallation", + "Parameters": { + "Scenario": "InstallDocker" + } + }, + { + "Type": "NvidiaCudaInstallation", + "Parameters": { + "Scenario": "InstallNvidiaCuda", + "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", + "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", + "Username": "$.Parameters.Username", + "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" + } + }, + { + "Type": "NvidiaContainerToolkitInstallation", + "Parameters": { + "Scenario": "InstallNvidiaContainerToolkit" + } + }, + { + "Type": "LinuxPackageInstallation", + "Parameters": { + "Scenario": "InstallLinuxPackages", + "Packages": "sshpass,python3-pip", + "Packages-Apt": "nvidia-common", + "Packages-Dnf": "nvidia-driver", + "Packages-Yum": "nvidia-driver", + "Packages-Zypper": "" + } + } + ] +} \ No newline at end of file