GPU Support
Introduction
This section shows an example of how to deploy a VM with GPU support using the Go client. The example is taken from our integration tests.
Example
// TestVMWithGPUDeployment deploys a network and a VM that is given the GPUs
// reported by a node matching the filter below (status up, has a GPU, RentedBy
// set to this client's twin ID). It then checks that the GPUs are recorded on
// the VM loaded back from the grid and that the first GPU's vendor shows up in
// the VM's `lspci -v` output.
//
// The test is skipped when no matching node can be found. Any other failed
// prerequisite stops the test immediately so later steps never run against
// invalid values.
func TestVMWithGPUDeployment(t *testing.T) {
	tfPluginClient, err := setup()
	if !assert.NoError(t, err) {
		return
	}

	// Deferred first, so it runs last: the cleanup calls below still get a
	// live context (unless the 5 minute budget itself is exhausted).
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	publicKey, privateKey, err := GenerateSSHKeyPair()
	if !assert.NoError(t, err) {
		return
	}

	twinID := uint64(tfPluginClient.TwinID)
	nodeFilter := types.NodeFilter{
		Status:   &statusUp,
		FreeSRU:  convertGBToBytes(20),
		FreeMRU:  convertGBToBytes(8),
		RentedBy: &twinID,
		HasGPU:   &trueVal,
	}

	nodes, err := deployer.FilterNodes(ctx, tfPluginClient, nodeFilter)
	if err != nil {
		// Surface the underlying error so a skipped run can be diagnosed.
		t.Skipf("no available nodes found: %v", err)
	}
	if len(nodes) == 0 {
		// Guard the nodes[0] access below against an empty, error-free result.
		t.Skip("no available nodes found")
	}
	nodeID := uint32(nodes[0].NodeID)

	nodeClient, err := tfPluginClient.NcPool.GetNodeClient(tfPluginClient.SubstrateConn, nodeID)
	if !assert.NoError(t, err) {
		return
	}

	gpus, err := nodeClient.GPUs(ctx)
	if !assert.NoError(t, err) {
		return
	}
	// gpus[0] is read at the end of the test; fail early instead of panicking.
	if !assert.NotEmpty(t, gpus) {
		return
	}
	expectedGPUs := ConvertGPUsToStr(gpus)

	network := workloads.ZNet{
		Name:        "gpuNetwork",
		Description: "network for testing gpu",
		Nodes:       []uint32{nodeID},
		IPRange: gridtypes.NewIPNet(net.IPNet{
			IP:   net.IPv4(10, 20, 0, 0),
			Mask: net.CIDRMask(16, 32),
		}),
		AddWGAccess: false,
	}

	disk := workloads.Disk{
		Name:   "gpuDisk",
		SizeGB: 20,
	}

	vm := workloads.VM{
		Name:       "gpu",
		Flist:      "https://hub.grid.tf/tf-official-vms/ubuntu-22.04.flist",
		CPU:        4,
		Planetary:  true,
		Memory:     1024 * 8,
		GPUs:       expectedGPUs,
		Entrypoint: "/init.sh",
		EnvVars: map[string]string{
			"SSH_KEY": publicKey,
		},
		Mounts: []workloads.Mount{
			{DiskName: disk.Name, MountPoint: "/data"},
		},
		NetworkName: network.Name,
	}

	err = tfPluginClient.NetworkDeployer.Deploy(ctx, &network)
	// Register cleanup before bailing out so Cancel still runs if Deploy
	// failed part-way.
	defer func() {
		assert.NoError(t, tfPluginClient.NetworkDeployer.Cancel(ctx, &network))
	}()
	if !assert.NoError(t, err) {
		return
	}

	dl := workloads.NewDeployment("gpu", nodeID, "", nil, network.Name, []workloads.Disk{disk}, nil, []workloads.VM{vm}, nil)
	err = tfPluginClient.DeploymentDeployer.Deploy(ctx, &dl)
	defer func() {
		assert.NoError(t, tfPluginClient.DeploymentDeployer.Cancel(ctx, &dl))
	}()
	if !assert.NoError(t, err) {
		return
	}

	vm, err = tfPluginClient.State.LoadVMFromGrid(nodeID, vm.Name, dl.Name)
	if !assert.NoError(t, err) {
		return
	}
	// testify's Equal takes (expected, actual).
	assert.Equal(t, expectedGPUs, vm.GPUs)

	// NOTE(review): presumably gives the VM time to boot before the SSH
	// command below is attempted - confirm the required delay.
	time.Sleep(30 * time.Second)

	output, err := RemoteRun("root", vm.YggIP, "lspci -v", privateKey)
	if !assert.NoError(t, err) {
		return
	}
	assert.Contains(t, string(output), gpus[0].Vendor)
}
More Information
For more information, see the Client Pull Request that added support for the new calls to list GPUs and to deploy a machine with a GPU.