Windows Development
This sample demonstrates a minimal implementation of machine learning inference with DirectML. It covers the essential steps, from creating a DirectML device to executing a single operator.
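Key features include: creating a DirectML device on top of a Direct3D 12 device, allocating GPU buffers for tensors, creating an element-wise operator and binding its inputs and outputs, and recording, submitting, and waiting for the GPU work.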
This sample is designed to be a starting point for developers looking to integrate DirectML into their applications for machine learning inference on Windows.
Dependencies: Requires a DirectX 12-capable GPU and a Windows 10 release that includes DirectML (version 1903 or later).
Location: The source code for this sample can typically be found within the Windows SDK samples or the DirectML GitHub repository.
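Understanding the following concepts is crucial for grasping this sample: Direct3D 12 devices, command lists, command queues, and fences; and DirectML tensors, operators, and binding tables.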
Below are illustrative code snippets. For the complete implementation, please refer to the official sample source code.
// Creates a Direct3D 12 device on the default adapter and a DirectML device on top of it.
//
// Parameters:
//   ppDmlDevice - receives the new IDMLDevice on success; set to nullptr on failure.
//                 Must not be null.
// Returns:
//   E_POINTER if ppDmlDevice is null; otherwise the HRESULT of D3D12CreateDevice or,
//   if that succeeded, of DMLCreateDevice.
HRESULT InitializeDirectMLDevice(IDMLDevice** ppDmlDevice)
{
    if (ppDmlDevice == nullptr)
    {
        return E_POINTER;
    }
    // COM convention: an out-parameter never holds a stale value after a failed call.
    *ppDmlDevice = nullptr;

    // The enumerators are spelled DML_CREATE_DEVICE_FLAG_* (singular "FLAG").
    // Use DML_CREATE_DEVICE_FLAG_DEBUG instead to enable the DirectML debug layer.
    DML_CREATE_DEVICE_FLAGS dmlFlags = DML_CREATE_DEVICE_FLAG_NONE;

    // DirectML is layered over Direct3D 12, so a D3D12 device has to exist first.
    ID3D12Device* d3d12Device = nullptr;
    HRESULT hr = D3D12CreateDevice(
        nullptr,                 // nullptr selects the default adapter
        D3D_FEATURE_LEVEL_11_0,  // minimum feature level requested
        IID_PPV_ARGS(&d3d12Device));

    if (SUCCEEDED(hr))
    {
        hr = DMLCreateDevice(d3d12Device, dmlFlags, IID_PPV_ARGS(ppDmlDevice));
    }

    // Drop this function's local reference. SafeRelease is defined elsewhere in the
    // project (presumably null-safe - verify). Callers that need the D3D12 device
    // later can query it from the DirectML device (IDMLDevice::GetParentDevice).
    SafeRelease(d3d12Device);
    return hr;
}
// Allocates a default-heap GPU buffer large enough to back a DirectML buffer tensor.
//
// Parameters:
//   dmlDevice   - unused; kept so existing callers keep compiling.
//   d3d12Device - device that creates the committed resource. Must not be null.
//   tensorDesc  - must be of type DML_TENSOR_TYPE_BUFFER with Desc pointing at a
//                 DML_BUFFER_TENSOR_DESC; its TotalTensorSizeInBytes is the buffer size.
//   flags       - unused; kept so existing callers keep compiling.
//                 NOTE(review): DML_RESOURCE_FLAGS is not declared in this block and does
//                 not appear to be part of the public DirectML header - confirm where it
//                 is defined.
//   ppResource  - receives the buffer on success; set to nullptr on failure. Must not be null.
// Returns:
//   E_POINTER / E_INVALIDARG for bad arguments, otherwise the HRESULT of
//   ID3D12Device::CreateCommittedResource.
HRESULT CreateTensorResource(
    IDMLDevice* dmlDevice,
    ID3D12Device* d3d12Device,
    const DML_TENSOR_DESC& tensorDesc,
    DML_RESOURCE_FLAGS flags,
    ID3D12Resource** ppResource)
{
    (void)dmlDevice;
    (void)flags;

    if (ppResource == nullptr)
    {
        return E_POINTER;
    }
    *ppResource = nullptr;

    if (d3d12Device == nullptr)
    {
        return E_INVALIDARG;
    }

    // DML_TENSOR_DESC is only a {Type, Desc} pair; the byte size lives in the
    // DML_BUFFER_TENSOR_DESC that Desc points to.
    if (tensorDesc.Type != DML_TENSOR_TYPE_BUFFER || tensorDesc.Desc == nullptr)
    {
        return E_INVALIDARG;
    }
    const DML_BUFFER_TENSOR_DESC* bufferTensor =
        static_cast<const DML_BUFFER_TENSOR_DESC*>(tensorDesc.Desc);
    if (bufferTensor->TotalTensorSizeInBytes == 0)
    {
        return E_INVALIDARG;
    }

    D3D12_HEAP_PROPERTIES heapProps = {};
    heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; // GPU-local memory

    // The second argument of CD3DX12_RESOURCE_DESC::Buffer is a D3D12_RESOURCE_FLAGS value,
    // not a resource state. The buffer is created in the UNORDERED_ACCESS state below, which
    // is only valid when ALLOW_UNORDERED_ACCESS is set, so the flag is always applied.
    const D3D12_RESOURCE_DESC resourceDesc = CD3DX12_RESOURCE_DESC::Buffer(
        bufferTensor->TotalTensorSizeInBytes,
        D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);

    return d3d12Device->CreateCommittedResource(
        &heapProps,
        D3D12_HEAP_FLAG_NONE,
        &resourceDesc,
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
        nullptr, // no optimized clear value for buffers
        IID_PPV_ARGS(ppResource));
}
// Example: Binary Operator (e.g., Addition)
DML_ELEMENT_WISE_ADD_OPERATOR_DESC addDesc = {};
addDesc.ATensor = &inputATensorDesc;
addDesc.BTensor = &inputBTensorDesc;
addDesc.OutputTensor = &outputTensorDesc;
DML_OPERATOR_DESC opDesc = { DML_OPERATOR_ELEMENT_WISE_ADD, &addDesc };
IDMLOperator* dmlOperator = nullptr;
HRESULT hr = dmlDevice->CreateOperator(&opDesc, IID_PPV_ARGS(&dmlOperator));
// Binding operator to resources
IDMLBindingTable* bindingTable = nullptr;
if (SUCCEEDED(hr))
{
// Need to create an operator binder first
IDMLOperatorInitializer* initializer = nullptr;
dmlDevice->CreateOperatorInitializer(IID_PPV_ARGS(&initializer)); // For persistent resources, not needed here
hr = dmlOperator->CreateBindingTable(IID_PPV_ARGS(&bindingTable));
}
if (SUCCEEDED(hr))
{
bindingTable->BindInputs(1, &inputResourceA); // Assuming inputResourceA is ID3D12Resource*
bindingTable->BindInputs(1, &inputResourceB); // Assuming inputResourceB is ID3D12Resource*
bindingTable->BindOutputs(1, &outputResource); // Assuming outputResource is ID3D12Resource*
}
ID3D12CommandRecorder* commandRecorder = nullptr;
d3d12CommandList->QueryInterface(IID_PPV_ARGS(&commandRecorder));
commandRecorder->RecordDispatch(bindingTable, nullptr, 0); // Dispatch for the operator
// Submit the command list and wait for completion
ID3D12CommandQueue* commandQueue = nullptr;
d3d12Device->GetCommandQueue(D3D12_COMMAND_LIST_TYPE_DIRECT, &commandQueue);
ID3D12Fence* fence = nullptr;
d3d12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence));
UINT64 fenceValue = 1;
commandQueue->Signal(fence, fenceValue);
// Execute command list and wait
commandQueue->ExecuteCommandLists(1, reinterpret_cast(&d3d12CommandList));
// Wait for completion (simplified for example)
HANDLE fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
fence->SetEventOnCompletion(fenceValue, fenceEvent);
WaitForSingleObject(fenceEvent, INFINITE);
CloseHandle(fenceEvent);
// Results are now available in the outputResource.
To deepen your understanding, consider exploring:
Refer to the official DirectML documentation and code samples for comprehensive details and advanced scenarios.