DirectML Documentation

Windows Development

DirectML Basic Inference Sample

This sample demonstrates a fundamental implementation of performing inference using DirectML. It covers the essential steps from creating a DirectML device to executing a simple operator.

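Key features include creating a DirectML device on top of a Direct3D 12 device, allocating GPU buffers that back tensors, defining and binding an element-wise operator, and recording and executing its dispatch.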

This sample is designed to be a starting point for developers looking to integrate DirectML into their applications for machine learning inference on Windows.

Dependencies: Requires a DirectX 12-capable GPU and Windows 10, version 1903 (build 18362) or later, which is the first Windows release that ships DirectML.

Location: The source code for this sample can typically be found within the Windows SDK samples or the DirectML GitHub repository.

Core Concepts

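Understanding the following concepts is crucial for grasping this sample: the DirectML device and its parent Direct3D 12 device, tensor descriptions and the buffers that back them, operators, binding tables, and command-list execution with fence synchronization.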

Code Snippets

Below are illustrative code snippets. For the complete implementation, please refer to the official sample source code.

DML_basic_inference.cpp

Device Initialization


// Creates a Direct3D 12 device on the default adapter and a DirectML device
// on top of it.
//
// Parameters:
//   ppDmlDevice - receives the new IDMLDevice when the function succeeds.
//
// Returns the failing HRESULT of D3D12CreateDevice if the Direct3D 12 device
// cannot be created; otherwise the HRESULT of DMLCreateDevice.
HRESULT InitializeDirectMLDevice(IDMLDevice** ppDmlDevice)
{
    // The "no flags" enumerator of DML_CREATE_DEVICE_FLAGS is spelled
    // DML_CREATE_DEVICE_FLAG_NONE. Use DML_CREATE_DEVICE_FLAG_DEBUG instead to
    // enable the DirectML debug layer while developing.
    DML_CREATE_DEVICE_FLAGS dmlFlags = DML_CREATE_DEVICE_FLAG_NONE;

    // Create a Direct3D 12 device first.
    ID3D12Device* d3d12Device = nullptr;
    D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0;
    HRESULT hr = D3D12CreateDevice(
        nullptr, // Use default adapter
        featureLevel,
        IID_PPV_ARGS(&d3d12Device)
    );

    if (SUCCEEDED(hr))
    {
        // Create the DirectML device from the Direct3D 12 device.
        hr = DMLCreateDevice(
            d3d12Device,
            dmlFlags,
            IID_PPV_ARGS(ppDmlDevice)
        );
    }

    // Drop this function's own reference. Callers that need the Direct3D 12
    // device later can obtain it with IDMLDevice::GetParentDevice.
    SafeRelease(d3d12Device);
    return hr;
}
                
DML_basic_inference.cpp

Creating Tensors (Resources)


// Creates a default-heap buffer large enough to back the given buffer tensor.
//
// Parameters:
//   dmlDevice   - not needed to size or create the buffer; kept so existing
//                 callers do not have to change.
//   d3d12Device - device used to create the committed resource.
//   tensorDesc  - tensor description; must be of type DML_TENSOR_TYPE_BUFFER
//                 with Desc pointing at a DML_BUFFER_TENSOR_DESC.
//   flags       - kept for interface compatibility (see the note below).
//   ppResource  - receives the new buffer; set to nullptr on failure.
//
// Returns E_POINTER for a null device or out-pointer, E_INVALIDARG for a
// tensor description that is not a buffer tensor, otherwise the HRESULT of
// ID3D12Device::CreateCommittedResource.
//
// NOTE(review): DML_RESOURCE_FLAGS is not declared by DirectML.h; presumably
// it is a sample-local type - confirm against the full sample.
HRESULT CreateTensorResource(
    IDMLDevice* dmlDevice,
    ID3D12Device* d3d12Device,
    const DML_TENSOR_DESC& tensorDesc,
    DML_RESOURCE_FLAGS flags,
    ID3D12Resource** ppResource)
{
    static_cast<void>(dmlDevice);
    // Every buffer created here starts in the UNORDERED_ACCESS state, which
    // D3D12 only permits when ALLOW_UNORDERED_ACCESS is set, so the flag can
    // no longer depend on the caller's role flags.
    static_cast<void>(flags);

    if (d3d12Device == nullptr || ppResource == nullptr)
    {
        return E_POINTER;
    }
    *ppResource = nullptr;

    // DML_TENSOR_DESC is only a { Type, Desc } pair; the byte size lives in
    // the DML_BUFFER_TENSOR_DESC that Desc points to.
    if (tensorDesc.Type != DML_TENSOR_TYPE_BUFFER || tensorDesc.Desc == nullptr)
    {
        return E_INVALIDARG;
    }
    const DML_BUFFER_TENSOR_DESC* bufferTensorDesc =
        static_cast<const DML_BUFFER_TENSOR_DESC*>(tensorDesc.Desc);

    D3D12_HEAP_PROPERTIES heapProps = {};
    heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; // GPU-local memory.

    // The second argument of Buffer() is a D3D12_RESOURCE_FLAGS value, not a
    // resource state.
    D3D12_RESOURCE_DESC resourceDesc = CD3DX12_RESOURCE_DESC::Buffer(
        bufferTensorDesc->TotalTensorSizeInBytes,
        D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
    );

    return d3d12Device->CreateCommittedResource(
        &heapProps,
        D3D12_HEAP_FLAG_NONE,
        &resourceDesc,
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS, // State DirectML expects at dispatch time.
        nullptr,
        IID_PPV_ARGS(ppResource)
    );
}
                
DML_basic_inference.cpp

Operator Definition and Binding


// Example: Binary Operator (e.g., Addition)
DML_ELEMENT_WISE_ADD_OPERATOR_DESC addDesc = {};
addDesc.ATensor = &inputATensorDesc;
addDesc.BTensor = &inputBTensorDesc;
addDesc.OutputTensor = &outputTensorDesc;

DML_OPERATOR_DESC opDesc = { DML_OPERATOR_ELEMENT_WISE_ADD, &addDesc };

IDMLOperator* dmlOperator = nullptr;
HRESULT hr = dmlDevice->CreateOperator(&opDesc, IID_PPV_ARGS(&dmlOperator));

// Binding operator to resources
IDMLBindingTable* bindingTable = nullptr;
if (SUCCEEDED(hr))
{
    // Need to create an operator binder first
    IDMLOperatorInitializer* initializer = nullptr;
    dmlDevice->CreateOperatorInitializer(IID_PPV_ARGS(&initializer)); // For persistent resources, not needed here

    hr = dmlOperator->CreateBindingTable(IID_PPV_ARGS(&bindingTable));
}

if (SUCCEEDED(hr))
{
    bindingTable->BindInputs(1, &inputResourceA); // Assuming inputResourceA is ID3D12Resource*
    bindingTable->BindInputs(1, &inputResourceB); // Assuming inputResourceB is ID3D12Resource*
    bindingTable->BindOutputs(1, &outputResource); // Assuming outputResource is ID3D12Resource*
}
                
DML_basic_inference.cpp

Executing Inference


ID3D12CommandRecorder* commandRecorder = nullptr;
d3d12CommandList->QueryInterface(IID_PPV_ARGS(&commandRecorder));

commandRecorder->RecordDispatch(bindingTable, nullptr, 0); // Dispatch for the operator

// Submit the command list and wait for completion
ID3D12CommandQueue* commandQueue = nullptr;
d3d12Device->GetCommandQueue(D3D12_COMMAND_LIST_TYPE_DIRECT, &commandQueue);

ID3D12Fence* fence = nullptr;
d3d12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence));
UINT64 fenceValue = 1;
commandQueue->Signal(fence, fenceValue);

// Execute command list and wait
commandQueue->ExecuteCommandLists(1, reinterpret_cast(&d3d12CommandList));

// Wait for completion (simplified for example)
HANDLE fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
fence->SetEventOnCompletion(fenceValue, fenceEvent);
WaitForSingleObject(fenceEvent, INFINITE);
CloseHandle(fenceEvent);

// Results are now available in the outputResource.
                

Further Exploration

To deepen your understanding, consider exploring:

Refer to the official DirectML documentation and code samples for comprehensive details and advanced scenarios.