Memory management




cudaFree deallocates memory that has been allocated on the device with cudaMalloc, cudaMalloc3D or cudaMallocPitch. For memory that has been allocated with cudaMallocHost or cudaHostAlloc, see cudaFreeHost. For memory that has been allocated with cudaMallocArray or cudaMalloc3DArray, see cudaFreeArray.



cudaError_t cudaFree(void* buffer);



A pointer to the device memory to free, previously allocated with cudaMalloc, cudaMalloc3D or cudaMallocPitch.

Return value

The error code returned from the memory freeing:

  • cudaSuccess: the resource was successfully freed.
  • cudaErrorInvalidDevicePointer: an error occurred while freeing the memory.




#include <stdio.h>
#include <stdlib.h>

/**
 * @brief Sets each element of the array to the index of the thread that
 * processed it.
 */
__global__ void set_to_index(int* a, int size)
{
    // a:    array the kernel writes to; it is dereferenced in device code,
    //       so it must be a pointer the device can access.
    // size: number of elements in a.

    // Flat global index of this thread across the whole 1D grid.
    int my_index = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard against threads whose index falls past the end of the array,
    // which happens whenever the launch provides more threads than elements.
    if(my_index < size)
    {
        a[my_index] = my_index;
    }
}

/**
 * @brief Illustrates how to deallocate memory on the device.
 * @details This application launches a simple kernel that sets each element
 * of an array to its index. Part of the process is to deallocate the memory
 * on the device after the data has been copied back from it.
 */
int main(int argc, char* argv[])
{
    // Returns EXIT_SUCCESS when every CUDA call succeeded, EXIT_FAILURE
    // otherwise. argc and argv are not used.
    const int ARRAY_SIZE = 10;
    const int ARRAY_SIZE_BYTES = ARRAY_SIZE * sizeof(int);

    // Host-side array, zero-initialised so that stale values are never printed.
    int a_host[ARRAY_SIZE];
    for(int i = 0; i < ARRAY_SIZE; i++)
    {
        a_host[i] = 0;
    }

    // Pointer that will refer to the memory allocated on the device.
    int* a_device = NULL;

    // Allocate memory on the device. Nothing needs releasing if this fails.
    cudaError_t status = cudaMalloc(&a_device, ARRAY_SIZE_BYTES);
    if(status != cudaSuccess)
    {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        return EXIT_FAILURE;
    }

    // Copy the input array to the device.
    status = cudaMemcpy(a_device, a_host, ARRAY_SIZE_BYTES, cudaMemcpyHostToDevice);

    // Launch the kernel on the device: one block with one thread per element.
    if(status == cudaSuccess)
    {
        set_to_index<<<1, ARRAY_SIZE>>>(a_device, ARRAY_SIZE);
        // A kernel launch returns nothing; launch errors are read back here.
        status = cudaGetLastError();
    }

    // Copy the output array back from the device to the host. This blocking
    // copy is also where errors raised during kernel execution are reported.
    if(status == cudaSuccess)
    {
        status = cudaMemcpy(a_host, a_device, ARRAY_SIZE_BYTES, cudaMemcpyDeviceToHost);
    }

    // Print the values only if everything up to this point succeeded.
    if(status == cudaSuccess)
    {
        for(int i = 0; i < ARRAY_SIZE; i++)
        {
            printf("%d ", a_host[i]);
        }
    }

    // Free resources: release the device allocation on every path that
    // reached a successful cudaMalloc, including the failure paths above.
    cudaError_t free_status = cudaFree(a_device);

    if(status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return EXIT_FAILURE;
    }
    if(free_status != cudaSuccess)
    {
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(free_status));
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}