jcuda.driver
Class JCudaDriver

java.lang.Object
  extended by jcuda.driver.JCudaDriver

public class JCudaDriver
extends java.lang.Object

Java bindings for the NVIDIA CUDA driver API.

Most comments are taken from the CUDA reference manual


Field Summary
static int CU_MEMHOSTALLOC_DEVICEMAP
          If set, host memory is mapped into CUDA address space and cuMemHostGetDevicePointer(CUdeviceptr, Pointer, int) may be called on the host pointer.
static int CU_MEMHOSTALLOC_PORTABLE
          If set, host memory is portable between CUDA contexts.
static int CU_MEMHOSTALLOC_WRITECOMBINED
          If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction (MOVNTDQA).
static int CU_PARAM_TR_DEFAULT
          For texture references loaded into the module, use default texunit from texture reference
static int CU_TRSA_OVERRIDE_FORMAT
          Override the texref format with a format inferred from the array
static int CU_TRSF_NORMALIZED_COORDINATES
          Use normalized texture coordinates in the range [0,1) instead of [0,dim)
static int CU_TRSF_READ_AS_INTEGER
          Read the texture as integers rather than promoting the values to floats in the range [0,1]
static int CUDA_ARRAY3D_2DARRAY
          If set, the CUDA array contains an array of 2D slices and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number of slices, not the depth of a 3D array.
static int CUDA_VERSION
           
 
Method Summary
static int align(int value, int alignment)
          Returns the given (address) value, adjusted to have the given alignment.
static int cuArray3DCreate(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray)
          Creates a CUDA array.
static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray)
          Get a 3D CUDA array descriptor.
static int cuArrayCreate(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray)
          Creates a 1D or 2D CUDA array.
static int cuArrayDestroy(CUarray hArray)
          Destroys a CUDA array.
static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray)
          Get a 1D or 2D CUDA array descriptor.
static int cuCtxAttach(CUcontext pctx, int flags)
          Increment context usage-count.
static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev)
          Create a CUDA context.
static int cuCtxDestroy(CUcontext ctx)
          Destroy the current context or a floating CUDA context.
static int cuCtxDetach(CUcontext ctx)
          Decrement a context’s usage-count.
static int cuCtxGetDevice(CUdevice device)
          Return device-ID for current context.
static int cuCtxPopCurrent(CUcontext pctx)
          Pops the current CUDA context from the current CPU thread.
static int cuCtxPushCurrent(CUcontext ctx)
          Attach floating context to CPU thread.
static int cuCtxSynchronize()
          Block for a context’s tasks to complete.
static int cuDeviceComputeCapability(int[] major, int[] minor, CUdevice dev)
          Returns the compute capability of the device.
static int cuDeviceGet(CUdevice device, int ordinal)
          Returns a device-handle.
static int cuDeviceGetAttribute(int[] pi, int attrib, CUdevice dev)
          Returns information about the device.
static int cuDeviceGetCount(int[] count)
          Returns the number of compute-capable devices.
static int cuDeviceGetName(byte[] name, int len, CUdevice dev)
          Returns an identifier string.
static int cuDeviceGetProperties(CUdevprop prop, CUdevice dev)
          Get device properties.
static int cuDeviceTotalMem(int[] bytes, CUdevice dev)
          Returns the total amount of memory on the device.
static int cuDriverGetVersion(int[] driverVersion)
          Returns in driverVersion the version number of the installed CUDA driver.
static int cuEventCreate(CUevent phEvent, int Flags)
          Creates an event.
static int cuEventDestroy(CUevent hEvent)
          Destroys an event.
static int cuEventElapsedTime(float[] pMilliseconds, CUevent hStart, CUevent hEnd)
          Computes the elapsed time between two events.
static int cuEventQuery(CUevent hEvent)
          Queries an event’s status.
static int cuEventRecord(CUevent hEvent, CUstream hStream)
          Records an event.
static int cuEventSynchronize(CUevent hEvent)
          Waits for an event to complete.
static int cuFuncGetAttribute(int[] pi, int attrib, CUfunction func)
          Returns in pi the integer value of the attribute attrib on the kernel given by func.
static int cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z)
          Sets the block-dimensions for the function.
static int cuFuncSetCacheConfig(CUfunction hfunc, int config)
          Sets the preferred cache configuration for the device function hfunc.
static int cuFuncSetSharedSize(CUfunction hfunc, int bytes)
          Sets the shared-memory size for the function.
static int cuGLCtxCreate(CUcontext pCtx, int Flags, CUdevice device)
          Create a CUDA context for interoperability with OpenGL.
static int cuGLInit()
          Deprecated. As of CUDA 3.0
static int cuGLMapBufferObject(CUdeviceptr dptr, int[] size, int bufferobj)
          Deprecated. As of CUDA 3.0
static int cuGLMapBufferObjectAsync(CUdeviceptr dptr, int[] size, int buffer, CUstream hStream)
          Deprecated. As of CUDA 3.0
static int cuGLRegisterBufferObject(int bufferobj)
          Deprecated. As of CUDA 3.0
static int cuGLSetBufferObjectMapFlags(int buffer, int Flags)
          Deprecated. As of CUDA 3.0
static int cuGLUnmapBufferObject(int bufferobj)
          Deprecated. As of CUDA 3.0
static int cuGLUnmapBufferObjectAsync(int buffer, CUstream hStream)
          Deprecated. As of CUDA 3.0
static int cuGLUnregisterBufferObject(int bufferobj)
          Deprecated. As of CUDA 3.0
static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource, int buffer, int Flags)
          Registers the buffer object specified by buffer for access by CUDA.
static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource, int image, int target, int Flags)
          Registers the texture or renderbuffer object specified by image for access by CUDA.
static int cuGraphicsMapResources(int count, CUgraphicsResource[] resources, CUstream hStream)
          Maps the count graphics resources in resources for access by CUDA.
static int cuGraphicsResourceGetMappedPointer(CUdeviceptr pDevPtr, int[] pSize, CUgraphicsResource resource)
          Returns in *pDevPtr a pointer through which the mapped graphics resource resource may be accessed.
static int cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, int flags)
          Set flags for mapping the graphics resource resource.
static int cuGraphicsSubResourceGetMappedArray(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel)
          Returns in *pArray an array through which the subresource of the mapped graphics resource resource which corresponds to array index arrayIndex and mipmap level mipLevel may be accessed.
static int cuGraphicsUnmapResources(int count, CUgraphicsResource[] resources, CUstream hStream)
          Unmaps the count graphics resources in resources.
static int cuGraphicsUnregisterResource(CUgraphicsResource resource)
          Unregisters the graphics resource resource so it is not accessible by CUDA unless registered again.
static int cuInit(int Flags)
          Initialize the CUDA driver API.
static int cuLaunch(CUfunction f)
          Launches a CUDA function.
static int cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
          Launches a CUDA function.
static int cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream)
           
static int cuMemAlloc(CUdeviceptr dptr, int bytesize)
          Allocates device memory.
static int cuMemAllocHost(Pointer pp, int bytesize)
          Allocates page-locked host memory.
static int cuMemAllocPitch(CUdeviceptr dptr, int[] pPitch, int WidthInBytes, int Height, int ElementSizeBytes)
          Allocates device memory.
static int cuMemcpy2D(CUDA_MEMCPY2D pCopy)
          Copies memory for 2D arrays.
static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy, CUstream hStream)
          Copies memory.
static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy)
           
static int cuMemcpy3D(CUDA_MEMCPY3D pCopy)
          Copies memory for 3D arrays.
static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy, CUstream hStream)
          Copies memory.
static int cuMemcpyAtoA(CUarray dstArray, int dstIndex, CUarray srcArray, int srcIndex, int ByteCount)
          Copies memory from Array to Array.
static int cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray hSrc, int SrcIndex, int ByteCount)
          Copies memory from Array to Device.
static int cuMemcpyAtoH(Pointer dstHost, CUarray srcArray, int srcIndex, int ByteCount)
          Copies memory from Array to Host.
static int cuMemcpyAtoHAsync(Pointer dstHost, CUarray srcArray, int srcIndex, int ByteCount, CUstream hStream)
          Copies memory.
static int cuMemcpyDtoA(CUarray dstArray, int dstIndex, CUdeviceptr srcDevice, int ByteCount)
          Copies memory from Device to Array.
static int cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, int ByteCount)
          Copies memory from Device to Device.
static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, int ByteCount, CUstream hStream)
          Copies memory.
static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, int ByteCount)
          Copies memory from Device to Host.
static int cuMemcpyDtoHAsync(Pointer dstHost, CUdeviceptr srcDevice, int ByteCount, CUstream hStream)
          Copies memory.
static int cuMemcpyHtoA(CUarray dstArray, int dstIndex, Pointer pSrc, int ByteCount)
          Copies memory from Host to Array.
static int cuMemcpyHtoAAsync(CUarray dstArray, int dstIndex, Pointer pSrc, int ByteCount, CUstream hStream)
          Copies memory.
static int cuMemcpyHtoD(CUdeviceptr dstDevice, Pointer srcHost, int ByteCount)
          Copy memory from Host to Device.
static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice, Pointer srcHost, int ByteCount, CUstream hStream)
          Copies memory.
static int cuMemFree(CUdeviceptr dptr)
          Frees device memory.
static int cuMemFreeHost(Pointer p)
          Frees page-locked host memory.
static int cuMemGetAddressRange(CUdeviceptr pbase, int[] psize, CUdeviceptr dptr)
          Get information on memory allocations.
static int cuMemGetInfo(int[] free, int[] total)
          Gets free and total memory.
static int cuMemHostAlloc(Pointer pp, long bytes, int Flags)
          Allocates bytes bytes of host memory that is page-locked and accessible to the device.
static int cuMemHostGetDevicePointer(CUdeviceptr ret, Pointer p, int Flags)
          Passes back the device pointer ret corresponding to the mapped, pinned host buffer p allocated by cuMemHostAlloc.
static int cuMemHostGetFlags(int[] pFlags, Pointer p)
          Passes back the flags pFlags that were specified when allocating the pinned host buffer p allocated by cuMemHostAlloc.
static int cuMemsetD16(CUdeviceptr dstDevice, short us, int N)
          Initializes device memory.
static int cuMemsetD2D16(CUdeviceptr dstDevice, int dstPitch, short us, int Width, int Height)
          Initializes device memory.
static int cuMemsetD2D32(CUdeviceptr dstDevice, int dstPitch, int ui, int Width, int Height)
          Initializes device memory.
static int cuMemsetD2D8(CUdeviceptr dstDevice, int dstPitch, char uc, int Width, int Height)
          Initializes device memory.
static int cuMemsetD32(CUdeviceptr dstDevice, int ui, int N)
          Initializes device memory.
static int cuMemsetD8(CUdeviceptr dstDevice, char uc, int N)
          Initializes device memory.
static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, java.lang.String name)
          Returns a function handle.
static int cuModuleGetGlobal(CUdeviceptr dptr, int[] bytes, CUmodule hmod, java.lang.String name)
          Returns a global pointer from a module.
static int cuModuleGetTexRef(CUtexref pTexRef, CUmodule hmod, java.lang.String name)
          Gets a handle to a texture-reference.
static int cuModuleLoad(CUmodule module, java.lang.String fname)
          Loads a compute module.
static int cuModuleLoadData(CUmodule module, byte[] image)
          Loads a module’s data.
static int cuModuleLoadDataEx(CUmodule phMod, Pointer p, int numOptions, int[] options, Pointer optionValues)
          Takes a pointer p and loads the corresponding module phMod into the current context.
static int cuModuleLoadFatBinary(CUmodule module, byte[] fatCubin)
          Loads a fat-binary object.
static int cuModuleUnload(CUmodule hmod)
          Unloads a module.
static int cuParamSetf(CUfunction hfunc, int offset, float value)
          Adds a floating-point parameter to the function’s argument list.
static int cuParamSeti(CUfunction hfunc, int offset, int value)
          Adds an integer parameter to the function’s argument list.
static int cuParamSetSize(CUfunction hfunc, int numbytes)
          Sets the parameter-size for the function.
static int cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef)
          Adds a texture-reference to the function’s argument list.
static int cuParamSetv(CUfunction hfunc, int offset, Pointer ptr, int numbytes)
          Adds arbitrary data to the function’s argument list.
static int cuStreamCreate(CUstream phStream, int Flags)
          Create a stream.
static int cuStreamDestroy(CUstream hStream)
          Destroys a stream.
static int cuStreamQuery(CUstream hStream)
          Determine status of a compute stream.
static int cuStreamSynchronize(CUstream hStream)
          Block until a stream’s tasks are completed.
static int cuTexRefCreate(CUtexref pTexRef)
          Creates a texture-reference.
static int cuTexRefDestroy(CUtexref hTexRef)
          Destroys a texture-reference.
static int cuTexRefGetAddress(CUdeviceptr pdptr, CUtexref hTexRef)
          Gets the address associated with a texture-reference.
static int cuTexRefGetAddressMode(int[] pam, CUtexref hTexRef, int dim)
          Gets the addressing mode used by a texture-reference.
static int cuTexRefGetArray(CUarray phArray, CUtexref hTexRef)
          Gets the array bound to a texture-reference.
static int cuTexRefGetFilterMode(int[] pfm, CUtexref hTexRef)
          Gets the filter-mode used by a texture-reference.
static int cuTexRefGetFlags(int[] pFlags, CUtexref hTexRef)
          Gets the flags used by a texture-reference.
static int cuTexRefGetFormat(int[] pFormat, int[] pNumChannels, CUtexref hTexRef)
          Gets the format used by a texture-reference.
static int cuTexRefSetAddress(int[] ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, int bytes)
          Binds an address as a texture-reference.
static int cuTexRefSetAddress2D(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, int PitchInBytes)
          Binds a linear address range to the texture reference hTexRef.
static int cuTexRefSetAddressMode(CUtexref hTexRef, int dim, int am)
          Set the addressing mode for a texture-reference.
static int cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, int Flags)
          Binds an array to a texture-reference.
static int cuTexRefSetFilterMode(CUtexref hTexRef, int fm)
          Sets the mode for a texture-reference.
static int cuTexRefSetFlags(CUtexref hTexRef, int Flags)
          Sets flags for a texture-reference.
static int cuTexRefSetFormat(CUtexref hTexRef, int fmt, int NumPackedComponents)
          Sets the format for a texture-reference.
static void setExceptionsEnabled(boolean enabled)
          Enables or disables exceptions.
static void setLogLevel(LogLevel logLevel)
          Set the specified log level for the JCuda driver library.
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

CUDA_VERSION

public static final int CUDA_VERSION
See Also:
Constant Field Values

CU_MEMHOSTALLOC_PORTABLE

public static final int CU_MEMHOSTALLOC_PORTABLE
If set, host memory is portable between CUDA contexts. Flag for cuMemHostAlloc(jcuda.Pointer, long, int)

See Also:
Constant Field Values

CU_MEMHOSTALLOC_DEVICEMAP

public static final int CU_MEMHOSTALLOC_DEVICEMAP
If set, host memory is mapped into CUDA address space and cuMemHostGetDevicePointer(jcuda.driver.CUdeviceptr, jcuda.Pointer, int) may be called on the host pointer. Flag for cuMemHostAlloc(jcuda.Pointer, long, int)

See Also:
Constant Field Values

CU_MEMHOSTALLOC_WRITECOMBINED

public static final int CU_MEMHOSTALLOC_WRITECOMBINED
If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction (MOVNTDQA). Flag for cuMemHostAlloc(jcuda.Pointer, long, int)

See Also:
Constant Field Values

CUDA_ARRAY3D_2DARRAY

public static final int CUDA_ARRAY3D_2DARRAY
If set, the CUDA array contains an array of 2D slices and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number of slices, not the depth of a 3D array.

See Also:
Constant Field Values

CU_PARAM_TR_DEFAULT

public static final int CU_PARAM_TR_DEFAULT
For texture references loaded into the module, use default texunit from texture reference

See Also:
Constant Field Values

CU_TRSA_OVERRIDE_FORMAT

public static final int CU_TRSA_OVERRIDE_FORMAT
Override the texref format with a format inferred from the array

See Also:
Constant Field Values

CU_TRSF_READ_AS_INTEGER

public static final int CU_TRSF_READ_AS_INTEGER
Read the texture as integers rather than promoting the values to floats in the range [0,1]

See Also:
Constant Field Values

CU_TRSF_NORMALIZED_COORDINATES

public static final int CU_TRSF_NORMALIZED_COORDINATES
Use normalized texture coordinates in the range [0,1) instead of [0,dim)

See Also:
Constant Field Values
Method Detail

setLogLevel

public static void setLogLevel(LogLevel logLevel)
Set the specified log level for the JCuda driver library.

Currently supported log levels:
LOG_QUIET: Never print anything
LOG_ERROR: Print error messages
LOG_TRACE: Print a trace of all native function calls

Parameters:
logLevel - The log level to use.

setExceptionsEnabled

public static void setExceptionsEnabled(boolean enabled)
Enables or disables exceptions. By default, the methods of this class only return the CUresult error code from the underlying CUDA function. If exceptions are enabled, a CudaException with a detailed error message will be thrown if a method is about to return a result code that is not CUresult.CUDA_SUCCESS.

Parameters:
enabled - Whether exceptions are enabled

align

public static int align(int value,
                        int alignment)
Returns the given (address) value, adjusted to have the given alignment. This function may be used to align the parameters for a kernel call according to their alignment requirements.

Parameters:
value - The address value
alignment - The desired alignment
Returns:
The aligned address value

cuInit

public static int cuInit(int Flags)
Initialize the CUDA driver API.

SYNOPSIS
CUresult cuInit( unsigned int Flags );

DESCRIPTION
Initializes the driver API and must be called before any other function from the driver API. Currently, the Flags parameter must be 0. If cuInit() has not been called, any function from the driver API will return CUDA_ERROR_NOT_INITIALIZED.

Returns:
CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NO_DEVICE,

cuDeviceGet

public static int cuDeviceGet(CUdevice device,
                              int ordinal)
Returns a device-handle.

SYNOPSIS
CUresult cuDeviceGet(CUdevice* dev, int ordinal);

DESCRIPTION
Returns in *dev a device handle given an ordinal in the range [0, cuDeviceGetCount()-1].

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
See Also:
cuDeviceComputeCapability(int[], int[], jcuda.driver.CUdevice), cuDeviceGetAttribute(int[], int, jcuda.driver.CUdevice), cuDeviceGetCount(int[]), cuDeviceGetName(byte[], int, jcuda.driver.CUdevice), cuDeviceGetProperties(jcuda.driver.CUdevprop, jcuda.driver.CUdevice), cuDeviceTotalMem(int[], jcuda.driver.CUdevice)

cuDeviceGetCount

public static int cuDeviceGetCount(int[] count)
Returns the number of compute-capable devices.

SYNOPSIS
CUresult cuDeviceGetCount(int* count);

DESCRIPTION
Returns in *count the number of devices with compute capability greater than or equal to 1.0 that are available for execution. If there is no such device, cuDeviceGetCount() returns 0.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuDeviceComputeCapability(int[], int[], jcuda.driver.CUdevice), cuDeviceGetAttribute(int[], int, jcuda.driver.CUdevice), cuDeviceGetName(byte[], int, jcuda.driver.CUdevice), cuDeviceGet(jcuda.driver.CUdevice, int), cuDeviceGetProperties(jcuda.driver.CUdevprop, jcuda.driver.CUdevice), cuDeviceTotalMem(int[], jcuda.driver.CUdevice)

cuDeviceGetName

public static int cuDeviceGetName(byte[] name,
                                  int len,
                                  CUdevice dev)
Returns an identifier string.

SYNOPSIS
CUresult cuDeviceGetName(char* name, int len, CUdevice dev);

DESCRIPTION
Returns an ASCII string identifying the device dev in the NULL-terminated string pointed to by name. len specifies the maximum length of the string that may be returned.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
See Also:
cuDeviceComputeCapability(int[], int[], jcuda.driver.CUdevice), cuDeviceGetAttribute(int[], int, jcuda.driver.CUdevice), cuDeviceGetCount(int[]), cuDeviceGet(jcuda.driver.CUdevice, int), cuDeviceGetProperties(jcuda.driver.CUdevprop, jcuda.driver.CUdevice), cuDeviceTotalMem(int[], jcuda.driver.CUdevice)

cuDeviceComputeCapability

public static int cuDeviceComputeCapability(int[] major,
                                            int[] minor,
                                            CUdevice dev)
Returns the compute capability of the device.

SYNOPSIS
CUresult cuDeviceComputeCapability(int* major, int* minor, CUdevice dev);

DESCRIPTION
Returns in *major and *minor the major and minor revision numbers that define the compute capability of device dev.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
See Also:
cuDeviceGetAttribute(int[], int, jcuda.driver.CUdevice), cuDeviceGetCount(int[]), cuDeviceGetName(byte[], int, jcuda.driver.CUdevice), cuDeviceGet(jcuda.driver.CUdevice, int), cuDeviceGetProperties(jcuda.driver.CUdevprop, jcuda.driver.CUdevice), cuDeviceTotalMem(int[], jcuda.driver.CUdevice)

cuDeviceTotalMem

public static int cuDeviceTotalMem(int[] bytes,
                                   CUdevice dev)
Returns the total amount of memory on the device.

SYNOPSIS
CUresult cuDeviceTotalMem( unsigned int* bytes, CUdevice dev );

DESCRIPTION
Returns in *bytes the total amount of memory available on the device dev in bytes.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
See Also:
cuDeviceComputeCapability(int[], int[], jcuda.driver.CUdevice), cuDeviceGetAttribute(int[], int, jcuda.driver.CUdevice), cuDeviceGetCount(int[]), cuDeviceGetName(byte[], int, jcuda.driver.CUdevice), cuDeviceGet(jcuda.driver.CUdevice, int), cuDeviceGetProperties(jcuda.driver.CUdevprop, jcuda.driver.CUdevice)

cuDeviceGetProperties

public static int cuDeviceGetProperties(CUdevprop prop,
                                        CUdevice dev)
Get device properties.

SYNOPSIS
CUresult cuDeviceGetProperties(CUdevprop* prop, CUdevice dev);

DESCRIPTION
Returns in *prop the properties of device dev. The CUdevprop structure is defined as:
 typedef struct CUdevprop_st {
     int maxThreadsPerBlock;
     int maxThreadsDim[3];
     int maxGridSize[3];
     int sharedMemPerBlock;
     int totalConstantMemory;
     int SIMDWidth;
     int memPitch;
     int regsPerBlock;
     int clockRate;
     int textureAlign;
 } CUdevprop;
 
where:
- maxThreadsPerBlock is the maximum number of threads per block;
- maxThreadsDim[3] is the maximum size of each dimension of a block;
- maxGridSize[3] is the maximum size of each dimension of a grid;
- sharedMemPerBlock is the total amount of shared memory available per block in bytes;
- totalConstantMemory is the total amount of constant memory available on the device in bytes;
- SIMDWidth is the warp size;
- memPitch is the maximum pitch allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch();
- regsPerBlock is the total number of registers available per block;
- clockRate is the clock frequency in kilohertz;
- textureAlign is the alignment requirement; texture base addresses that are aligned to textureAlign bytes do not need an offset applied to texture fetches.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
See Also:
cuDeviceComputeCapability(int[], int[], jcuda.driver.CUdevice), cuDeviceGetAttribute(int[], int, jcuda.driver.CUdevice), cuDeviceGetCount(int[]), cuDeviceGetName(byte[], int, jcuda.driver.CUdevice), cuDeviceGet(jcuda.driver.CUdevice, int), cuDeviceTotalMem(int[], jcuda.driver.CUdevice)

cuDeviceGetAttribute

public static int cuDeviceGetAttribute(int[] pi,
                                       int attrib,
                                       CUdevice dev)
Returns information about the device.

SYNOPSIS
CUresult cuDeviceGetAttribute(int* value, CUdevice_attribute attrib, CUdevice dev);

DESCRIPTION
Returns in *value the integer value of the attribute attrib on device dev. The supported attributes are:
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: maximum number of threads per block;
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: maximum x-dimension of a block;
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: maximum y-dimension of a block;
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: maximum z-dimension of a block;
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: maximum x-dimension of a grid;
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: maximum y-dimension of a grid;
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: maximum z-dimension of a grid;
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: maximum amount of shared memory available to a thread block in bytes; this amount is shared by all thread blocks simultaneously resident on a multiprocessor;
- CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: total amount of constant memory available on the device in bytes;
- CU_DEVICE_ATTRIBUTE_WARP_SIZE: warp size in threads;
- CU_DEVICE_ATTRIBUTE_MAX_PITCH: maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch();
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: maximum number of 32-bit registers available to a thread block; this number is shared by all thread blocks simultaneously resident on a multiprocessor;
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE: clock frequency in kilohertz;
- CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied to texture fetches;
- CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not;
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: number of multiprocessors on the device.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
See Also:
cuDeviceComputeCapability(int[], int[], jcuda.driver.CUdevice), cuDeviceGetCount(int[]), cuDeviceGetName(byte[], int, jcuda.driver.CUdevice), cuDeviceGet(jcuda.driver.CUdevice, int), cuDeviceGetProperties(jcuda.driver.CUdevprop, jcuda.driver.CUdevice), cuDeviceTotalMem(int[], jcuda.driver.CUdevice)

cuDriverGetVersion

public static int cuDriverGetVersion(int[] driverVersion)
Returns in driverVersion the version number of the installed CUDA driver. This function automatically returns CUDA_ERROR_INVALID_VALUE if the driverVersion argument is NULL.

Parameters:
driverVersion - Returns the CUDA driver version
Returns:
CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE

cuCtxCreate

public static int cuCtxCreate(CUcontext pctx,
                              int flags,
                              CUdevice dev)
Create a CUDA context.

SYNOPSIS
CUresult cuCtxCreate(CUcontext* pCtx, unsigned int Flags, CUdevice dev);

DESCRIPTION
Creates a new CUDA context and associates it with the calling thread. The Flags parameter is described below. The context is created with a usage count of 1 and the caller of cuCtxCreate() must call cuCtxDestroy() or cuCtxDetach() when done using the context. If a context is already current to the thread, it is supplanted by the newly created context and may be restored by a subsequent call to cuCtxPopCurrent().
The two LSBs of the Flags parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call, interacts with the OS scheduler when waiting for results from the GPU.
- CU_CTX_SCHED_AUTO: The default value if the Flags parameter is zero, uses a heuristic based on the number of active CUDA contexts in the process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS threads when waiting for the GPU, otherwise CUDA will not yield while waiting for results and actively spin on the processor.
- CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but may lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
- CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU, but can increase the performance of CPU threads performing work in parallel with the GPU.
- CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
- CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations. This flag must be set in order to allocate pinned host memory that is accessible to the GPU.

Note to Linux users:
Context creation will fail with CUDA_ERROR_UNKNOWN if the compute mode of the device is CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will also fail with CUDA_ERROR_UNKNOWN if the compute mode for the device is set to CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on the device. The function cuDeviceGetAttribute() can be used with CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the device. The nvidia-smi tool can be used to set the compute mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN,
See Also:
cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDetach(jcuda.driver.CUcontext), cuCtxDestroy(jcuda.driver.CUcontext), cuCtxPushCurrent(jcuda.driver.CUcontext), cuCtxPopCurrent(jcuda.driver.CUcontext)

cuCtxDestroy

public static int cuCtxDestroy(CUcontext ctx)
Destroy the current context or a floating CUDA context.

SYNOPSIS
CUresult cuCtxDestroy(CUcontext ctx);

DESCRIPTION
Destroys the given CUDA context. If the context usage count is not equal to 1, or the context is current to any CPU thread other than the current one, this function fails. Floating contexts (detached from a CPU thread via cuCtxPopCurrent()) may be destroyed by this function.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDetach(jcuda.driver.CUcontext), cuCtxPushCurrent(jcuda.driver.CUcontext), cuCtxPopCurrent(jcuda.driver.CUcontext)

cuCtxAttach

public static int cuCtxAttach(CUcontext pctx,
                              int flags)
Increment context usage-count.

SYNOPSIS
CUresult cuCtxAttach(CUcontext* pCtx, unsigned int Flags);

DESCRIPTION
Increments the usage count of the context and passes back a context handle in *pCtx that must be passed to cuCtxDetach() when the application is done with the context. cuCtxAttach() fails if there is no context current to the thread. Currently, the Flags parameter must be 0.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxDetach(jcuda.driver.CUcontext), cuCtxGetDevice(jcuda.driver.CUdevice), cuCtxSynchronize()

cuCtxDetach

public static int cuCtxDetach(CUcontext ctx)
Decrement a context’s usage-count.

SYNOPSIS
CUresult cuCtxDetach(CUcontext ctx);

DESCRIPTION
Decrements the usage count of the context, and destroys the context if the usage count goes to 0. The context must be a handle that was passed back by cuCtxCreate() or cuCtxAttach(), and must be current to the calling thread.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDestroy(jcuda.driver.CUcontext), cuCtxPushCurrent(jcuda.driver.CUcontext), cuCtxPopCurrent(jcuda.driver.CUcontext)

cuCtxPushCurrent

public static int cuCtxPushCurrent(CUcontext ctx)
Attach floating context to CPU thread.

SYNOPSIS
CUresult cuCtxPushCurrent(CUcontext ctx);

DESCRIPTION
Pushes the given context onto the CPU thread’s stack of current contexts. The specified context becomes the CPU thread’s current context, so all CUDA functions that operate on the current context are affected. The previous current context may be made current again by calling cuCtxDestroy() or cuCtxPopCurrent(). The context must be "floating," i.e. not attached to any thread. Contexts are made to float by calling cuCtxPopCurrent().

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDetach(jcuda.driver.CUcontext), cuCtxDestroy(jcuda.driver.CUcontext), cuCtxPopCurrent(jcuda.driver.CUcontext)

cuCtxPopCurrent

public static int cuCtxPopCurrent(CUcontext pctx)
Pops the current CUDA context from the current CPU thread.

SYNOPSIS
CUresult cuCtxPopCurrent(CUcontext *pctx);

DESCRIPTION
Pops the current CUDA context from the CPU thread. The CUDA context must have a usage count of 1. CUDA contexts have a usage count of 1 upon creation; the usage count may be incremented with cuCtxAttach() and decremented with cuCtxDetach(). If successful, cuCtxPopCurrent() passes back the context handle in *pctx. The context may then be made current to a different CPU thread by calling cuCtxPushCurrent(). Floating contexts may be destroyed by calling cuCtxDestroy(). If a context was current to the CPU thread before cuCtxCreate or cuCtxPushCurrent was called, this function makes that context current to the CPU thread again.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDetach(jcuda.driver.CUcontext), cuCtxDestroy(jcuda.driver.CUcontext), cuCtxPushCurrent(jcuda.driver.CUcontext)

cuCtxGetDevice

public static int cuCtxGetDevice(CUdevice device)
Return device-ID for current context.

SYNOPSIS
CUresult cuCtxGetDevice(CUdevice* device);

DESCRIPTION
Returns in *device the ordinal of the current context’s device.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDetach(jcuda.driver.CUcontext), cuCtxSynchronize()

cuCtxSynchronize

public static int cuCtxSynchronize()
Block for a context’s tasks to complete.

SYNOPSIS
CUresult cuCtxSynchronize(void);

DESCRIPTION
Blocks until the device has completed all preceding requested tasks. cuCtxSynchronize() returns an error if one of the preceding tasks failed.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuCtxAttach(jcuda.driver.CUcontext, int), cuCtxDetach(jcuda.driver.CUcontext), cuCtxGetDevice(jcuda.driver.CUdevice)

cuModuleLoad

public static int cuModuleLoad(CUmodule module,
                               java.lang.String fname)
Loads a compute module.

SYNOPSIS
CUresult cuModuleLoad(CUmodule* mod, const char* filename);

DESCRIPTION
Takes a file name filename and loads the corresponding module mod into the current context. The CUDA driver API does not attempt to lazily allocate the resources needed by a module; if the memory for functions and data (constant and global) needed by the module cannot be allocated, cuModuleLoad() fails. The file should be a cubin file as output by nvcc.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_FILE_NOT_FOUND,
See Also:
cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule), cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String)

cuModuleLoadData

public static int cuModuleLoadData(CUmodule module,
                                   byte[] image)
Loads a module’s data.

SYNOPSIS
CUresult cuModuleLoadData(CUmodule* mod, const void* image);

DESCRIPTION
Takes a pointer image and loads the corresponding module mod into the current context. The pointer may be obtained by mapping a cubin file, passing a cubin file as a text string, or incorporating a cubin object into the executable resources and using operating system calls such as Windows’ FindResource() to obtain the pointer.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule), cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String)

cuModuleLoadDataEx

public static int cuModuleLoadDataEx(CUmodule phMod,
                                     Pointer p,
                                     int numOptions,
                                     int[] options,
                                     Pointer optionValues)
Takes a pointer p and loads the corresponding module phMod into the current context. The pointer may be obtained by mapping a cubin or PTX file, passing a cubin or PTX file as a text string, or incorporating a cubin object into the executable resources and using operating system calls such as Windows FindResource() to obtain the pointer. Options are passed as an array via options and any corresponding parameters are passed in optionValues. The number of total options is supplied via numOptions. Any outputs will be returned via optionValues. Supported options are:
- CU_JIT_MAX_REGISTERS: input specifies the maximum number of registers per thread;
- CU_JIT_THREADS_PER_BLOCK: input specifies number of threads per block to target compilation for; output returns the number of threads the compiler actually targeted;
- CU_JIT_WALL_TIME: output returns the float value of wall clock time, in milliseconds, spent compiling the PTX code;
- CU_JIT_INFO_LOG_BUFFER: input is a pointer to a buffer in which to print any informational log messages from PTX assembly;
- CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: input is the size in bytes of the buffer; output is the number of bytes filled with messages;
- CU_JIT_ERROR_LOG_BUFFER: input is a pointer to a buffer in which to print any error log messages from PTX assembly;
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: input is the size in bytes of the buffer; output is the number of bytes filled with messages;
- CU_JIT_OPTIMIZATION_LEVEL: input is the level of optimization to apply to generated code (0 - 4), with 4 being the default and highest level;
- CU_JIT_TARGET_FROM_CUCONTEXT: causes compilation target to be determined based on current attached context (default);
- CU_JIT_TARGET: input is the compilation target based on supplied CUjit_target_enum; possible values are:
-– CU_TARGET_COMPUTE_10
-– CU_TARGET_COMPUTE_11
-– CU_TARGET_COMPUTE_12
-– CU_TARGET_COMPUTE_13

Parameters:
phMod - - Returned module
p - - Module data to load
numOptions - - Number of options
options - - Options for JIT
optionValues - - Option values for JIT
Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU
See Also:
cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String), cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule)

cuModuleLoadFatBinary

public static int cuModuleLoadFatBinary(CUmodule module,
                                        byte[] fatCubin)
Loads a fat-binary object.

SYNOPSIS
CUresult cuModuleLoadFatBinary(CUmodule* mod, const void* fatBin);

DESCRIPTION
Takes a pointer fatBin and loads the corresponding module mod into the current context. The pointer represents a fat binary object, which is a collection of different cubin files, all representing the same device code but compiled and optimized for different architectures. There is currently no documented API for constructing and using fat binary objects by programmers, and therefore this function is an internal function in this version of CUDA. More information can be found in the nvcc document.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU,
See Also:
cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule), cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String)

cuModuleUnload

public static int cuModuleUnload(CUmodule hmod)
Unloads a module.

SYNOPSIS
CUresult cuModuleUnload(CUmodule mod);

DESCRIPTION
Unloads a module mod from the current context.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String)

cuModuleGetFunction

public static int cuModuleGetFunction(CUfunction hfunc,
                                      CUmodule hmod,
                                      java.lang.String name)
Returns a function handle.

SYNOPSIS
CUresult cuModuleGetFunction(CUfunction* func, CUmodule mod, const char* funcname);

DESCRIPTION
Returns in *func the handle of the function of name funcname located in module mod. If no function of that name exists, cuModuleGetFunction() returns CUDA_ERROR_NOT_FOUND.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
See Also:
cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String)

cuModuleGetGlobal

public static int cuModuleGetGlobal(CUdeviceptr dptr,
                                    int[] bytes,
                                    CUmodule hmod,
                                    java.lang.String name)
Returns a global pointer from a module.

SYNOPSIS
CUresult cuModuleGetGlobal(CUdeviceptr* devPtr, unsigned int* bytes, CUmodule mod, const char* globalname);

DESCRIPTION
Returns in *devPtr and *bytes the base pointer and size of the global of name globalname located in module mod. If no variable of that name exists, cuModuleGetGlobal() returns CUDA_ERROR_NOT_FOUND. Both parameters devPtr and bytes are optional. If one of them is null, it is ignored.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
See Also:
cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule), cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetTexRef(jcuda.driver.CUtexref, jcuda.driver.CUmodule, java.lang.String)

cuModuleGetTexRef

public static int cuModuleGetTexRef(CUtexref pTexRef,
                                    CUmodule hmod,
                                    java.lang.String name)
Gets a handle to a texture-reference.

SYNOPSIS
CUresult cuModuleGetTexRef(CUtexref* texRef, CUmodule hmod, const char* texrefname);

DESCRIPTION
Returns in *texRef the handle of the texture reference of name texrefname in the module hmod. If no texture reference of that name exists, cuModuleGetTexRef() returns CUDA_ERROR_NOT_FOUND. This texture reference handle should not be destroyed, since it will be destroyed when the module is unloaded.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
See Also:
cuModuleLoad(jcuda.driver.CUmodule, java.lang.String), cuModuleLoadData(jcuda.driver.CUmodule, byte[]), cuModuleLoadFatBinary(jcuda.driver.CUmodule, byte[]), cuModuleUnload(jcuda.driver.CUmodule), cuModuleGetFunction(jcuda.driver.CUfunction, jcuda.driver.CUmodule, java.lang.String), cuModuleGetGlobal(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUmodule, java.lang.String)

cuMemGetInfo

public static int cuMemGetInfo(int[] free,
                               int[] total)
Gets free and total memory.

SYNOPSIS
CUresult cuMemGetInfo(unsigned int* free, unsigned int* total);

DESCRIPTION
Returns in *free and *total respectively, the free and total amount of memory available for allocation by the CUDA context, in bytes.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemHostAlloc

public static int cuMemHostAlloc(Pointer pp,
                                 long bytes,
                                 int Flags)
Allocates bytes bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated with this function and automatically accelerates calls to functions such as cuMemcpyHtoD(). Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than pageable memory obtained with functions such as malloc(). Allocating excessive amounts of pinned memory may degrade system performance, since it reduces the amount of memory available to the system for paging. As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device.

The Flags parameter enables different options to be specified that affect the allocation, as follows.
- CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be considered as pinned memory by all CUDA contexts, not just the one that performed the allocation.
- CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address space. The device pointer to the memory may be obtained by calling cuMemHostGetDevicePointer(). This feature is available only on GPUs with compute capability greater than or equal to 1.1.
- CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combined (WC). WC memory can be transferred across the PCI Express bus more quickly on some system configurations, but cannot be read efficiently by most CPUs. WC memory is a good option for buffers that will be written by the CPU and read by the GPU via mapped pinned memory or host->device transfers.

All of these flags are orthogonal to one another: a developer may allocate memory that is portable, mapped and/or write-combined with no restrictions. The CUDA context must have been created with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTALLOC_DEVICEMAP flag to have any effect. The CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for devices that do not support mapped pinned memory. The failure is deferred to cuMemHostGetDevicePointer() because the memory may be mapped into other CUDA contexts via the CU_MEMHOSTALLOC_PORTABLE flag. The memory allocated by this function must be freed with cuMemFreeHost().

Parameters:
pp - - Returned host pointer to page-locked memory
bytes - - Requested allocation size in bytes
Flags - - Flags for allocation request
Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY
See Also:
cuArray3DCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR), cuArray3DGetDescriptor(jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR, jcuda.driver.CUarray), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayDestroy(jcuda.driver.CUarray), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocHost(jcuda.Pointer, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D), cuMemcpy2DAsync(jcuda.driver.CUDA_MEMCPY2D, jcuda.driver.CUstream), cuMemcpy2DUnaligned(jcuda.driver.CUDA_MEMCPY2D), cuMemcpy3D(jcuda.driver.CUDA_MEMCPY3D), cuMemcpy3DAsync(jcuda.driver.CUDA_MEMCPY3D, jcuda.driver.CUstream), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyAtoHAsync(jcuda.Pointer, jcuda.driver.CUarray, int, int, jcuda.driver.CUstream), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoHAsync(jcuda.Pointer, jcuda.driver.CUdeviceptr, int, jcuda.driver.CUstream), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyHtoAAsync(jcuda.driver.CUarray, int, jcuda.Pointer, int, jcuda.driver.CUstream), cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyHtoDAsync(jcuda.driver.CUdeviceptr, jcuda.Pointer, int, jcuda.driver.CUstream), cuMemFree(jcuda.driver.CUdeviceptr), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuMemGetInfo(int[], int[]), cuMemHostGetDevicePointer(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, 
short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int)

cuMemHostGetDevicePointer

public static int cuMemHostGetDevicePointer(CUdeviceptr ret,
                                            Pointer p,
                                            int Flags)
Passes back the device pointer ret corresponding to the mapped, pinned host buffer p allocated by cuMemHostAlloc. cuMemHostGetDevicePointer() will fail if the CU_MEMHOSTALLOC_DEVICEMAP flag was not specified at the time the memory was allocated, or if the function is called on a GPU that does not support mapped pinned memory. Flags is reserved for future releases. For now, it must be set to 0.

Parameters:
ret - - Returned device pointer
p - - Host pointer
Flags - - Options (must be 0)
Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
See Also:
cuArray3DCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR), cuArray3DGetDescriptor(jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR, jcuda.driver.CUarray), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayDestroy(jcuda.driver.CUarray), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocHost(jcuda.Pointer, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D), cuMemcpy2DAsync(jcuda.driver.CUDA_MEMCPY2D, jcuda.driver.CUstream), cuMemcpy2DUnaligned(jcuda.driver.CUDA_MEMCPY2D), cuMemcpy3D(jcuda.driver.CUDA_MEMCPY3D), cuMemcpy3DAsync(jcuda.driver.CUDA_MEMCPY3D, jcuda.driver.CUstream), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyAtoHAsync(jcuda.Pointer, jcuda.driver.CUarray, int, int, jcuda.driver.CUstream), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoHAsync(jcuda.Pointer, jcuda.driver.CUdeviceptr, int, jcuda.driver.CUstream), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyHtoAAsync(jcuda.driver.CUarray, int, jcuda.Pointer, int, jcuda.driver.CUstream), cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyHtoDAsync(jcuda.driver.CUdeviceptr, jcuda.Pointer, int, jcuda.driver.CUstream), cuMemFree(jcuda.driver.CUdeviceptr), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuMemGetInfo(int[], int[]), cuMemHostAlloc(jcuda.Pointer, long, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), 
cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int)

cuMemHostGetFlags

public static int cuMemHostGetFlags(int[] pFlags,
                                    Pointer p)
Passes back the flags pFlags that were specified when allocating the pinned host buffer p allocated by cuMemHostAlloc.

cuMemHostGetFlags() will fail if the pointer does not reside in an allocation performed by cuMemAllocHost() or cuMemHostAlloc().

Parameters:
pFlags - Returned flags word
p - Host pointer

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE

See Also:
cuMemAllocHost(jcuda.Pointer, int), cuMemHostAlloc(jcuda.Pointer, long, int)

cuMemAlloc

public static int cuMemAlloc(CUdeviceptr dptr,
                             int bytesize)
Allocates device memory.

SYNOPSIS
CUresult cuMemAlloc(CUdeviceptr* devPtr, unsigned int count);

DESCRIPTION
Allocates count bytes of linear memory on the device and returns in *devPtr a pointer to the allocated memory. The allocated memory is suitably aligned for any kind of variable. The memory is not cleared. If count is 0, cuMemAlloc() returns CUDA_ERROR_INVALID_VALUE.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuMemGetInfo(int[], int[]), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemAllocPitch

public static int cuMemAllocPitch(CUdeviceptr dptr,
                                  int[] pPitch,
                                  int WidthInBytes,
                                  int Height,
                                  int ElementSizeBytes)
Allocates device memory.

SYNOPSIS
CUresult cuMemAllocPitch(CUdeviceptr* devPtr, unsigned int* pitch, unsigned int widthInBytes, unsigned int height, unsigned int elementSizeBytes);

DESCRIPTION
Allocates at least widthInBytes*height bytes of linear memory on the device and returns in *devPtr a pointer to the allocated memory. The function may pad the allocation to ensure that corresponding pointers in any given row will continue to meet the alignment requirements for coalescing as the address is updated from row to row. elementSizeBytes specifies the size of the largest reads and writes that will be performed on the memory range. elementSizeBytes may be 4, 8 or 16 (since coalesced memory transactions are not possible on other data sizes). If elementSizeBytes is smaller than the actual read/write size of a kernel, the kernel will run correctly, but possibly at reduced speed. The pitch returned in *pitch by cuMemAllocPitch() is the width in bytes of the allocation. The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array. Given the row and column of an array element of type T, the address is computed as
T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
The pitch returned by cuMemAllocPitch() is guaranteed to work with cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is recommended that programmers consider performing pitch allocations using cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is especially true if the application will be performing 2D memory copies between different regions of device memory (whether linear memory or CUDA arrays).

Parameters:
ElementSizeBytes - Size of biggest r/w to be performed by kernels on this memory: 4, 8 or 16 bytes
Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemFree

public static int cuMemFree(CUdeviceptr dptr)
Frees device memory.

SYNOPSIS
CUresult cuMemFree(CUdeviceptr devPtr);

DESCRIPTION
Frees the memory space pointed to by devPtr, which must have been returned by a previous call to cuMemAlloc() or cuMemAllocPitch().

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemGetAddressRange

public static int cuMemGetAddressRange(CUdeviceptr pbase,
                                       int[] psize,
                                       CUdeviceptr dptr)
Get information on memory allocations.

SYNOPSIS
CUresult cuMemGetAddressRange(CUdeviceptr* basePtr, unsigned int* size, CUdeviceptr devPtr);

DESCRIPTION
Returns the base address in *basePtr and the size in *size of the allocation by cuMemAlloc() or cuMemAllocPitch() that contains the input pointer devPtr. Both parameters basePtr and size are optional. If one of them is null, it is ignored.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemAllocHost

public static int cuMemAllocHost(Pointer pp,
                                 int bytesize)
Allocates page-locked host memory.

SYNOPSIS
CUresult cuMemAllocHost(void** hostPtr, unsigned int count);

DESCRIPTION
Allocates count bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated with this function and automatically accelerates calls to functions such as cuMemcpy(). Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than pageable memory obtained with functions such as malloc(). Allocating excessive amounts of memory with cuMemAllocHost() may degrade system performance, since it reduces the amount of memory available to the system for paging. As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemFreeHost

public static int cuMemFreeHost(Pointer p)
Frees page-locked host memory.

SYNOPSIS
CUresult cuMemFreeHost(void* hostPtr);

DESCRIPTION
Frees the memory space pointed to by hostPtr, which must have been returned by a previous call to cuMemAllocHost().

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuMemcpyHtoD

public static int cuMemcpyHtoD(CUdeviceptr dstDevice,
                               Pointer srcHost,
                               int ByteCount)
Copy memory from Host to Device.

SYNOPSIS
CUresult cuMemcpyHtoD(CUdeviceptr dstDevPtr, const void *srcHostPtr, unsigned int count);
CUresult cuMemcpyHtoDAsync(CUdeviceptr dstDevPtr, const void *srcHostPtr, unsigned int count, CUstream stream);

DESCRIPTION
Copies from host memory to device memory. dstDevPtr and srcHostPtr specify the base addresses of the destination and source, respectively. count specifies the number of bytes to copy. cuMemcpyHtoDAsync() is asynchronous and can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed as input.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyDtoH

public static int cuMemcpyDtoH(Pointer dstHost,
                               CUdeviceptr srcDevice,
                               int ByteCount)
Copies memory from Device to Host.

SYNOPSIS
CUresult cuMemcpyDtoH(void* dstHostPtr, CUdeviceptr srcDevPtr, unsigned int count);
CUresult cuMemcpyDtoHAsync(void* dstHostPtr, CUdeviceptr srcDevPtr, unsigned int count, CUstream stream);

DESCRIPTION
Copies from device to host memory. dstHostPtr and srcDevPtr specify the base addresses of the destination and source, respectively. count specifies the number of bytes to copy. cuMemcpyDtoHAsync() is asynchronous and can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed as input.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyDtoD

public static int cuMemcpyDtoD(CUdeviceptr dstDevice,
                               CUdeviceptr srcDevice,
                               int ByteCount)
Copies memory from Device to Device.

SYNOPSIS
CUresult cuMemcpyDtoD(CUdeviceptr dstDevPtr, CUdeviceptr srcDevPtr, unsigned int count);

DESCRIPTION
Copies from device memory to device memory. dstDevPtr and srcDevPtr are the base pointers of the destination and source, respectively. count specifies the number of bytes to copy.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyDtoA

public static int cuMemcpyDtoA(CUarray dstArray,
                               int dstIndex,
                               CUdeviceptr srcDevice,
                               int ByteCount)
Copies memory from Device to Array.

SYNOPSIS
CUresult cuMemcpyDtoA(CUarray dstArray, unsigned int dstIndex, CUdeviceptr srcDevPtr, unsigned int count);

DESCRIPTION
Copies from device memory to a 1D CUDA array. dstArray and dstIndex specify the CUDA array handle and starting index of the destination data. srcDevPtr specifies the base pointer of the source. count specifies the number of bytes to copy.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyAtoD

public static int cuMemcpyAtoD(CUdeviceptr dstDevice,
                               CUarray hSrc,
                               int SrcIndex,
                               int ByteCount)
Copies memory from Array to Device.

SYNOPSIS
CUresult cuMemcpyAtoD(CUdeviceptr dstDevPtr, CUarray srcArray, unsigned int srcIndex, unsigned int count);

DESCRIPTION
Copies from a 1D CUDA array to device memory. dstDevPtr specifies the base pointer of the destination and must be naturally aligned with the CUDA array elements. srcArray and srcIndex specify the CUDA array handle and the index (in array elements) of the array element where the copy is to begin. count specifies the number of bytes to copy and must be evenly divisible by the array element size.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyHtoA

public static int cuMemcpyHtoA(CUarray dstArray,
                               int dstIndex,
                               Pointer pSrc,
                               int ByteCount)
Copies memory from Host to Array.

SYNOPSIS
CUresult cuMemcpyHtoA(CUarray dstArray, unsigned int dstIndex, const void *srcHostPtr, unsigned int count);
CUresult cuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstIndex, const void *srcHostPtr, unsigned int count, CUstream stream);

DESCRIPTION
Copies from host memory to a 1D CUDA array. dstArray and dstIndex specify the CUDA array handle and starting index of the destination data. srcHostPtr specifies the base address of the source. count specifies the number of bytes to copy. cuMemcpyHtoAAsync() is asynchronous and can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed as input.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyAtoH

public static int cuMemcpyAtoH(Pointer dstHost,
                               CUarray srcArray,
                               int srcIndex,
                               int ByteCount)
Copies memory from Array to Host.

SYNOPSIS
CUresult cuMemcpyAtoH(void* dstHostPtr, CUarray srcArray, unsigned int srcIndex, unsigned int count);
CUresult cuMemcpyAtoHAsync(void* dstHostPtr, CUarray srcArray, unsigned int srcIndex, unsigned int count, CUstream stream);

DESCRIPTION
Copies from a 1D CUDA array to host memory. dstHostPtr specifies the base pointer of the destination. srcArray and srcIndex specify the CUDA array handle and starting index of the source data. count specifies the number of bytes to copy. cuMemcpyAtoHAsync() is asynchronous and can optionally be associated to a stream by passing a nonzero stream argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed as input.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpyAtoA

public static int cuMemcpyAtoA(CUarray dstArray,
                               int dstIndex,
                               CUarray srcArray,
                               int srcIndex,
                               int ByteCount)
Copies memory from Array to Array.

SYNOPSIS
CUresult cuMemcpyAtoA(CUarray dstArray, unsigned int dstIndex, CUarray srcArray, unsigned int srcIndex, unsigned int count);

DESCRIPTION
Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and source CUDA arrays for the copy, respectively. dstIndex and srcIndex specify the destination and source indices into the CUDA array. These values are in the range [0, Width-1] for the CUDA array; they are not byte offsets. count is the number of bytes to be copied. The elements in the CUDA arrays need not be the same format, but the elements must be the same size; and count must be evenly divisible by that size.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpy2D

public static int cuMemcpy2D(CUDA_MEMCPY2D pCopy)
Copies memory for 2D arrays.

SYNOPSIS
CUresult cuMemcpy2D(const CUDA_MEMCPY2D* copyParam);
CUresult cuMemcpy2DUnaligned(const CUDA_MEMCPY2D* copyParam);
CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D* copyParam, CUstream stream);


DESCRIPTION
Perform a 2D memory copy according to the parameters specified in copyParam. The CUDA_MEMCPY2D structure is defined as such:
 typedef struct CUDA_MEMCPY2D_st {
     unsigned int srcXInBytes, srcY;
     CUmemorytype srcMemoryType;
     const void *srcHost;
     CUdeviceptr srcDevice;
     CUarray srcArray;
     unsigned int srcPitch;
     unsigned int dstXInBytes, dstY;
     CUmemorytype dstMemoryType;
     void *dstHost;
     CUdeviceptr dstDevice;
     CUarray dstArray;
     unsigned int dstPitch;
     unsigned int WidthInBytes;
     unsigned int Height;
 } CUDA_MEMCPY2D;
 
where: - srcMemoryType and dstMemoryType specify the type of memory of the source and destination, respectively; CUmemorytype_enum is defined as such:
 typedef enum CUmemorytype_enum {
     CU_MEMORYTYPE_HOST = 0x01,
     CU_MEMORYTYPE_DEVICE = 0x02,
     CU_MEMORYTYPE_ARRAY = 0x03
 } CUmemorytype;
 
If srcMemoryType is CU_MEMORYTYPE_HOST, srcHost and srcPitch specify the (host) base address of the source data and the bytes per row to apply. srcArray is ignored.
If srcMemoryType is CU_MEMORYTYPE_DEVICE, srcDevice and srcPitch specify the (device) base address of the source data and the bytes per row to apply. srcArray is ignored.
If srcMemoryType is CU_MEMORYTYPE_ARRAY, srcArray specifies the handle of the source data. srcHost, srcDevice and srcPitch are ignored.
If dstMemoryType is CU_MEMORYTYPE_HOST, dstHost and dstPitch specify the (host) base address of the destination data and the bytes per row to apply. dstArray is ignored.
If dstMemoryType is CU_MEMORYTYPE_DEVICE, dstDevice and dstPitch specify the (device) base address of the destination data and the bytes per row to apply. dstArray is ignored.
If dstMemoryType is CU_MEMORYTYPE_ARRAY, dstArray specifies the handle of the destination data. dstHost, dstDevice and dstPitch are ignored.
- srcXInBytes and srcY specify the base address of the source data for the copy. For host pointers, the starting address is
void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
For device pointers, the starting address is
CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
For CUDA arrays, srcXInBytes must be evenly divisible by the array element size.
- dstXInBytes and dstY specify the base address of the destination data for the copy. For host pointers, the base address is
void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
For device pointers, the starting address is
CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
For CUDA arrays, dstXInBytes must be evenly divisible by the array element size.
- WidthInBytes and Height specify the width (in bytes) and height of the 2D copy being performed. Any pitches must be greater than or equal to WidthInBytes.
cuMemcpy2D() returns an error if any pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D(). On intra-device memory copies (device - device, CUDA array - device, CUDA array - CUDA array), cuMemcpy2D() may fail for pitches not computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this restriction, but may run significantly slower in the cases where cuMemcpy2D() would have returned an error code. cuMemcpy2DAsync() is asynchronous and can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed as input.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy3D(jcuda.driver.CUDA_MEMCPY3D), cuMemcpy3DAsync(jcuda.driver.CUDA_MEMCPY3D, jcuda.driver.CUstream)

cuMemcpy2DUnaligned

public static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy)
See Also:
cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpy3D

public static int cuMemcpy3D(CUDA_MEMCPY3D pCopy)
Copies memory for 3D arrays.

SYNOPSIS
CUresult cuMemcpy3D(const CUDA_MEMCPY3D* copyParam);
CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D* copyParam, CUstream stream);


DESCRIPTION
Perform a 3D memory copy according to the parameters specified in copyParam. The CUDA_MEMCPY3D structure is defined as such:
 typedef struct CUDA_MEMCPY3D_st {
     unsigned int srcXInBytes, srcY, srcZ;
     unsigned int srcLOD;
     CUmemorytype srcMemoryType;
     const void *srcHost;
     CUdeviceptr srcDevice;
     CUarray srcArray;
     unsigned int srcPitch; // ignored when src is array
     unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
     unsigned int dstXInBytes, dstY, dstZ;
     unsigned int dstLOD;
     CUmemorytype dstMemoryType;
     void *dstHost;
     CUdeviceptr dstDevice;
     CUarray dstArray;
     unsigned int dstPitch; // ignored when dst is array
     unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
     unsigned int WidthInBytes;
     unsigned int Height;
     unsigned int Depth;
 } CUDA_MEMCPY3D;
 
CUresult CUDAAPI cuMemcpy3D( const CUDA_MEMCPY3D *pCopy ); where: - srcMemoryType and dstMemoryType specify the type of memory of the source and destination, respectively; CUmemorytype_enum is defined as such:
 typedef enum CUmemorytype_enum {
     CU_MEMORYTYPE_HOST = 0x01,
     CU_MEMORYTYPE_DEVICE = 0x02,
     CU_MEMORYTYPE_ARRAY = 0x03
 } CUmemorytype;
 
If srcMemoryType is CU_MEMORYTYPE_HOST, srcHost, srcPitch and srcHeight specify the (host) base address of the source data, the bytes per row, and the height of each 2D slice of the 3D array. srcArray is ignored.
If srcMemoryType is CU_MEMORYTYPE_DEVICE, srcDevice, srcPitch and srcHeight specify the (device) base address of the source data, the bytes per row, and the height of each 2D slice of the 3D array. srcArray is ignored.
If srcMemoryType is CU_MEMORYTYPE_ARRAY, srcArray specifies the handle of the source data. srcHost, srcDevice, srcPitch and srcHeight are ignored.
If dstMemoryType is CU_MEMORYTYPE_HOST, dstHost, dstPitch and dstHeight specify the (host) base address of the destination data, the bytes per row, and the height of each 2D slice of the 3D array. dstArray is ignored.
If dstMemoryType is CU_MEMORYTYPE_DEVICE, dstDevice, dstPitch and dstHeight specify the (device) base address of the destination data, the bytes per row, and the height of each 2D slice of the 3D array. dstArray is ignored.
If dstMemoryType is CU_MEMORYTYPE_ARRAY, dstArray specifies the handle of the destination data. dstHost, dstDevice, dstPitch and dstHeight are ignored.
- srcXInBytes, srcY and srcZ specify the base address of the source data for the copy. For host pointers, the starting address is
void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
For device pointers, the starting address is
CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
For CUDA arrays, srcXInBytes must be evenly divisible by the array element size.
- dstXInBytes, dstY and dstZ specify the base address of the destination data for the copy. For host pointers, the base address is
void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
For device pointers, the starting address is
CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
For CUDA arrays, dstXInBytes must be evenly divisible by the array element size.
- WidthInBytes, Height and Depth specify the width (in bytes), height and depth of the 3D copy being performed. Any pitches must be greater than or equal to WidthInBytes.
cuMemcpy3D() returns an error if any pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). cuMemcpy3DAsync() is asynchronous and can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed as input. The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be set to 0.

Returns:
CUDA_SUCCESS,
See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int), cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int), cuMemcpyDtoA(jcuda.driver.CUarray, int, jcuda.driver.CUdeviceptr, int), cuMemcpyAtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUarray, int, int), cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int), cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int), cuMemcpyAtoA(jcuda.driver.CUarray, int, jcuda.driver.CUarray, int, int), cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D), cuMemcpy2DAsync(jcuda.driver.CUDA_MEMCPY2D, jcuda.driver.CUstream)

cuMemcpyHtoDAsync

public static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice,
                                    Pointer srcHost,
                                    int ByteCount,
                                    CUstream hStream)
Copies memory.

See Also:
cuMemcpyHtoD(jcuda.driver.CUdeviceptr, jcuda.Pointer, int)

cuMemcpyDtoHAsync

public static int cuMemcpyDtoHAsync(Pointer dstHost,
                                    CUdeviceptr srcDevice,
                                    int ByteCount,
                                    CUstream hStream)
Copies memory.

See Also:
cuMemcpyDtoH(jcuda.Pointer, jcuda.driver.CUdeviceptr, int)

cuMemcpyDtoDAsync

public static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice,
                                    CUdeviceptr srcDevice,
                                    int ByteCount,
                                    CUstream hStream)
Copies memory.

See Also:
cuMemcpyDtoD(jcuda.driver.CUdeviceptr, jcuda.driver.CUdeviceptr, int)

cuMemcpyHtoAAsync

public static int cuMemcpyHtoAAsync(CUarray dstArray,
                                    int dstIndex,
                                    Pointer pSrc,
                                    int ByteCount,
                                    CUstream hStream)
Copies memory.

See Also:
cuMemcpyHtoA(jcuda.driver.CUarray, int, jcuda.Pointer, int)

cuMemcpyAtoHAsync

public static int cuMemcpyAtoHAsync(Pointer dstHost,
                                    CUarray srcArray,
                                    int srcIndex,
                                    int ByteCount,
                                    CUstream hStream)
Copies memory.

See Also:
cuMemcpyAtoH(jcuda.Pointer, jcuda.driver.CUarray, int, int)

cuMemcpy2DAsync

public static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy,
                                  CUstream hStream)
Copies memory.

See Also:
cuMemcpy2D(jcuda.driver.CUDA_MEMCPY2D)

cuMemcpy3DAsync

public static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy,
                                  CUstream hStream)
Copies memory.

See Also:
cuMemcpy3D(jcuda.driver.CUDA_MEMCPY3D)

cuMemsetD8

public static int cuMemsetD8(CUdeviceptr dstDevice,
                             char uc,
                             int N)
Initializes device memory.

SYNOPSIS
CUresult cuMemsetD8(CUdeviceptr dstDevPtr, unsigned char value, unsigned int count );
CUresult cuMemsetD16(CUdeviceptr dstDevPtr, unsigned short value, unsigned int count );
CUresult cuMemsetD32(CUdeviceptr dstDevPtr, unsigned int value, unsigned int count );

DESCRIPTION
Sets the memory range of count 8-, 16-, or 32-bit values to the specified value value.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int)

cuMemsetD16

public static int cuMemsetD16(CUdeviceptr dstDevice,
                              short us,
                              int N)
Initializes device memory.

See Also:
cuMemsetD8(jcuda.driver.CUdeviceptr, char, int)

cuMemsetD32

public static int cuMemsetD32(CUdeviceptr dstDevice,
                              int ui,
                              int N)
Initializes device memory.

See Also:
cuMemsetD8(jcuda.driver.CUdeviceptr, char, int)

cuMemsetD2D8

public static int cuMemsetD2D8(CUdeviceptr dstDevice,
                               int dstPitch,
                               char uc,
                               int Width,
                               int Height)
Initializes device memory.

SYNOPSIS
CUresult cuMemsetD2D8(CUdeviceptr dstDevPtr, unsigned int dstPitch, unsigned char value, unsigned int width, unsigned int height );
CUresult cuMemsetD2D16(CUdeviceptr dstDevPtr, unsigned int dstPitch, unsigned short value, unsigned int width, unsigned int height );
CUresult cuMemsetD2D32(CUdeviceptr dstDevPtr, unsigned int dstPitch, unsigned int value, unsigned int width, unsigned int height );

DESCRIPTION
Sets the 2D memory range of width 8-, 16-, or 32-bit values to the specified value value. height specifies the number of rows to set, and dstPitch specifies the number of bytes between each row. These functions perform fastest when the pitch is one that has been passed back by cuMemAllocPitch().

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int)

cuMemsetD2D16

public static int cuMemsetD2D16(CUdeviceptr dstDevice,
                                int dstPitch,
                                short us,
                                int Width,
                                int Height)
Initializes device memory.

See Also:
cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int)

cuMemsetD2D32

public static int cuMemsetD2D32(CUdeviceptr dstDevice,
                                int dstPitch,
                                int ui,
                                int Width,
                                int Height)
Initializes device memory.

See Also:
cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int)

cuFuncGetAttribute

public static int cuFuncGetAttribute(int[] pi,
                                     int attrib,
                                     CUfunction func)
Returns in pi the integer value of the attribute attrib on the kernel given by func. The supported attributes are:
- CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The number of threads beyond which a launch of the function would fail. This number depends on both the function and the device on which the function is currently loaded.
- CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of statically-allocated shared memory required by this function. This does not include dynamically-allocated shared memory requested by the user at runtime.
- CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated constant memory required by this function.
- CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of thread local memory used by this function.
- CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread of this function.

Parameters:
pi - Returned attribute value
attrib - Attribute requested
func - Function to query attribute of
Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int), cuLaunchGridAsync(jcuda.driver.CUfunction, int, int, jcuda.driver.CUstream)

cuFuncSetBlockShape

public static int cuFuncSetBlockShape(CUfunction hfunc,
                                      int x,
                                      int y,
                                      int z)
Sets the block-dimensions for the function.

SYNOPSIS
CUresult cuFuncSetBlockShape(CUfunction func, int x, int y, int z);

DESCRIPTION
Specifies the X, Y and Z dimensions of the thread blocks that are created when the kernel given by func is launched.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuFuncSetSharedSize

public static int cuFuncSetSharedSize(CUfunction hfunc,
                                      int bytes)
Sets the shared-memory size for the function.

SYNOPSIS
CUresult cuFuncSetSharedSize(CUfunction func, unsigned int bytes);

DESCRIPTION
Sets through bytes the amount of shared memory that will be available to each thread block when the kernel given by func is launched.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuFuncSetCacheConfig

public static int cuFuncSetCacheConfig(CUfunction hfunc,
                                       int config)
Sets the preferred cache configuration for the device function hfunc.

On devices where the L1 cache and shared memory use the same hardware resources, this sets through config the preferred cache configuration for the device function hfunc. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose a different configuration if required to execute hfunc.
This setting does nothing on devices where the size of the L1 cache and shared memory are fixed.

Switching between configuration modes may insert a device-side synchronization point for streamed kernel launches.

The supported cache modes are:

CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
CU_FUNC_CACHE_PREFER_SHARED: function prefers larger shared memory and smaller L1 cache.
CU_FUNC_CACHE_PREFER_L1: function prefers larger L1 cache and smaller shared memory.


Parameters:
hfunc - Kernel to configure cache for
config - Requested cache configuration

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncGetAttribute(int[], int, jcuda.driver.CUfunction), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int), cuLaunchGridAsync(jcuda.driver.CUfunction, int, int, jcuda.driver.CUstream)

cuArrayCreate

public static int cuArrayCreate(CUarray pHandle,
                                CUDA_ARRAY_DESCRIPTOR pAllocateArray)
Creates a 1D or 2D CUDA array.

SYNOPSIS
CUresult cuArrayCreate(CUarray* array, const CUDA_ARRAY_DESCRIPTOR* desc);

DESCRIPTION
Creates a CUDA array according to the CUDA_ARRAY_DESCRIPTOR structure desc and returns a handle to the new CUDA array in *array. The CUDA_ARRAY_DESCRIPTOR structure is defined as such:
 typedef struct {
     unsigned int Width;
     unsigned int Height;
     CUarray_format Format;
     unsigned int NumChannels;
 } CUDA_ARRAY_DESCRIPTOR;
 
where: - Width and Height are the width and height of the CUDA array (in elements); the CUDA array is one-dimensional if height is 0, two-dimensional otherwise; - NumChannels specifies the number of packed components per CUDA array element; it may be 1, 2 or 4; - Format specifies the format of the elements; CUarray_format is defined as such:
 typedef enum CUarray_format_enum {
     CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
     CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
     CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
     CU_AD_FORMAT_SIGNED_INT8 = 0x08,
     CU_AD_FORMAT_SIGNED_INT16 = 0x09,
     CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
     CU_AD_FORMAT_HALF = 0x10,
     CU_AD_FORMAT_FLOAT = 0x20
 } CUarray_format;
 
Here are examples of CUDA array descriptions: - Description for a CUDA array of 2048 floats:
 CUDA_ARRAY_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_FLOAT;
 desc.NumChannels = 1;
 desc.Width = 2048;
 desc.Height = 1;
 
- Description for a 64 x 64 CUDA array of floats:
 CUDA_ARRAY_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_FLOAT;
 desc.NumChannels = 1;
 desc.Width = 64;
 desc.Height = 64;
 
- Description for a width x height CUDA array of 64-bit, 4x16-bit float16’s:
 CUDA_ARRAY_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_HALF;
 desc.NumChannels = 4;
 desc.Width = width;
 desc.Height = height;
 
- Description for a width x height CUDA array of 16-bit elements, each of which is two 8-bit unsigned chars:
 CUDA_ARRAY_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
 desc.NumChannels = 2;
 desc.Width = width;
 desc.Height = height;
 

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuArrayGetDescriptor

public static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor,
                                       CUarray hArray)
Get a 1D or 2D CUDA array descriptor.

SYNOPSIS
CUresult cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR* arrayDesc, CUarray array);

DESCRIPTION
Returns in *arrayDesc a descriptor of the format and dimensions of the 1D or 2D CUDA array array. It is useful for subroutines that have been passed a CUDA array, but need to know the CUDA array parameters for validation or other purposes.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE,
See Also:
cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArray3DCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR), cuArray3DGetDescriptor(jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray)

cuArrayDestroy

public static int cuArrayDestroy(CUarray hArray)
Destroys a CUDA array.

SYNOPSIS
CUresult cuArrayDestroy(CUarray array);

DESCRIPTION
Destroys the CUDA array array.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ARRAY_IS_MAPPED,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuMemsetD8(jcuda.driver.CUdeviceptr, char, int), cuMemsetD16(jcuda.driver.CUdeviceptr, short, int), cuMemsetD32(jcuda.driver.CUdeviceptr, int, int), cuMemsetD2D8(jcuda.driver.CUdeviceptr, int, char, int, int), cuMemsetD2D16(jcuda.driver.CUdeviceptr, int, short, int, int), cuMemsetD2D32(jcuda.driver.CUdeviceptr, int, int, int, int)

cuArray3DCreate

public static int cuArray3DCreate(CUarray pHandle,
                                  CUDA_ARRAY3D_DESCRIPTOR pAllocateArray)
Creates a CUDA array.

SYNOPSIS
CUresult cuArray3DCreate(CUarray* array, const CUDA_ARRAY3D_DESCRIPTOR* desc);

DESCRIPTION
Creates a CUDA array according to the CUDA_ARRAY3D_DESCRIPTOR structure desc and returns a handle to the new CUDA array in *array. The CUDA_ARRAY3D_DESCRIPTOR structure is defined as such:
 typedef struct {
     unsigned int Width;
     unsigned int Height;
     unsigned int Depth;
     CUarray_format Format;
     unsigned int NumChannels;
     unsigned int Flags;
 } CUDA_ARRAY3D_DESCRIPTOR;
 
where: - Width, Height and Depth are the width, height and depth of the CUDA array (in elements); the CUDA array is one-dimensional if height and depth are 0, two-dimensional if depth is 0, and three-dimensional otherwise; - NumChannels specifies the number of packed components per CUDA array element; it may be 1, 2 or 4; - Format specifies the format of the elements; CUarray_format is defined as such:
 typedef enum CUarray_format_enum {
     CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
     CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
     CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
     CU_AD_FORMAT_SIGNED_INT8 = 0x08,
     CU_AD_FORMAT_SIGNED_INT16 = 0x09,
     CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
     CU_AD_FORMAT_HALF = 0x10,
     CU_AD_FORMAT_FLOAT = 0x20
 } CUarray_format;
 
- Flags provides for future features. For now, it must be set to 0. Here are examples of CUDA array descriptions: - Description for a CUDA array of 2048 floats:
 CUDA_ARRAY3D_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_FLOAT;
 desc.NumChannels = 1;
 desc.Width = 2048;
 desc.Height = 0;
 desc.Depth = 0;
 
- Description for a 64 x 64 CUDA array of floats:
 CUDA_ARRAY3D_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_FLOAT;
 desc.NumChannels = 1;
 desc.Width = 64;
 desc.Height = 64;
 desc.Depth = 0;
 
- Description for a width x height x depth CUDA array of 64-bit, 4x16-bit float16’s:
 CUDA_ARRAY3D_DESCRIPTOR desc;
 desc.Format = CU_AD_FORMAT_HALF;
 desc.NumChannels = 4;
 desc.Width = width;
 desc.Height = height;
 desc.Depth = depth;
 

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN,
See Also:
cuMemGetInfo(int[], int[]), cuMemAlloc(jcuda.driver.CUdeviceptr, int), cuMemAllocPitch(jcuda.driver.CUdeviceptr, int[], int, int, int), cuMemFree(jcuda.driver.CUdeviceptr), cuMemAllocHost(jcuda.Pointer, int), cuMemFreeHost(jcuda.Pointer), cuMemGetAddressRange(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUdeviceptr), cuArray3DGetDescriptor(jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray)

cuArray3DGetDescriptor

public static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor,
                                         CUarray hArray)
Get a 3D CUDA array descriptor.

SYNOPSIS
CUresult cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR* arrayDesc, CUarray array);

DESCRIPTION
Returns in *arrayDesc a descriptor containing information on the format and dimensions of the CUDA array array. It is useful for subroutines that have been passed a CUDA array, but need to know the CUDA array parameters for validation or other purposes.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE,
See Also:
cuArrayCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY_DESCRIPTOR), cuArray3DCreate(jcuda.driver.CUarray, jcuda.driver.CUDA_ARRAY3D_DESCRIPTOR), cuArrayGetDescriptor(jcuda.driver.CUDA_ARRAY_DESCRIPTOR, jcuda.driver.CUarray), cuArrayDestroy(jcuda.driver.CUarray)

cuTexRefCreate

public static int cuTexRefCreate(CUtexref pTexRef)
Creates a texture-reference.

SYNOPSIS
CUresult cuTexRefCreate(CUtexref* texRef);

DESCRIPTION
Creates a texture reference and returns its handle in *texRef. Once created, the application must call cuTexRefSetArray() or cuTexRefSetAddress() to associate the reference with allocated memory. Other texture reference functions are used to specify the format and interpretation (addressing, filtering, etc.) to be used when the memory is read through this texture reference. To associate the texture reference with a texture ordinal for a given function, the application should call cuParamSetTexRef().

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefDestroy

public static int cuTexRefDestroy(CUtexref hTexRef)
Destroys a texture-reference.

SYNOPSIS
CUresult cuTexRefDestroy(CUtexref texRef);

DESCRIPTION
Destroys the texture reference.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefSetArray

public static int cuTexRefSetArray(CUtexref hTexRef,
                                   CUarray hArray,
                                   int Flags)
Binds an array to a texture-reference.

SYNOPSIS
CUresult cuTexRefSetArray(CUtexref texRef, CUarray array, unsigned int flags);

DESCRIPTION
Binds the CUDA array array to the texture reference texRef. Any previous address or CUDA array state associated with the texture reference is superseded by this function. flags must be set to CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to texRef is unbound.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefSetAddress

public static int cuTexRefSetAddress(int[] ByteOffset,
                                     CUtexref hTexRef,
                                     CUdeviceptr dptr,
                                     int bytes)
Binds an address as a texture-reference.

SYNOPSIS
CUresult cuTexRefSetAddress(unsigned int* byteOffset, CUtexref texRef, CUdeviceptr devPtr, int bytes);

DESCRIPTION
Binds a linear address range to the texture reference texRef. Any previous address or CUDA array state associated with the texture reference is superseded by this function. Any memory previously bound to texRef is unbound. Since the hardware enforces an alignment requirement on texture base addresses, cuTexRefSetAddress() passes back a byte offset in *byteOffset that must be applied to texture fetches in order to read from the desired memory. This offset must be divided by the texel size and passed to kernels that read from the texture so they can be applied to the tex1Dfetch() function. If the device memory pointer was returned from cuMemAlloc(), the offset is guaranteed to be 0 and NULL may be passed as the ByteOffset parameter.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefSetFormat

public static int cuTexRefSetFormat(CUtexref hTexRef,
                                    int fmt,
                                    int NumPackedComponents)
Sets the format for a texture-reference.

SYNOPSIS
CUresult cuTexRefSetFormat(CUtexref texRef, CUarray_format format, int numPackedComponents);

DESCRIPTION
Specifies the format of the data to be read by the texture reference texRef. format and numPackedComponents are exactly analogous to the Format and NumChannels members of the CUDA_ARRAY_DESCRIPTOR structure: they specify the format of each component and the number of components per array element.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefSetAddress2D

public static int cuTexRefSetAddress2D(CUtexref hTexRef,
                                       CUDA_ARRAY_DESCRIPTOR desc,
                                       CUdeviceptr dptr,
                                       int PitchInBytes)
Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated with the texture reference is superseded by this function. Any memory previously bound to hTexRef is unbound. When using a tex2D() function inside a kernel, we have to either call cuTexRefSetArray() to bind the corresponding texture reference to an array, or cuTexRefSetAddress2D() to bind the texture reference to linear memory. Function calls to cuTexRefSetFormat() cannot follow calls to cuTexRefSetAddress2D() for the same texture reference.

Parameters:
hTexRef - Texture reference to bind
desc - Descriptor of CUDA array
dptr - Device pointer to bind
PitchInBytes - Line pitch in bytes
Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref)

cuTexRefSetAddressMode

public static int cuTexRefSetAddressMode(CUtexref hTexRef,
                                         int dim,
                                         int am)
Set the addressing mode for a texture-reference.

SYNOPSIS
CUresult cuTexRefSetAddressMode(CUtexref texRef, int dim, CUaddress_mode mode);

DESCRIPTION
Specifies the addressing mode mode for the given dimension of the texture reference texRef. If dim is zero, the addressing mode is applied to the first parameter of the functions used to fetch from the texture; if dim is 1, the second, and so on. CUaddress_mode is defined as such:
 typedef enum CUaddress_mode_enum {
     CU_TR_ADDRESS_MODE_WRAP = 0,
     CU_TR_ADDRESS_MODE_CLAMP = 1,
     CU_TR_ADDRESS_MODE_MIRROR = 2,
 } CUaddress_mode;
 
Note that this call has no effect if texRef is bound to linear memory.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefSetFilterMode

public static int cuTexRefSetFilterMode(CUtexref hTexRef,
                                        int fm)
Sets the filtering mode for a texture-reference.

SYNOPSIS
CUresult cuTexRefSetFilterMode(CUtexref texRef, CUfilter_mode mode);

DESCRIPTION
Specifies the filtering mode mode to be used when reading memory through the texture reference texRef. CUfilter_mode_enum is defined as such:
 typedef enum CUfilter_mode_enum {
     CU_TR_FILTER_MODE_POINT = 0,
     CU_TR_FILTER_MODE_LINEAR = 1
 } CUfilter_mode;
 
Note that this call has no effect if texRef is bound to linear memory.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefSetFlags

public static int cuTexRefSetFlags(CUtexref hTexRef,
                                   int Flags)
Sets flags for a texture-reference.

SYNOPSIS
CUresult cuTexRefSetFlags(CUtexref texRef, unsigned int Flags);

DESCRIPTION
Specifies optional flags to control the behavior of data returned through the texture reference. The valid flags are: - CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote integer data to floating point data in the range [0, 1]; - CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefGetAddress

public static int cuTexRefGetAddress(CUdeviceptr pdptr,
                                     CUtexref hTexRef)
Gets the address associated with a texture-reference.

SYNOPSIS
CUresult cuTexRefGetAddress(CUdeviceptr* devPtr, CUtexref texRef);

DESCRIPTION
Returns in *devPtr the base address bound to the texture reference texRef, or returns CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any device memory range.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefGetArray

public static int cuTexRefGetArray(CUarray phArray,
                                   CUtexref hTexRef)
Gets the array bound to a texture-reference.

SYNOPSIS
CUresult cuTexRefGetArray(CUarray* array, CUtexref texRef);

DESCRIPTION
Returns in *array the CUDA array bound to the texture reference texRef, or returns CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any CUDA array.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefGetAddressMode

public static int cuTexRefGetAddressMode(int[] pam,
                                         CUtexref hTexRef,
                                         int dim)
Gets the addressing mode used by a texture-reference.

SYNOPSIS
CUresult cuTexRefGetAddressMode(CUaddress_mode* mode, CUtexref texRef, int dim);

DESCRIPTION
Returns in *mode the addressing mode corresponding to the dimension dim of the texture reference texRef. Currently the only valid values for dim are 0 and 1.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefGetFilterMode

public static int cuTexRefGetFilterMode(int[] pfm,
                                        CUtexref hTexRef)
Gets the filter-mode used by a texture-reference.

SYNOPSIS
CUresult cuTexRefGetFilterMode(CUfilter_mode* mode, CUtexref texRef);

DESCRIPTION
Returns in *mode the filtering mode of the texture reference texRef.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefGetFormat

public static int cuTexRefGetFormat(int[] pFormat,
                                    int[] pNumChannels,
                                    CUtexref hTexRef)
Gets the format used by a texture-reference.

SYNOPSIS
CUresult cuTexRefGetFormat(CUarray_format* format, int* numPackedComponents, CUtexref texRef);

DESCRIPTION
Returns in *format and *numPackedComponents the format and number of components of the CUDA array bound to the texture reference texRef. If format or numPackedComponents is null, it will be ignored.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFlags(int[], jcuda.driver.CUtexref)

cuTexRefGetFlags

public static int cuTexRefGetFlags(int[] pFlags,
                                   CUtexref hTexRef)
Gets the flags used by a texture-reference.

SYNOPSIS
CUresult cuTexRefGetFlags(unsigned int* flags, CUtexref texRef);

DESCRIPTION
Returns in *flags the flags of the texture reference texRef.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuTexRefCreate(jcuda.driver.CUtexref), cuTexRefDestroy(jcuda.driver.CUtexref), cuTexRefSetArray(jcuda.driver.CUtexref, jcuda.driver.CUarray, int), cuTexRefSetAddress(int[], jcuda.driver.CUtexref, jcuda.driver.CUdeviceptr, int), cuTexRefSetFormat(jcuda.driver.CUtexref, int, int), cuTexRefSetAddressMode(jcuda.driver.CUtexref, int, int), cuTexRefSetFilterMode(jcuda.driver.CUtexref, int), cuTexRefSetFlags(jcuda.driver.CUtexref, int), cuTexRefGetAddress(jcuda.driver.CUdeviceptr, jcuda.driver.CUtexref), cuTexRefGetArray(jcuda.driver.CUarray, jcuda.driver.CUtexref), cuTexRefGetAddressMode(int[], jcuda.driver.CUtexref, int), cuTexRefGetFilterMode(int[], jcuda.driver.CUtexref), cuTexRefGetFormat(int[], int[], jcuda.driver.CUtexref)

cuParamSetSize

public static int cuParamSetSize(CUfunction hfunc,
                                 int numbytes)
Sets the parameter-size for the function.

SYNOPSIS
CUresult cuParamSetSize(CUfunction func, unsigned int numbytes);

DESCRIPTION
Sets through numbytes the total size in bytes needed by the function parameters of function func.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuParamSeti

public static int cuParamSeti(CUfunction hfunc,
                              int offset,
                              int value)
Adds an integer parameter to the function’s argument list.

SYNOPSIS
CUresult cuParamSeti(CUfunction func, int offset, unsigned int value);

DESCRIPTION
Sets an integer parameter that will be specified the next time the kernel corresponding to func will be invoked. offset is a byte offset.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuParamSetf

public static int cuParamSetf(CUfunction hfunc,
                              int offset,
                              float value)
Adds a floating-point parameter to the function’s argument list.

SYNOPSIS
CUresult cuParamSetf(CUfunction func, int offset, float value);

DESCRIPTION
Sets a floating point parameter that will be specified the next time the kernel corresponding to func will be invoked. offset is a byte offset.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuParamSetv

public static int cuParamSetv(CUfunction hfunc,
                              int offset,
                              Pointer ptr,
                              int numbytes)
Adds arbitrary data to the function’s argument list.

SYNOPSIS
CUresult cuParamSetv(CUfunction func, int offset, void* ptr, unsigned int numbytes);

DESCRIPTION
Copies an arbitrary amount of data into the parameter space of the kernel corresponding to func. offset is a byte offset.

If the given Pointer is a device pointer, then the parameter is assumed to be the actual pointer (and not what the pointer points to), and the size (numbytes) should be Sizeof.POINTER.

Thus, to pass a pointer to device memory to a kernel, you may use
 
 CUdeviceptr d_in = new CUdeviceptr();
 JCudaDriver.cuMemAlloc(d_in, n);
 JCudaDriver.cuParamSetv(hfunc, offset, d_in, Sizeof.POINTER);
 

If the given Pointer is a pointer to host memory, then it is assumed that the parameter should be the value that the pointer points to, and thus, the size (numbytes) should be the Sizeof of the value that the pointer points to. Thus, to pass a short value to a kernel, you may use
 short value = 0;
 JCudaDriver.cuParamSetv(hfunc, offset, Pointer.to(new short[]{value}), Sizeof.SHORT);
 

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuParamSetTexRef

public static int cuParamSetTexRef(CUfunction hfunc,
                                   int texunit,
                                   CUtexref hTexRef)
Adds a texture-reference to the function’s argument list.

SYNOPSIS
CUresult cuParamSetTexRef(CUfunction func, int texunit, CUtexref texRef);

DESCRIPTION
Makes the CUDA array or linear memory bound to the texture reference texRef available to a device program as a texture. In this version of CUDA, the texture reference must be obtained via cuModuleGetTexRef() and the texunit parameter must be set to CU_PARAM_TR_DEFAULT.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuLaunch(jcuda.driver.CUfunction), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuLaunch

public static int cuLaunch(CUfunction f)
Launches a CUDA function.

SYNOPSIS
CUresult cuLaunch(CUfunction func);

DESCRIPTION
Invokes the kernel func on a 1×1 grid of blocks. The block contains the number of threads specified by a previous call to cuFuncSetBlockShape().

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuLaunchGrid

public static int cuLaunchGrid(CUfunction f,
                               int grid_width,
                               int grid_height)
Launches a CUDA function.

SYNOPSIS
CUresult cuLaunchGrid(CUfunction func, int grid_width, int grid_height); CUresult cuLaunchGridAsync(CUfunction func, int grid_width, int grid_height, CUstream stream);

DESCRIPTION
Invokes the kernel on a grid_width x grid_height grid of blocks. Each block contains the number of threads specified by a previous call to cuFuncSetBlockShape(). cuLaunchGridAsync() can optionally be associated to a stream by passing a non-zero stream argument.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
See Also:
cuFuncSetBlockShape(jcuda.driver.CUfunction, int, int, int), cuFuncSetSharedSize(jcuda.driver.CUfunction, int), cuParamSetSize(jcuda.driver.CUfunction, int), cuParamSeti(jcuda.driver.CUfunction, int, int), cuParamSetf(jcuda.driver.CUfunction, int, float), cuParamSetv(jcuda.driver.CUfunction, int, jcuda.Pointer, int), cuParamSetTexRef(jcuda.driver.CUfunction, int, jcuda.driver.CUtexref), cuLaunch(jcuda.driver.CUfunction)

cuLaunchGridAsync

public static int cuLaunchGridAsync(CUfunction f,
                                    int grid_width,
                                    int grid_height,
                                    CUstream hStream)
See Also:
cuLaunchGrid(jcuda.driver.CUfunction, int, int)

cuEventCreate

public static int cuEventCreate(CUevent phEvent,
                                int Flags)
Creates an event.

SYNOPSIS
CUresult cuEventCreate(CUevent* event, unsigned int flags);

DESCRIPTION
Creates an event with the flags specified via Flags. Valid flags include:
- CU_EVENT_DEFAULT: Default event creation flag
- CU_EVENT_BLOCKING_SYNC: Specifies that event should use blocking synchronization

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuEventRecord(jcuda.driver.CUevent, jcuda.driver.CUstream), cuEventQuery(jcuda.driver.CUevent), cuEventSynchronize(jcuda.driver.CUevent), cuEventDestroy(jcuda.driver.CUevent), cuEventElapsedTime(float[], jcuda.driver.CUevent, jcuda.driver.CUevent)

cuEventRecord

public static int cuEventRecord(CUevent hEvent,
                                CUstream hStream)
Records an event.

SYNOPSIS
CUresult cuEventRecord(CUevent event, CUstream stream);

DESCRIPTION
Records an event. If stream is non-zero, the event is recorded after all preceding operations in the stream have been completed; otherwise, it is recorded after all preceding operations in the CUDA context have been completed. Since this operation is asynchronous, cuEventQuery() and/or cuEventSynchronize() must be used to determine when the event has actually been recorded. If cuEventRecord() has previously been called and the event has not been recorded yet, this function returns CUDA_ERROR_INVALID_VALUE.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE,
See Also:
cuEventCreate(jcuda.driver.CUevent, int), cuEventQuery(jcuda.driver.CUevent), cuEventSynchronize(jcuda.driver.CUevent), cuEventDestroy(jcuda.driver.CUevent), cuEventElapsedTime(float[], jcuda.driver.CUevent, jcuda.driver.CUevent)

cuEventQuery

public static int cuEventQuery(CUevent hEvent)
Queries an event’s status.

SYNOPSIS
CUresult cuEventQuery(CUevent event);

DESCRIPTION
Returns CUDA_SUCCESS if the event has actually been recorded, or CUDA_ERROR_NOT_READY if not. If cuEventRecord() has not been called on this event, the function returns CUDA_ERROR_INVALID_VALUE.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_READY,
See Also:
cuEventCreate(jcuda.driver.CUevent, int), cuEventRecord(jcuda.driver.CUevent, jcuda.driver.CUstream), cuEventSynchronize(jcuda.driver.CUevent), cuEventDestroy(jcuda.driver.CUevent), cuEventElapsedTime(float[], jcuda.driver.CUevent, jcuda.driver.CUevent)

cuEventSynchronize

public static int cuEventSynchronize(CUevent hEvent)
Waits for an event to complete.

SYNOPSIS
CUresult cuEventSynchronize(CUevent event);

DESCRIPTION
Blocks until the event has actually been recorded. If cuEventRecord() has not been called on this event, the function returns CUDA_ERROR_INVALID_VALUE.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
See Also:
cuEventCreate(jcuda.driver.CUevent, int), cuEventRecord(jcuda.driver.CUevent, jcuda.driver.CUstream), cuEventQuery(jcuda.driver.CUevent), cuEventDestroy(jcuda.driver.CUevent), cuEventElapsedTime(float[], jcuda.driver.CUevent, jcuda.driver.CUevent)

cuEventDestroy

public static int cuEventDestroy(CUevent hEvent)
Destroys an event.

SYNOPSIS
CUresult cuEventDestroy(CUevent event);

DESCRIPTION
Destroys the event.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
See Also:
cuEventCreate(jcuda.driver.CUevent, int), cuEventRecord(jcuda.driver.CUevent, jcuda.driver.CUstream), cuEventQuery(jcuda.driver.CUevent), cuEventSynchronize(jcuda.driver.CUevent), cuEventElapsedTime(float[], jcuda.driver.CUevent, jcuda.driver.CUevent)

cuEventElapsedTime

public static int cuEventElapsedTime(float[] pMilliseconds,
                                     CUevent hStart,
                                     CUevent hEnd)
Computes the elapsed time between two events.

SYNOPSIS
CUresult cuEventElapsedTime(float* time, CUevent start, CUevent end);

DESCRIPTION
Computes the elapsed time between two events (in milliseconds with a resolution of around 0.5 microseconds). If either event has not been recorded yet, this function returns CUDA_ERROR_INVALID_VALUE. If either event has been recorded with a non-zero stream, the result is undefined.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE,
See Also:
cuEventCreate(jcuda.driver.CUevent, int), cuEventRecord(jcuda.driver.CUevent, jcuda.driver.CUstream), cuEventQuery(jcuda.driver.CUevent), cuEventSynchronize(jcuda.driver.CUevent), cuEventDestroy(jcuda.driver.CUevent)

cuStreamCreate

public static int cuStreamCreate(CUstream phStream,
                                 int Flags)
Create a stream.

SYNOPSIS
CUresult cuStreamCreate(CUstream* stream, unsigned int flags);

DESCRIPTION
Creates a stream. At present, flags is required to be 0.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuStreamQuery(jcuda.driver.CUstream), cuStreamSynchronize(jcuda.driver.CUstream), cuStreamDestroy(jcuda.driver.CUstream)

cuStreamQuery

public static int cuStreamQuery(CUstream hStream)
Determine status of a compute stream.

SYNOPSIS
CUresult cuStreamQuery(CUstream stream);

DESCRIPTION
Returns CUDA_SUCCESS if all operations in the stream have completed, or CUDA_ERROR_NOT_READY if not.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_READY,
See Also:
cuStreamCreate(jcuda.driver.CUstream, int), cuStreamSynchronize(jcuda.driver.CUstream), cuStreamDestroy(jcuda.driver.CUstream)

cuStreamSynchronize

public static int cuStreamSynchronize(CUstream hStream)
Block until a stream’s tasks are completed.

SYNOPSIS
CUresult cuStreamSynchronize(CUstream stream);

DESCRIPTION
Blocks until the device has completed all operations in the stream.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
See Also:
cuStreamCreate(jcuda.driver.CUstream, int), cuStreamQuery(jcuda.driver.CUstream), cuStreamDestroy(jcuda.driver.CUstream)

cuStreamDestroy

public static int cuStreamDestroy(CUstream hStream)
Destroys a stream.

SYNOPSIS
CUresult cuStreamDestroy(CUstream stream);

DESCRIPTION
Destroys the stream.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
See Also:
cuStreamCreate(jcuda.driver.CUstream, int), cuStreamQuery(jcuda.driver.CUstream), cuStreamSynchronize(jcuda.driver.CUstream)

cuGLInit

public static int cuGLInit()
Deprecated. As of CUDA 3.0

Initializes GL interoperability.

SYNOPSIS
CUresult cuGLInit(void);

DESCRIPTION
Initializes OpenGL interoperability. It must be called before performing any other OpenGL interoperability operations. It may fail if the needed OpenGL driver facilities are not available.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLRegisterBufferObject(int), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLUnmapBufferObject(int), cuGLUnregisterBufferObject(int)

cuGLCtxCreate

public static int cuGLCtxCreate(CUcontext pCtx,
                                int Flags,
                                CUdevice device)
Create a CUDA context for interoperability with OpenGL.

SYNOPSIS
CUresult cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device);

DESCRIPTION
Creates a new CUDA context, initializes OpenGL interoperability, and associates the CUDA context with the calling thread. It must be called before performing any other OpenGL interoperability operations. It may fail if the needed OpenGL driver facilities are not available. For usage of the Flags parameter, see cuCtxCreate.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY,
See Also:
cuCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLRegisterBufferObject(int), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLUnmapBufferObject(int), cuGLUnregisterBufferObject(int)

cuGraphicsGLRegisterBuffer

public static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource,
                                             int buffer,
                                             int Flags)
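Registers the buffer object specified by buffer for access by CUDA. A handle to the registered object is returned as pCudaResource. The map flags Flags specify the intended usage, as follows:
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be read from and written to by CUDA. This is the default value.
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA will not write to this resource.
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA will not read from this resource and will write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.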

Parameters:
pCudaResource - Pointer to the returned object handle
buffer - name of buffer object to be registered
Flags - Map flags

Returns:
CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGraphicsUnregisterResource(jcuda.driver.CUgraphicsResource), cuGraphicsMapResources(int, jcuda.driver.CUgraphicsResource[], jcuda.driver.CUstream), cuGraphicsResourceGetMappedPointer(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUgraphicsResource)

cuGraphicsGLRegisterImage

public static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource,
                                            int image,
                                            int target,
                                            int Flags)
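Registers the texture or renderbuffer object specified by image for access by CUDA. target must match the type of the object. A handle to the registered object is returned as pCudaResource. The map flags Flags specify the intended usage, as follows:
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be read from and written to by CUDA. This is the default value.
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA will not write to this resource.
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA will not read from this resource and will write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.

Parameters: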
pCudaResource - Pointer to the returned object handle
image - name of texture or renderbuffer object to be registered
target - Identifies the type of object specified by image, and must be one of GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D, GL_TEXTURE_2D_ARRAY, or GL_RENDERBUFFER.
Flags - Map flags

Returns:
CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGraphicsUnregisterResource(jcuda.driver.CUgraphicsResource), cuGraphicsMapResources(int, jcuda.driver.CUgraphicsResource[], jcuda.driver.CUstream), cuGraphicsSubResourceGetMappedArray(jcuda.driver.CUarray, jcuda.driver.CUgraphicsResource, int, int)

cuGLRegisterBufferObject

public static int cuGLRegisterBufferObject(int bufferobj)
Deprecated. As of CUDA 3.0

Registers a GL buffer object.

SYNOPSIS
CUresult cuGLRegisterBufferObject(GLuint bufferObj);

DESCRIPTION
Registers the buffer object of ID bufferObj for access by CUDA. This function must be called before CUDA can map the buffer object. While it is registered, the buffer object cannot be used by any OpenGL commands except as a data source for OpenGL drawing commands.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_ALREADY_MAPPED,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLUnmapBufferObject(int), cuGLUnregisterBufferObject(int)

cuGLMapBufferObject

public static int cuGLMapBufferObject(CUdeviceptr dptr,
                                      int[] size,
                                      int bufferobj)
Deprecated. As of CUDA 3.0

Maps a GL buffer object.

SYNOPSIS
CUresult cuGLMapBufferObject(CUdeviceptr* devPtr, unsigned int* size, GLuint bufferObj);

DESCRIPTION
Maps the buffer object of ID bufferObj into the address space of the current CUDA context and returns in *devPtr and *size the base pointer and size of the resulting mapping.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_MAP_FAILED,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLRegisterBufferObject(int), cuGLUnmapBufferObject(int), cuGLUnregisterBufferObject(int)

cuGLUnmapBufferObject

public static int cuGLUnmapBufferObject(int bufferobj)
Deprecated. As of CUDA 3.0

Unmaps a GL buffer object.

SYNOPSIS
CUresult cuGLUnmapBufferObject(GLuint bufferObj);

DESCRIPTION
Unmaps the buffer object of ID bufferObj for access by CUDA.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLRegisterBufferObject(int), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLUnregisterBufferObject(int)

cuGLUnregisterBufferObject

public static int cuGLUnregisterBufferObject(int bufferobj)
Deprecated. As of CUDA 3.0

Unregister a GL buffer object.

SYNOPSIS
CUresult cuGLUnregisterBufferObject(GLuint bufferObj);

DESCRIPTION
Unregisters the buffer object of ID bufferObj for access by CUDA.

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLRegisterBufferObject(int), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLUnmapBufferObject(int)

cuGLSetBufferObjectMapFlags

public static int cuGLSetBufferObjectMapFlags(int buffer,
                                              int Flags)
Deprecated. As of CUDA 3.0

Sets the map flags for the registered buffer object of ID buffer.

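Changes to Flags will take effect the next time buffer is mapped. The Flags argument may be any of the following:
- CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be read from and written to by CUDA kernels. This is the default value.
- CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which access this resource will not write to this resource.
- CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels which access this resource will not read from this resource and will write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.
If buffer has not been registered for use with CUDA, then CUDA_ERROR_INVALID_HANDLE is returned. If buffer is presently mapped for access by CUDA, then CUDA_ERROR_ALREADY_MAPPED is returned.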

Parameters:
buffer - Registered buffer object to set the map flags for
Flags - Map flags

Returns:
CUDA_SUCCESS, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT,
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLRegisterBufferObject(int), cuGLUnregisterBufferObject(int), cuGLUnmapBufferObject(int), cuGLMapBufferObjectAsync(jcuda.driver.CUdeviceptr, int[], int, jcuda.driver.CUstream), cuGLUnmapBufferObjectAsync(int, jcuda.driver.CUstream)

cuGLMapBufferObjectAsync

public static int cuGLMapBufferObjectAsync(CUdeviceptr dptr,
                                           int[] size,
                                           int buffer,
                                           CUstream hStream)
Deprecated. As of CUDA 3.0

Maps the buffer object of ID buffer into the address space of the current CUDA context and returns in *dptr and *size the base pointer and size of the resulting mapping.
Stream hStream in the current CUDA context is synchronized with the current GL context.

Parameters:
dptr - Returned mapped base pointer
size - Returned size of mapping
buffer - Buffer object to map
hStream - Stream to synchronize

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_MAP_FAILED
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLRegisterBufferObject(int), cuGLUnmapBufferObject(int), cuGLUnregisterBufferObject(int), cuGLUnmapBufferObjectAsync(int, jcuda.driver.CUstream), cuGLSetBufferObjectMapFlags(int, int)

cuGLUnmapBufferObjectAsync

public static int cuGLUnmapBufferObjectAsync(int buffer,
                                             CUstream hStream)
Deprecated. As of CUDA 3.0

Unmaps the buffer object of ID buffer for access by CUDA.

Stream hStream in the current CUDA context is synchronized with the current GL context.

Parameters:
buffer - Buffer object to unmap
hStream - Stream to synchronize

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
See Also:
cuGLCtxCreate(jcuda.driver.CUcontext, int, jcuda.driver.CUdevice), cuGLInit(), cuGLMapBufferObject(jcuda.driver.CUdeviceptr, int[], int), cuGLRegisterBufferObject(int), cuGLUnregisterBufferObject(int), cuGLMapBufferObjectAsync(jcuda.driver.CUdeviceptr, int[], int, jcuda.driver.CUstream), cuGLSetBufferObjectMapFlags(int, int)

cuGraphicsUnregisterResource

public static int cuGraphicsUnregisterResource(CUgraphicsResource resource)
Unregisters the graphics resource resource so it is not accessible by CUDA unless registered again.
If resource is invalid then CUDA_ERROR_INVALID_HANDLE is returned.

Parameters:
resource - Resource to unregister

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_UNKNOWN
See Also:
cuGraphicsGLRegisterBuffer(jcuda.driver.CUgraphicsResource, int, int), cuGraphicsGLRegisterImage(jcuda.driver.CUgraphicsResource, int, int, int)

cuGraphicsSubResourceGetMappedArray

public static int cuGraphicsSubResourceGetMappedArray(CUarray pArray,
                                                      CUgraphicsResource resource,
                                                      int arrayIndex,
                                                      int mipLevel)
Returns in *pArray an array through which the subresource of the mapped graphics resource resource which corresponds to array index arrayIndex and mipmap level mipLevel may be accessed. The value set in *pArray may change every time that resource is mapped.

If resource is not a texture then it cannot be accessed via an array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If arrayIndex is not a valid array index for resource then CUDA_ERROR_INVALID_VALUE is returned. If mipLevel is not a valid mipmap level for resource then CUDA_ERROR_INVALID_VALUE is returned. If resource is not mapped then CUDA_ERROR_NOT_MAPPED is returned.

Parameters:
pArray - Returned array through which a subresource of resource may be accessed
resource - Mapped resource to access
arrayIndex - Array index for array textures or cubemap face index as defined by CUarray_cubemap_face for cubemap textures for the subresource to access
mipLevel - Mipmap level for the subresource to access

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_NOT_MAPPED_AS_ARRAY
See Also:
cuGraphicsResourceGetMappedPointer(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUgraphicsResource)

cuGraphicsResourceGetMappedPointer

public static int cuGraphicsResourceGetMappedPointer(CUdeviceptr pDevPtr,
                                                     int[] pSize,
                                                     CUgraphicsResource resource)
Returns in *pDevPtr a pointer through which the mapped graphics resource resource may be accessed. Returns in *pSize the size of the memory in bytes which may be accessed from that pointer. The value set in *pDevPtr may change every time that resource is mapped.

If resource is not a buffer then it cannot be accessed via a pointer and CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned. If resource is not mapped then CUDA_ERROR_NOT_MAPPED is returned.
Parameters:
pDevPtr - Returned pointer through which resource may be accessed
pSize - Returned size of the buffer accessible starting at *pDevPtr
resource - Mapped resource to access

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_NOT_MAPPED_AS_POINTER
See Also:
cuGraphicsMapResources(int, jcuda.driver.CUgraphicsResource[], jcuda.driver.CUstream), cuGraphicsSubResourceGetMappedArray(jcuda.driver.CUarray, jcuda.driver.CUgraphicsResource, int, int)

cuGraphicsResourceSetMapFlags

public static int cuGraphicsResourceSetMapFlags(CUgraphicsResource resource,
                                                int flags)
Set flags for mapping the graphics resource resource.

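Changes to flags will take effect the next time resource is mapped. The flags argument may be any of the following:
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be read from and written to by CUDA kernels. This is the default value.
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which access this resource will not write to this resource.
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels which access this resource will not read from this resource and will write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.

If resource is presently mapped for access by CUDA then CUDA_ERROR_ALREADY_MAPPED is returned. If flags is not one of the above values then CUDA_ERROR_INVALID_VALUE is returned.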

Parameters:
resource - Registered resource to set flags for
flags - Parameters for resource mapping

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED
See Also:
cuGraphicsMapResources(int, jcuda.driver.CUgraphicsResource[], jcuda.driver.CUstream)

cuGraphicsMapResources

public static int cuGraphicsMapResources(int count,
                                         CUgraphicsResource[] resources,
                                         CUstream hStream)
Maps the count graphics resources in resources for access by CUDA.

The resources in resources may be accessed by CUDA until they are unmapped. The graphics API from which resources were registered should not access any resources while they are mapped by CUDA. If an application does so, the results are undefined.

This function provides the synchronization guarantee that any graphics calls issued before cuGraphicsMapResources() will complete before any subsequent CUDA work issued in hStream begins.

If resources includes any duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any of resources are presently mapped for access by CUDA then CUDA_ERROR_ALREADY_MAPPED is returned.
Parameters:
count - Number of resources to map
resources - Resources to map for CUDA usage
hStream - Stream with which to synchronize

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_UNKNOWN
See Also:
cuGraphicsResourceGetMappedPointer(jcuda.driver.CUdeviceptr, int[], jcuda.driver.CUgraphicsResource), cuGraphicsSubResourceGetMappedArray(jcuda.driver.CUarray, jcuda.driver.CUgraphicsResource, int, int), cuGraphicsUnmapResources(int, jcuda.driver.CUgraphicsResource[], jcuda.driver.CUstream)

cuGraphicsUnmapResources

public static int cuGraphicsUnmapResources(int count,
                                           CUgraphicsResource[] resources,
                                           CUstream hStream)
Unmaps the count graphics resources in resources.

Once unmapped, the resources in resources may not be accessed by CUDA until they are mapped again.

This function provides the synchronization guarantee that any CUDA work issued in hStream before cuGraphicsUnmapResources() will complete before any subsequently issued graphics work begins.

If resources includes any duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any of resources are not presently mapped for access by CUDA then CUDA_ERROR_NOT_MAPPED is returned.

Parameters:
count - Number of resources to unmap
resources - Resources to unmap
hStream - Stream with which to synchronize

Returns:
CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_UNKNOWN
See Also:
cuGraphicsMapResources(int, jcuda.driver.CUgraphicsResource[], jcuda.driver.CUstream)