Package | Description |
---|---|
org.bytedeco.cuda.cudart | |
org.bytedeco.cuda.cudnn | |
org.bytedeco.cuda.global | |
org.bytedeco.cuda.nppc | |
org.bytedeco.cuda.nvcomp | |
org.bytedeco.cuda.nvjpeg | |
Modifier and Type | Method and Description |
---|---|
CUstream_st |
CUDA_LAUNCH_PARAMS_v1.hStream()
Stream identifier
|
CUstream_st |
CUlaunchConfig.hStream()
Stream identifier
|
CUstream_st |
CUDA_GRAPH_INSTANTIATE_PARAMS.hUploadStream()
Upload stream
|
CUstream_st |
cudaLaunchParams.stream()
Stream identifier
|
CUstream_st |
cudaLaunchConfig_t.stream()
Stream identifier
|
CUstream_st |
cudaGraphInstantiateParams.uploadStream()
Upload stream
|
Modifier and Type | Method and Description |
---|---|
void |
CUstreamCallback.call(CUstream_st hStream,
int status,
Pointer userData) |
void |
cudaStreamCallback_t.call(CUstream_st stream,
int status,
Pointer userData) |
CUDA_LAUNCH_PARAMS_v1 |
CUDA_LAUNCH_PARAMS_v1.hStream(CUstream_st setter) |
CUlaunchConfig |
CUlaunchConfig.hStream(CUstream_st setter) |
CUDA_GRAPH_INSTANTIATE_PARAMS |
CUDA_GRAPH_INSTANTIATE_PARAMS.hUploadStream(CUstream_st setter) |
cudaLaunchParams |
cudaLaunchParams.stream(CUstream_st setter) |
cudaLaunchConfig_t |
cudaLaunchConfig_t.stream(CUstream_st setter) |
cudaGraphInstantiateParams |
cudaGraphInstantiateParams.uploadStream(CUstream_st setter) |
Modifier and Type | Method and Description |
---|---|
CUstream_st |
cudnnDebug_t.stream() |
Modifier and Type | Method and Description |
---|---|
cudnnDebug_t |
cudnnDebug_t.stream(CUstream_st setter) |
Modifier and Type | Field and Description |
---|---|
static CUstream_st |
cudart.CU_STREAM_LEGACY |
static CUstream_st |
cudart.CU_STREAM_PER_THREAD |
static CUstream_st |
cudart.cudaStreamLegacy |
static CUstream_st |
cudart.cudaStreamPerThread |
Modifier and Type | Method and Description |
---|---|
static CUstream_st |
cudart.CU_STREAM_LEGACY()
Legacy stream handle
Stream handle that can be passed as a CUstream to use an implicit stream
with legacy synchronization behavior.
|
static CUstream_st |
cudart.CU_STREAM_PER_THREAD()
Per-thread stream handle
Stream handle that can be passed as a CUstream to use an implicit stream
with per-thread synchronization behavior.
|
static CUstream_st |
cudart.cudaStreamLegacy()
Legacy stream handle
Stream handle that can be passed as a cudaStream_t to use an implicit stream
with legacy synchronization behavior.
|
static CUstream_st |
cudart.cudaStreamPerThread()
Per-thread stream handle
Stream handle that can be passed as a cudaStream_t to use an implicit stream
with per-thread synchronization behavior.
|
static CUstream_st |
nppc.nppGetStream()
Get the NPP CUDA stream.
|
Modifier and Type | Method and Description |
---|---|
static nvcompManagerBase |
nvcomp.create_manager(byte[] comp_buffer,
CUstream_st stream,
int device_id,
int checksum_policy) |
static nvcompManagerBase |
nvcomp.create_manager(ByteBuffer comp_buffer,
CUstream_st stream,
int device_id,
int checksum_policy) |
static nvcompManagerBase |
nvcomp.create_manager(BytePointer comp_buffer,
CUstream_st stream,
int device_id,
int checksum_policy)
\brief Construct a ManagerBase from a buffer
This synchronizes the stream
|
static int |
cublas.cublasGetMatrixAsync_64(long rows,
long cols,
long elemSize,
Pointer A,
long lda,
Pointer B,
long ldb,
CUstream_st stream) |
static int |
cublas.cublasGetMatrixAsync(int rows,
int cols,
int elemSize,
Pointer A,
int lda,
Pointer B,
int ldb,
CUstream_st stream) |
static int |
cublas.cublasGetStream_v2(cublasContext handle,
CUstream_st streamId) |
static int |
cublas.cublasGetVectorAsync_64(long n,
long elemSize,
Pointer devicePtr,
long incx,
Pointer hostPtr,
long incy,
CUstream_st stream) |
static int |
cublas.cublasGetVectorAsync(int n,
int elemSize,
Pointer devicePtr,
int incx,
Pointer hostPtr,
int incy,
CUstream_st stream) |
static int |
cublas.cublasLtMatmul(cublasLtContext lightHandle,
cublasLtMatmulDescOpaque_t computeDesc,
Pointer alpha,
Pointer A,
cublasLtMatrixLayoutOpaque_t Adesc,
Pointer B,
cublasLtMatrixLayoutOpaque_t Bdesc,
Pointer beta,
Pointer C,
cublasLtMatrixLayoutOpaque_t Cdesc,
Pointer D,
cublasLtMatrixLayoutOpaque_t Ddesc,
cublasLtMatmulAlgo_t algo,
Pointer workspace,
long workspaceSizeInBytes,
CUstream_st stream)
Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
|
static int |
cublas.cublasLtMatrixTransform(cublasLtContext lightHandle,
cublasLtMatrixTransformDescOpaque_t transformDesc,
Pointer alpha,
Pointer A,
cublasLtMatrixLayoutOpaque_t Adesc,
Pointer beta,
Pointer B,
cublasLtMatrixLayoutOpaque_t Bdesc,
Pointer C,
cublasLtMatrixLayoutOpaque_t Cdesc,
CUstream_st stream)
Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
Can be used to change memory order of data or to scale and shift the values.
|
static int |
cublas.cublasSetKernelStream(CUstream_st stream) |
static int |
cublas.cublasSetMatrixAsync_64(long rows,
long cols,
long elemSize,
Pointer A,
long lda,
Pointer B,
long ldb,
CUstream_st stream) |
static int |
cublas.cublasSetMatrixAsync(int rows,
int cols,
int elemSize,
Pointer A,
int lda,
Pointer B,
int ldb,
CUstream_st stream) |
static int |
cublas.cublasSetStream_v2(cublasContext handle,
CUstream_st streamId) |
static int |
cublas.cublasSetVectorAsync_64(long n,
long elemSize,
Pointer hostPtr,
long incx,
Pointer devicePtr,
long incy,
CUstream_st stream) |
static int |
cublas.cublasSetVectorAsync(int n,
int elemSize,
Pointer hostPtr,
int incx,
Pointer devicePtr,
int incy,
CUstream_st stream) |
static int |
cudart.cudaEventRecord(CUevent_st event,
CUstream_st stream)
\brief Records an event
Captures in \p event the contents of \p stream at the time of this call.
|
static int |
cudart.cudaEventRecordWithFlags(CUevent_st event,
CUstream_st stream,
int flags)
\brief Records an event
Captures in \p event the contents of \p stream at the time of this call.
|
static int |
cudart.cudaFreeAsync(Pointer devPtr,
CUstream_st hStream)
\brief Frees memory with stream ordered semantics
Inserts a free operation into \p hStream.
|
static int |
cudart.cudaGLMapBufferObjectAsync(Pointer devPtr,
int bufObj,
CUstream_st stream)
Deprecated.
|
static int |
cudart.cudaGLMapBufferObjectAsync(PointerPointer devPtr,
int bufObj,
CUstream_st stream)
Deprecated.
This function is deprecated as of CUDA 3.0.
Maps the buffer object of ID \p bufObj into the address space of
CUDA and returns in \p *devPtr the base pointer of the resulting
mapping. The buffer must have previously been registered by
calling ::cudaGLRegisterBufferObject(). While a buffer is mapped
by CUDA, any OpenGL operation which references the buffer will
result in undefined behavior. The OpenGL context used to create
the buffer, or another context from the same share group, must be
bound to the current thread when this is called.
Stream \p stream is synchronized with the current GL context.
|
static int |
cudart.cudaGLUnmapBufferObjectAsync(int bufObj,
CUstream_st stream)
Deprecated.
This function is deprecated as of CUDA 3.0.
Unmaps the buffer object of ID \p bufObj for access by CUDA. When
a buffer is unmapped, the base address returned by
::cudaGLMapBufferObject() is invalid and subsequent references to
the address result in undefined behavior. The OpenGL context used
to create the buffer, or another context from the same share group,
must be bound to the current thread when this is called.
Stream \p stream is synchronized with the current GL context.
|
static int |
cudart.cudaGraphicsMapResources(int count,
cudaGraphicsResource resources,
CUstream_st stream)
\brief Map graphics resources for access by CUDA
Maps the \p count graphics resources in \p resources for access by CUDA.
|
static int |
cudart.cudaGraphicsUnmapResources(int count,
cudaGraphicsResource resources,
CUstream_st stream)
\brief Unmap graphics resources.
|
static int |
cudart.cudaGraphLaunch(CUgraphExec_st graphExec,
CUstream_st stream)
\brief Launches an executable graph in a stream
Executes \p graphExec in \p stream.
|
static int |
cudart.cudaGraphUpload(CUgraphExec_st graphExec,
CUstream_st stream)
\brief Uploads an executable graph in a stream
Uploads \p graphExec to the device in \p stream without executing it.
|
static int |
cudart.cudaLaunchCooperativeKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
Pointer args,
long sharedMem,
CUstream_st stream) |
static int |
cudart.cudaLaunchCooperativeKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
PointerPointer args,
long sharedMem,
CUstream_st stream)
\brief Launches a device function where thread blocks can cooperate and synchronize as they execute
The function invokes kernel \p func on \p gridDim (\p gridDim.x × \p gridDim.y
× \p gridDim.z) grid of blocks.
|
static int |
cudart.cudaLaunchHostFunc(CUstream_st stream,
cudaHostFn_t fn,
Pointer userData)
\brief Enqueues a host function call in a stream
Enqueues a host function to run in a stream.
|
static int |
cudart.cudaLaunchKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
Pointer args,
long sharedMem,
CUstream_st stream) |
static int |
cudart.cudaLaunchKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
PointerPointer args,
long sharedMem,
CUstream_st stream)
\brief Launches a device function
The function invokes kernel \p func on \p gridDim (\p gridDim.x × \p gridDim.y
× \p gridDim.z) grid of blocks.
|
static int |
cudart.cudaMallocAsync(Pointer devPtr,
long size,
CUstream_st hStream) |
static int |
cudart.cudaMallocAsync(PointerPointer devPtr,
long size,
CUstream_st hStream)
\brief Allocates memory with stream ordered semantics
Inserts an allocation operation into \p hStream.
|
static int |
cudart.cudaMallocFromPoolAsync(Pointer ptr,
long size,
CUmemPoolHandle_st memPool,
CUstream_st stream) |
static int |
cudart.cudaMallocFromPoolAsync(PointerPointer ptr,
long size,
CUmemPoolHandle_st memPool,
CUstream_st stream)
\brief Allocates memory from a specified pool with stream ordered semantics.
|
static int |
cudart.cudaMemcpy2DAsync(Pointer dst,
long dpitch,
Pointer src,
long spitch,
long width,
long height,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies a matrix (\p height rows of \p width bytes each) from the memory
area pointed to by \p src to the memory area pointed to by \p dst, where
\p kind specifies the direction of the copy, and must be one of
::cudaMemcpyHostToHost, ::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpy2DFromArrayAsync(Pointer dst,
long dpitch,
cudaArray src,
long wOffset,
long hOffset,
long width,
long height,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies a matrix (\p height rows of \p width bytes each) from the CUDA
array \p src starting at \p hOffset rows and \p wOffset bytes from the
upper left corner to the memory area pointed to by \p dst,
where \p kind specifies the direction of the copy, and must be one of
::cudaMemcpyHostToHost, ::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpy2DToArrayAsync(cudaArray dst,
long wOffset,
long hOffset,
Pointer src,
long spitch,
long width,
long height,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies a matrix (\p height rows of \p width bytes each) from the memory
area pointed to by \p src to the CUDA array \p dst starting at \p hOffset
rows and \p wOffset bytes from the upper left corner, where \p kind specifies
the direction of the copy, and must be one of ::cudaMemcpyHostToHost,
::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpy3DAsync(cudaMemcpy3DParms p,
CUstream_st stream)
\brief Copies data between 3D objects
|
static int |
cudart.cudaMemcpy3DPeerAsync(cudaMemcpy3DPeerParms p,
CUstream_st stream)
\brief Copies memory between devices asynchronously.
|
static int |
cudart.cudaMemcpyAsync(Pointer dst,
Pointer src,
long count,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies \p count bytes from the memory area pointed to by \p src to the
memory area pointed to by \p dst, where \p kind specifies the
direction of the copy, and must be one of ::cudaMemcpyHostToHost,
::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpyFromArrayAsync(Pointer dst,
cudaArray src,
long wOffset,
long hOffset,
long count,
int kind,
CUstream_st stream)
Deprecated.
Copies \p count bytes from the CUDA array \p src starting at \p hOffset rows
and \p wOffset bytes from the upper left corner to the memory area pointed to
by \p dst, where \p kind specifies the direction of the copy, and must be one of
::cudaMemcpyHostToHost, ::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault. Passing
::cudaMemcpyDefault is recommended, in which case the type of transfer is
inferred from the pointer values. However, ::cudaMemcpyDefault is only
allowed on systems that support unified virtual addressing.
::cudaMemcpyFromArrayAsync() is asynchronous with respect to the host, so
the call may return before the copy is complete. The copy can optionally
be associated to a stream by passing a non-zero \p stream argument. If \p
kind is ::cudaMemcpyHostToDevice or ::cudaMemcpyDeviceToHost and \p stream
is non-zero, the copy may overlap with operations in other streams.
|
static int |
cudart.cudaMemcpyFromSymbolAsync(Pointer dst,
Pointer symbol,
long count,
long offset,
int kind,
CUstream_st stream)
\brief Copies data from the given symbol on the device
Copies \p count bytes from the memory area pointed to by \p offset bytes
from the start of symbol \p symbol to the memory area pointed to by \p dst.
|
static int |
cudart.cudaMemcpyPeerAsync(Pointer dst,
int dstDevice,
Pointer src,
int srcDevice,
long count,
CUstream_st stream)
\brief Copies memory between two devices asynchronously.
|
static int |
cudart.cudaMemcpyToArrayAsync(cudaArray dst,
long wOffset,
long hOffset,
Pointer src,
long count,
int kind,
CUstream_st stream)
Deprecated.
Copies \p count bytes from the memory area pointed to by \p src to the
CUDA array \p dst starting at \p hOffset rows and \p wOffset bytes from
the upper left corner, where \p kind specifies the
direction of the copy, and must be one of ::cudaMemcpyHostToHost,
::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault. Passing
::cudaMemcpyDefault is recommended, in which case the type of transfer is
inferred from the pointer values. However, ::cudaMemcpyDefault is only
allowed on systems that support unified virtual addressing.
::cudaMemcpyToArrayAsync() is asynchronous with respect to the host, so
the call may return before the copy is complete. The copy can optionally
be associated to a stream by passing a non-zero \p stream argument. If \p
kind is ::cudaMemcpyHostToDevice or ::cudaMemcpyDeviceToHost and \p stream
is non-zero, the copy may overlap with operations in other streams.
|
static int |
cudart.cudaMemcpyToSymbolAsync(Pointer symbol,
Pointer src,
long count,
long offset,
int kind,
CUstream_st stream)
\brief Copies data to the given symbol on the device
Copies \p count bytes from the memory area pointed to by \p src
to the memory area pointed to by \p offset bytes from the start of symbol
\p symbol.
|
static int |
cudart.cudaMemPrefetchAsync_v2(Pointer devPtr,
long count,
cudaMemLocation location,
int flags,
CUstream_st stream) |
static int |
cudart.cudaMemPrefetchAsync(Pointer devPtr,
long count,
int dstDevice,
CUstream_st stream)
\brief Prefetches memory to the specified destination device
Prefetches memory to the specified destination device.
|
static int |
cudart.cudaMemset2DAsync(Pointer devPtr,
long pitch,
int value,
long width,
long height,
CUstream_st stream)
\brief Initializes or sets device memory to a value
Sets to the specified value \p value a matrix (\p height rows of \p width
bytes each) pointed to by \p devPtr.
|
static int |
cudart.cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr,
int value,
cudaExtent extent,
CUstream_st stream)
\brief Initializes or sets device memory to a value
Initializes each element of a 3D array to the specified value \p value.
|
static int |
cudart.cudaMemsetAsync(Pointer devPtr,
int value,
long count,
CUstream_st stream)
\brief Initializes or sets device memory to a value
Fills the first \p count bytes of the memory area pointed to by \p devPtr
with the constant byte value \p value.
|
static int |
cudart.cudaSignalExternalSemaphoresAsync(CUexternalSemaphore_st extSemArray,
cudaExternalSemaphoreSignalParams paramsArray,
int numExtSems,
CUstream_st stream)
\brief Signals a set of external semaphore objects
Enqueues a signal operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
cudart.cudaStreamAddCallback(CUstream_st stream,
cudaStreamCallback_t callback,
Pointer userData,
int flags)
\brief Add a callback to a compute stream
\note This function is slated for eventual deprecation and removal.
|
static int |
cudart.cudaStreamAttachMemAsync(CUstream_st stream,
Pointer devPtr) |
static int |
cudart.cudaStreamAttachMemAsync(CUstream_st stream,
Pointer devPtr,
int flags) |
static int |
cudart.cudaStreamAttachMemAsync(CUstream_st stream,
Pointer devPtr,
long length,
int flags)
\brief Attach memory to a stream asynchronously
Enqueues an operation in \p stream to specify stream association of
\p length bytes of memory starting from \p devPtr.
|
static int |
cudart.cudaStreamBeginCapture(CUstream_st stream,
int mode)
\brief Begins graph capture on a stream
Begin graph capture on \p stream.
|
static int |
cudart.cudaStreamBeginCaptureToGraph(CUstream_st stream,
CUgraph_st graph,
CUgraphNode_st dependencies,
cudaGraphEdgeData dependencyData,
long numDependencies,
int mode)
\brief Begins graph capture on a stream to an existing graph
Begin graph capture on \p stream.
|
static int |
cudart.cudaStreamCopyAttributes(CUstream_st dst,
CUstream_st src)
\brief Copies attributes from source stream to destination stream.
|
static int |
cudart.cudaStreamCreate(CUstream_st pStream)
\brief Create an asynchronous stream
Creates a new asynchronous stream.
|
static int |
cudart.cudaStreamCreateWithFlags(CUstream_st pStream,
int flags)
\brief Create an asynchronous stream
Creates a new asynchronous stream.
|
static int |
cudart.cudaStreamCreateWithPriority(CUstream_st pStream,
int flags,
int priority)
\brief Create an asynchronous stream with the specified priority
Creates a stream with the specified priority and returns a handle in \p pStream.
|
static int |
cudart.cudaStreamDestroy(CUstream_st stream)
\brief Destroys and cleans up an asynchronous stream
Destroys and cleans up the asynchronous stream specified by \p stream.
|
static int |
cudart.cudaStreamEndCapture(CUstream_st stream,
CUgraph_st pGraph)
\brief Ends capture on a stream, returning the captured graph
End capture on \p stream, returning the captured graph via \p pGraph.
|
static int |
cudart.cudaStreamGetAttribute(CUstream_st hStream,
int attr,
cudaLaunchAttributeValue value_out)
\brief Queries stream attribute.
|
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
int[] captureStatus_out) |
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
int[] captureStatus_out,
long[] id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
cudaGraphEdgeData edgeData_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
IntBuffer captureStatus_out) |
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
IntBuffer captureStatus_out,
LongBuffer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
cudaGraphEdgeData edgeData_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
IntPointer captureStatus_out) |
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
IntPointer captureStatus_out,
LongPointer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
cudaGraphEdgeData edgeData_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cudaStreamGetCaptureInfo_v3(CUstream_st stream,
IntPointer captureStatus_out,
LongPointer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
PointerPointer edgeData_out,
SizeTPointer numDependencies_out)
\brief Query a stream's capture state (12.3+)
Query stream state related to stream capture.
|
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
int[] captureStatus_out) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
int[] captureStatus_out,
long[] id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
IntBuffer captureStatus_out) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
IntBuffer captureStatus_out,
LongBuffer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
IntPointer captureStatus_out) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
IntPointer captureStatus_out,
LongPointer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
SizeTPointer numDependencies_out)
\brief Query a stream's capture state
Query stream state related to stream capture.
|
static int |
cudart.cudaStreamGetFlags(CUstream_st hStream,
int[] flags) |
static int |
cudart.cudaStreamGetFlags(CUstream_st hStream,
IntBuffer flags) |
static int |
cudart.cudaStreamGetFlags(CUstream_st hStream,
IntPointer flags)
\brief Query the flags of a stream
Query the flags of a stream.
|
static int |
cudart.cudaStreamGetId(CUstream_st hStream,
long[] streamId) |
static int |
cudart.cudaStreamGetId(CUstream_st hStream,
LongBuffer streamId) |
static int |
cudart.cudaStreamGetId(CUstream_st hStream,
LongPointer streamId)
\brief Query the Id of a stream
Query the Id of a stream.
|
static int |
cudart.cudaStreamGetPriority(CUstream_st hStream,
int[] priority) |
static int |
cudart.cudaStreamGetPriority(CUstream_st hStream,
IntBuffer priority) |
static int |
cudart.cudaStreamGetPriority(CUstream_st hStream,
IntPointer priority)
\brief Query the priority of a stream
Query the priority of a stream.
|
static int |
cudart.cudaStreamIsCapturing(CUstream_st stream,
int[] pCaptureStatus) |
static int |
cudart.cudaStreamIsCapturing(CUstream_st stream,
IntBuffer pCaptureStatus) |
static int |
cudart.cudaStreamIsCapturing(CUstream_st stream,
IntPointer pCaptureStatus)
\brief Returns a stream's capture status
Return the capture status of \p stream via \p pCaptureStatus.
|
static int |
cudart.cudaStreamQuery(CUstream_st stream)
\brief Queries an asynchronous stream for completion status
Returns ::cudaSuccess if all operations in \p stream have
completed, or ::cudaErrorNotReady if not.
|
static int |
cudart.cudaStreamSetAttribute(CUstream_st hStream,
int attr,
cudaLaunchAttributeValue value)
\brief Sets stream attribute.
|
static int |
cudart.cudaStreamSynchronize(CUstream_st stream)
\brief Waits for stream tasks to complete
Blocks until \p stream has completed all operations.
|
static int |
cudart.cudaStreamUpdateCaptureDependencies_v2(CUstream_st stream,
CUgraphNode_st dependencies,
cudaGraphEdgeData dependencyData,
long numDependencies) |
static int |
cudart.cudaStreamUpdateCaptureDependencies_v2(CUstream_st stream,
CUgraphNode_st dependencies,
cudaGraphEdgeData dependencyData,
long numDependencies,
int flags)
\brief Update the set of dependencies in a capturing stream (12.3+)
Modifies the dependency set of a capturing stream.
|
static int |
cudart.cudaStreamUpdateCaptureDependencies(CUstream_st stream,
CUgraphNode_st dependencies,
long numDependencies) |
static int |
cudart.cudaStreamUpdateCaptureDependencies(CUstream_st stream,
CUgraphNode_st dependencies,
long numDependencies,
int flags)
\brief Update the set of dependencies in a capturing stream (11.3+)
Modifies the dependency set of a capturing stream.
|
static int |
cudart.cudaStreamWaitEvent(CUstream_st stream,
CUevent_st event) |
static int |
cudart.cudaStreamWaitEvent(CUstream_st stream,
CUevent_st event,
int flags)
\brief Make a compute stream wait on an event
Makes all future work submitted to \p stream wait for all work captured in
\p event.
|
static int |
cudart.cudaWaitExternalSemaphoresAsync(CUexternalSemaphore_st extSemArray,
cudaExternalSemaphoreWaitParams paramsArray,
int numExtSems,
CUstream_st stream)
\brief Waits on a set of external semaphore objects
Enqueues a wait operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
cudnn.cudnnGetStream(cudnnContext handle,
CUstream_st streamId) |
static int |
cudnn.cudnnSetStream(cudnnContext handle,
CUstream_st streamId) |
static int |
cudart.cuEventRecord(CUevent_st hEvent,
CUstream_st hStream)
\brief Records an event
Captures in \p hEvent the contents of \p hStream at the time of this call.
|
static int |
cudart.cuEventRecordWithFlags(CUevent_st hEvent,
CUstream_st hStream,
int flags)
\brief Records an event
Captures in \p hEvent the contents of \p hStream at the time of this call.
|
static int |
cufft.cufftSetStream(int plan,
CUstream_st stream) |
static int |
cudart.cuGLMapBufferObjectAsync(long[] dptr,
SizeTPointer size,
int buffer,
CUstream_st hStream)
Deprecated.
|
static int |
cudart.cuGLMapBufferObjectAsync(LongBuffer dptr,
SizeTPointer size,
int buffer,
CUstream_st hStream)
Deprecated.
|
static int |
cudart.cuGLMapBufferObjectAsync(LongPointer dptr,
SizeTPointer size,
int buffer,
CUstream_st hStream)
Deprecated.
This function is deprecated as of CUDA 3.0.
Maps the buffer object specified by \p buffer into the address space of the
current CUDA context and returns in \p *dptr and \p *size the base pointer
and size of the resulting mapping.
There must be a valid OpenGL context bound to the current thread
when this function is called. This must be the same context, or a
member of the same shareGroup, as the context that was bound when
the buffer was registered.
Stream \p hStream in the current CUDA context is synchronized with
the current GL context.
|
static int |
cudart.cuGLUnmapBufferObjectAsync(int buffer,
CUstream_st hStream)
Deprecated.
This function is deprecated as of CUDA 3.0.
Unmaps the buffer object specified by \p buffer for access by CUDA.
There must be a valid OpenGL context bound to the current thread
when this function is called. This must be the same context, or a
member of the same shareGroup, as the context that was bound when
the buffer was registered.
Stream \p hStream in the current CUDA context is synchronized with
the current GL context.
|
static int |
cudart.cuGraphicsMapResources(int count,
CUgraphicsResource_st resources,
CUstream_st hStream)
\brief Map graphics resources for access by CUDA
Maps the \p count graphics resources in \p resources for access by CUDA.
|
static int |
cudart.cuGraphicsUnmapResources(int count,
CUgraphicsResource_st resources,
CUstream_st hStream)
\brief Unmap graphics resources.
|
static int |
cudart.cuGraphLaunch(CUgraphExec_st hGraphExec,
CUstream_st hStream)
\brief Launches an executable graph in a stream
Executes \p hGraphExec in \p hStream.
|
static int |
cudart.cuGraphUpload(CUgraphExec_st hGraphExec,
CUstream_st hStream)
\brief Uploads an executable graph in a stream
Uploads \p hGraphExec to the device in \p hStream without executing it.
|
static int |
cudart.cuLaunchCooperativeKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
Pointer kernelParams) |
static int |
cudart.cuLaunchCooperativeKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
PointerPointer kernelParams)
\brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel where thread blocks
can cooperate and synchronize as they execute
Invokes the function ::CUfunction or the kernel ::CUkernel \p f on a \p gridDimX x \p gridDimY x \p gridDimZ
grid of blocks.
|
static int |
cudart.cuLaunchGridAsync(CUfunc_st f,
int grid_width,
int grid_height,
CUstream_st hStream)
Deprecated.
Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
blocks. Each block contains the number of threads specified by a previous
call to ::cuFuncSetBlockShape().
The block shape, dynamic shared memory size, and parameter information
must be set using
::cuFuncSetBlockShape(),
::cuFuncSetSharedSize(),
::cuParamSetSize(),
::cuParamSeti(),
::cuParamSetf(), and
::cuParamSetv()
prior to calling this function.
Launching a function via ::cuLaunchKernel() invalidates the function's
block shape, dynamic shared memory size, and parameter information. After
launching via cuLaunchKernel, this state must be re-initialized prior to
calling this function. Failure to do so results in undefined behavior.
|
static int |
cudart.cuLaunchHostFunc(CUstream_st hStream,
CUhostFn fn,
Pointer userData)
\brief Enqueues a host function call in a stream
Enqueues a host function to run in a stream.
|
static int |
cudart.cuLaunchKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
Pointer kernelParams,
Pointer extra) |
static int |
cudart.cuLaunchKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
PointerPointer kernelParams,
PointerPointer extra)
\brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel
Invokes the function ::CUfunction or the kernel ::CUkernel \p f
on a \p gridDimX x \p gridDimY x \p gridDimZ grid of blocks.
|
static int |
cudart.cuMemAllocAsync(long[] dptr,
long bytesize,
CUstream_st hStream) |
static int |
cudart.cuMemAllocAsync(LongBuffer dptr,
long bytesize,
CUstream_st hStream) |
static int |
cudart.cuMemAllocAsync(LongPointer dptr,
long bytesize,
CUstream_st hStream)
\brief Allocates memory with stream ordered semantics
Inserts an allocation operation into \p hStream.
|
static int |
cudart.cuMemAllocFromPoolAsync(long[] dptr,
long bytesize,
CUmemPoolHandle_st pool,
CUstream_st hStream) |
static int |
cudart.cuMemAllocFromPoolAsync(LongBuffer dptr,
long bytesize,
CUmemPoolHandle_st pool,
CUstream_st hStream) |
static int |
cudart.cuMemAllocFromPoolAsync(LongPointer dptr,
long bytesize,
CUmemPoolHandle_st pool,
CUstream_st hStream)
\brief Allocates memory from a specified pool with stream ordered semantics.
|
static int |
cudart.cuMemcpy2DAsync(CUDA_MEMCPY2D_v2 pCopy,
CUstream_st hStream)
\brief Copies memory for 2D arrays
Perform a 2D memory copy according to the parameters specified in \p pCopy.
|
static int |
cudart.cuMemcpy3DAsync(CUDA_MEMCPY3D_v2 pCopy,
CUstream_st hStream)
\brief Copies memory for 3D arrays
Perform a 3D memory copy according to the parameters specified in
\p pCopy.
|
static int |
cudart.cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER_v1 pCopy,
CUstream_st hStream)
\brief Copies memory between contexts asynchronously.
|
static int |
cudart.cuMemcpyAsync(long dst,
long src,
long ByteCount,
CUstream_st hStream)
\brief Copies memory asynchronously
Copies data between two pointers.
|
static int |
cudart.cuMemcpyAtoHAsync(Pointer dstHost,
CUarray_st srcArray,
long srcOffset,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Array to Host
Copies from one 1D CUDA array to host memory.
|
static int |
cudart.cuMemcpyDtoDAsync(long dstDevice,
long srcDevice,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Device to Device
Copies from device memory to device memory.
|
static int |
cudart.cuMemcpyDtoHAsync(Pointer dstHost,
long srcDevice,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Device to Host
Copies from device to host memory.
|
static int |
cudart.cuMemcpyHtoAAsync(CUarray_st dstArray,
long dstOffset,
Pointer srcHost,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Host to Array
Copies from host memory to a 1D CUDA array.
|
static int |
cudart.cuMemcpyHtoDAsync(long dstDevice,
Pointer srcHost,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Host to Device
Copies from host memory to device memory.
|
static int |
cudart.cuMemcpyPeerAsync(long dstDevice,
CUctx_st dstContext,
long srcDevice,
CUctx_st srcContext,
long ByteCount,
CUstream_st hStream)
\brief Copies device memory between two contexts asynchronously.
|
static int |
cudart.cuMemFreeAsync(long dptr,
CUstream_st hStream)
\brief Frees memory with stream ordered semantics
Inserts a free operation into \p hStream.
|
static int |
cudart.cuMemMapArrayAsync(CUarrayMapInfo_v1 mapInfoList,
int count,
CUstream_st hStream)
\brief Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays
Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays.
|
static int |
cudart.cuMemPrefetchAsync_v2(long devPtr,
long count,
CUmemLocation_v1 location,
int flags,
CUstream_st hStream)
\brief Prefetches memory to the specified destination location
Prefetches memory to the specified destination location.
|
static int |
cudart.cuMemPrefetchAsync(long devPtr,
long count,
int dstDevice,
CUstream_st hStream)
\brief Prefetches memory to the specified destination device
Note there is a later version of this API, ::cuMemPrefetchAsync_v2.
|
static int |
cudart.cuMemsetD16Async(long dstDevice,
short us,
long N,
CUstream_st hStream)
\brief Sets device memory
Sets the memory range of \p N 16-bit values to the specified value
\p us.
|
static int |
cudart.cuMemsetD2D16Async(long dstDevice,
long dstPitch,
short us,
long Width,
long Height,
CUstream_st hStream)
\brief Sets device memory
Sets the 2D memory range of \p Width 16-bit values to the specified value
\p us.
|
static int |
cudart.cuMemsetD2D32Async(long dstDevice,
long dstPitch,
int ui,
long Width,
long Height,
CUstream_st hStream)
\brief Sets device memory
Sets the 2D memory range of \p Width 32-bit values to the specified value
\p ui.
|
static int |
cudart.cuMemsetD2D8Async(long dstDevice,
long dstPitch,
byte uc,
long Width,
long Height,
CUstream_st hStream)
\brief Sets device memory
Sets the 2D memory range of \p Width 8-bit values to the specified value
\p uc.
|
static int |
cudart.cuMemsetD32Async(long dstDevice,
int ui,
long N,
CUstream_st hStream)
\brief Sets device memory
Sets the memory range of \p N 32-bit values to the specified value
\p ui.
|
static int |
cudart.cuMemsetD8Async(long dstDevice,
byte uc,
long N,
CUstream_st hStream)
\brief Sets device memory
Sets the memory range of \p N 8-bit values to the specified value
\p uc.
|
static int |
curand.curandSetStream(curandGenerator_st generator,
CUstream_st stream)
\brief Set the current stream for CURAND kernel launches.
|
static int |
cudart.cuSignalExternalSemaphoresAsync(CUextSemaphore_st extSemArray,
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 paramsArray,
int numExtSems,
CUstream_st stream)
\brief Signals a set of external semaphore objects
Enqueues a signal operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
cusolver.cusolverDnGetStream(cusolverDnContext handle,
CUstream_st streamId) |
static int |
cusolver.cusolverDnSetStream(cusolverDnContext handle,
CUstream_st streamId) |
static int |
cusolver.cusolverSpGetStream(cusolverSpContext handle,
CUstream_st streamId) |
static int |
cusolver.cusolverSpSetStream(cusolverSpContext handle,
CUstream_st streamId) |
static int |
cusparse.cusparseGetStream(cusparseContext handle,
CUstream_st streamId) |
static int |
cusparse.cusparseSetStream(cusparseContext handle,
CUstream_st streamId) |
static int |
cudart.cuStreamAddCallback(CUstream_st hStream,
CUstreamCallback callback,
Pointer userData,
int flags)
\brief Add a callback to a compute stream
\note This function is slated for eventual deprecation and removal.
|
static int |
cudart.cuStreamAttachMemAsync(CUstream_st hStream,
long dptr,
long length,
int flags)
\brief Attach memory to a stream asynchronously
Enqueues an operation in \p hStream to specify stream association of
\p length bytes of memory starting from \p dptr.
|
static int |
cudart.cuStreamBatchMemOp(CUstream_st stream,
int count,
CUstreamBatchMemOpParams_v1 paramArray,
int flags)
\brief Batch operations to synchronize the stream via memory operations
This is a batch version of ::cuStreamWaitValue32() and ::cuStreamWriteValue32().
|
static int |
cudart.cuStreamBeginCapture(CUstream_st hStream,
int mode)
\brief Begins graph capture on a stream
Begin graph capture on \p hStream.
|
static int |
cudart.cuStreamBeginCaptureToGraph(CUstream_st hStream,
CUgraph_st hGraph,
CUgraphNode_st dependencies,
CUgraphEdgeData dependencyData,
long numDependencies,
int mode)
\brief Begins graph capture on a stream to an existing graph
Begin graph capture on \p hStream, placing new nodes into an existing graph.
|
static int |
cudart.cuStreamCopyAttributes(CUstream_st dst,
CUstream_st src)
\brief Copies attributes from source stream to destination stream.
|
static int |
cudart.cuStreamCreate(CUstream_st phStream,
int Flags)
\brief Create a stream
Creates a stream and returns a handle in \p phStream.
|
static int |
cudart.cuStreamCreateWithPriority(CUstream_st phStream,
int flags,
int priority)
\brief Create a stream with the given priority
Creates a stream with the specified priority and returns a handle in \p phStream.
|
static int |
cudart.cuStreamDestroy(CUstream_st hStream)
\brief Destroys a stream
Destroys the stream specified by \p hStream.
|
static int |
cudart.cuStreamEndCapture(CUstream_st hStream,
CUgraph_st phGraph)
\brief Ends capture on a stream, returning the captured graph
End capture on \p hStream, returning the captured graph via \p phGraph.
|
static int |
cudart.cuStreamGetAttribute(CUstream_st hStream,
int attr,
CUlaunchAttributeValue value_out)
\brief Queries stream attribute.
|
static int |
cudart.cuStreamGetCaptureInfo_v3(CUstream_st hStream,
int[] captureStatus_out,
long[] id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
CUgraphEdgeData edgeData_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cuStreamGetCaptureInfo_v3(CUstream_st hStream,
IntBuffer captureStatus_out,
LongBuffer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
CUgraphEdgeData edgeData_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cuStreamGetCaptureInfo_v3(CUstream_st hStream,
IntPointer captureStatus_out,
LongPointer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
CUgraphEdgeData edgeData_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cuStreamGetCaptureInfo_v3(CUstream_st hStream,
IntPointer captureStatus_out,
LongPointer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
PointerPointer edgeData_out,
SizeTPointer numDependencies_out)
\brief Query a stream's capture state (12.3+)
Query stream state related to stream capture.
|
static int |
cudart.cuStreamGetCaptureInfo(CUstream_st hStream,
int[] captureStatus_out,
long[] id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cuStreamGetCaptureInfo(CUstream_st hStream,
IntBuffer captureStatus_out,
LongBuffer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
SizeTPointer numDependencies_out) |
static int |
cudart.cuStreamGetCaptureInfo(CUstream_st hStream,
IntPointer captureStatus_out,
LongPointer id_out,
CUgraph_st graph_out,
PointerPointer dependencies_out,
SizeTPointer numDependencies_out)
\brief Query a stream's capture state
Query stream state related to stream capture.
|
static int |
cudart.cuStreamGetCtx(CUstream_st hStream,
CUctx_st pctx)
\brief Query the context associated with a stream
Returns the CUDA context that the stream is associated with.
|
static int |
cudart.cuStreamGetFlags(CUstream_st hStream,
int[] flags) |
static int |
cudart.cuStreamGetFlags(CUstream_st hStream,
IntBuffer flags) |
static int |
cudart.cuStreamGetFlags(CUstream_st hStream,
IntPointer flags)
\brief Query the flags of a given stream
Query the flags of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority
and return the flags in \p flags.
|
static int |
cudart.cuStreamGetId(CUstream_st hStream,
long[] streamId) |
static int |
cudart.cuStreamGetId(CUstream_st hStream,
LongBuffer streamId) |
static int |
cudart.cuStreamGetId(CUstream_st hStream,
LongPointer streamId)
\brief Returns the unique Id associated with the stream handle supplied
Returns in \p streamId the unique Id which is associated with the given stream handle.
|
static int |
cudart.cuStreamGetPriority(CUstream_st hStream,
int[] priority) |
static int |
cudart.cuStreamGetPriority(CUstream_st hStream,
IntBuffer priority) |
static int |
cudart.cuStreamGetPriority(CUstream_st hStream,
IntPointer priority)
\brief Query the priority of a given stream
Query the priority of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority
and return the priority in \p priority.
|
static int |
cudart.cuStreamIsCapturing(CUstream_st hStream,
int[] captureStatus) |
static int |
cudart.cuStreamIsCapturing(CUstream_st hStream,
IntBuffer captureStatus) |
static int |
cudart.cuStreamIsCapturing(CUstream_st hStream,
IntPointer captureStatus)
\brief Returns a stream's capture status
Return the capture status of \p hStream via \p captureStatus.
|
static int |
cudart.cuStreamQuery(CUstream_st hStream)
\brief Determine status of a compute stream
Returns ::CUDA_SUCCESS if all operations in the stream specified by
\p hStream have completed, or ::CUDA_ERROR_NOT_READY if not.
|
static int |
cudart.cuStreamSetAttribute(CUstream_st hStream,
int attr,
CUlaunchAttributeValue value)
\brief Sets stream attribute.
|
static int |
cudart.cuStreamSynchronize(CUstream_st hStream)
\brief Wait until a stream's tasks are completed
Waits until the device has completed all operations in the stream specified
by \p hStream.
|
static int |
cudart.cuStreamUpdateCaptureDependencies_v2(CUstream_st hStream,
CUgraphNode_st dependencies,
CUgraphEdgeData dependencyData,
long numDependencies,
int flags)
\brief Update the set of dependencies in a capturing stream (12.3+)
Modifies the dependency set of a capturing stream.
|
static int |
cudart.cuStreamUpdateCaptureDependencies(CUstream_st hStream,
CUgraphNode_st dependencies,
long numDependencies,
int flags)
\brief Update the set of dependencies in a capturing stream (11.3+)
Modifies the dependency set of a capturing stream.
|
static int |
cudart.cuStreamWaitEvent(CUstream_st hStream,
CUevent_st hEvent,
int Flags)
\brief Make a compute stream wait on an event
Makes all future work submitted to \p hStream wait for all work captured in
\p hEvent.
|
static int |
cudart.cuStreamWaitValue32(CUstream_st stream,
long addr,
int value,
int flags)
\brief Wait on a memory location
Enqueues a synchronization of the stream on the given memory location.
|
static int |
cudart.cuStreamWaitValue64(CUstream_st stream,
long addr,
long value,
int flags)
\brief Wait on a memory location
Enqueues a synchronization of the stream on the given memory location.
|
static int |
cudart.cuStreamWriteValue32(CUstream_st stream,
long addr,
int value,
int flags)
\brief Write a value to memory
Write a value to memory.
|
static int |
cudart.cuStreamWriteValue64(CUstream_st stream,
long addr,
long value,
int flags)
\brief Write a value to memory
Write a value to memory.
|
static int |
cudart.cuWaitExternalSemaphoresAsync(CUextSemaphore_st extSemArray,
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 paramsArray,
int numExtSems,
CUstream_st stream)
\brief Waits on a set of external semaphore objects
Enqueues a wait operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
nccl.ncclAllGather(Pointer sendbuff,
Pointer recvbuff,
long sendcount,
int datatype,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclAllReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclBcast(Pointer buff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclBroadcast(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclRecv(Pointer recvbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclReduceScatter(Pointer sendbuff,
Pointer recvbuff,
long recvcount,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclSend(Pointer sendbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
static int |
nppc.nppSetStream(CUstream_st hStream)
Set the NPP CUDA stream.
|
static int |
nvcomp.nvcompBatchedANSCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedANSOpts_t format_opts,
CUstream_st stream)
\brief Perform compression.
|
static int |
nvcomp.nvcompBatchedANSCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedANSOpts_t format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedANSDecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform decompression.
|
static int |
nvcomp.nvcompBatchedANSDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedANSDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedANSDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedANSGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Compute uncompressed sizes.
|
static int |
nvcomp.nvcompBatchedANSGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedBitcompCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedBitcompFormatOpts format_opts,
CUstream_st stream)
\brief Perform batched asynchronous compression.
|
static int |
nvcomp.nvcompBatchedBitcompCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedBitcompFormatOpts format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedBitcompDecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform batched asynchronous decompression.
|
static int |
nvcomp.nvcompBatchedBitcompDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedBitcompDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedBitcompDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedBitcompGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Asynchronously get the number of bytes of the uncompressed data in
every partition.
|
static int |
nvcomp.nvcompBatchedBitcompGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCascadedCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedCascadedOpts_t format_opts,
CUstream_st stream)
\brief Perform batched asynchronous compression.
|
static int |
nvcomp.nvcompBatchedCascadedCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedCascadedOpts_t format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCascadedDecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform batched asynchronous decompression.
|
static int |
nvcomp.nvcompBatchedCascadedDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCascadedDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCascadedDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCascadedGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Asynchronously get the number of bytes of the uncompressed data in
every partition.
|
static int |
nvcomp.nvcompBatchedCascadedGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCRC32Async(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long batch_size,
IntPointer device_CRC32_ptrs,
CUstream_st stream)
\brief Perform CRC32 checksum calculation asynchronously.
|
static int |
nvcomp.nvcompBatchedCRC32Async(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long batch_size,
int[] device_CRC32_ptrs,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCRC32Async(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long batch_size,
IntBuffer device_CRC32_ptrs,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedCRC32Async(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long batch_size,
IntPointer device_CRC32_ptrs,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedDeflateCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedDeflateOpts_t format_opts,
CUstream_st stream)
\brief Perform compression asynchronously.
|
static int |
nvcomp.nvcompBatchedDeflateCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedDeflateOpts_t format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedDeflateDecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform decompression asynchronously.
|
static int |
nvcomp.nvcompBatchedDeflateDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedDeflateDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedDeflateDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedDeflateGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Calculates the decompressed size of each chunk asynchronously.
|
static int |
nvcomp.nvcompBatchedDeflateGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGdeflateCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedGdeflateOpts_t format_opts,
CUstream_st stream)
\brief Perform compression asynchronously.
|
static int |
nvcomp.nvcompBatchedGdeflateCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedGdeflateOpts_t format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGdeflateDecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform decompression asynchronously.
|
static int |
nvcomp.nvcompBatchedGdeflateDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGdeflateDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGdeflateDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGdeflateGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Calculates the decompressed size of each chunk asynchronously.
|
static int |
nvcomp.nvcompBatchedGdeflateGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGzipDecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform decompression asynchronously.
|
static int |
nvcomp.nvcompBatchedGzipDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGzipDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGzipDecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedGzipGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Calculates the decompressed size of each chunk asynchronously.
|
static int |
nvcomp.nvcompBatchedGzipGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedLZ4CompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedLZ4Opts_t format_opts,
CUstream_st stream)
\brief Perform compression asynchronously.
|
static int |
nvcomp.nvcompBatchedLZ4CompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedLZ4Opts_t format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedLZ4DecompressAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
\brief Perform decompression asynchronously.
|
static int |
nvcomp.nvcompBatchedLZ4DecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedLZ4DecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedLZ4DecompressAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedLZ4GetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
\brief Calculates the decompressed size of each chunk asynchronously.
|
static int |
nvcomp.nvcompBatchedLZ4GetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedSnappyCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedSnappyOpts_t format_ops,
CUstream_st stream)
Perform compression.
|
static int |
nvcomp.nvcompBatchedSnappyCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedSnappyOpts_t format_ops,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedSnappyDecompressAsync(PointerPointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
Perform decompression.
|
static int |
nvcomp.nvcompBatchedSnappyDecompressAsync(Pointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedSnappyDecompressAsync(Pointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedSnappyDecompressAsync(Pointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedSnappyGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
Compute uncompressed sizes.
|
static int |
nvcomp.nvcompBatchedSnappyGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedZstdCompressAsync(PointerPointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedZstdOpts_t format_opts,
CUstream_st stream)
Perform compression asynchronously.
|
static int |
nvcomp.nvcompBatchedZstdCompressAsync(Pointer device_uncompressed_ptrs,
SizeTPointer device_uncompressed_bytes,
long max_uncompressed_chunk_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
nvcompBatchedZstdOpts_t format_opts,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedZstdDecompressAsync(PointerPointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
PointerPointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream)
Perform decompression.
|
static int |
nvcomp.nvcompBatchedZstdDecompressAsync(Pointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
int[] device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedZstdDecompressAsync(Pointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntBuffer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedZstdDecompressAsync(Pointer device_compresed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
SizeTPointer device_actual_uncompressed_bytes,
long batch_size,
Pointer device_temp_ptr,
long temp_bytes,
Pointer device_uncompressed_ptrs,
IntPointer device_statuses,
CUstream_st stream) |
static int |
nvcomp.nvcompBatchedZstdGetDecompressSizeAsync(PointerPointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream)
Compute uncompressed sizes.
|
static int |
nvcomp.nvcompBatchedZstdGetDecompressSizeAsync(Pointer device_compressed_ptrs,
SizeTPointer device_compressed_bytes,
SizeTPointer device_uncompressed_bytes,
long batch_size,
CUstream_st stream) |
static int |
nvjpeg.nvjpegBufferDeviceResize(nvjpegBufferDevice buffer,
long size,
CUstream_st stream) |
static int |
nvjpeg.nvjpegBufferPinnedResize(nvjpegBufferPinned buffer,
long size,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecode(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
byte[] data,
long length,
int output_format,
nvjpegImage_t destination,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecode(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
ByteBuffer data,
long length,
int output_format,
nvjpegImage_t destination,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecode(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
BytePointer data,
long length,
int output_format,
nvjpegImage_t destination,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatched(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
byte[] data,
SizeTPointer lengths,
nvjpegImage_t destinations,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatched(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
ByteBuffer data,
SizeTPointer lengths,
nvjpegImage_t destinations,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatched(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
BytePointer data,
SizeTPointer lengths,
nvjpegImage_t destinations,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatched(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
PointerPointer data,
SizeTPointer lengths,
nvjpegImage_t destinations,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatchedEx(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
byte[] data,
SizeTPointer lengths,
nvjpegImage_t destinations,
nvjpegDecodeParams decode_params,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatchedEx(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
ByteBuffer data,
SizeTPointer lengths,
nvjpegImage_t destinations,
nvjpegDecodeParams decode_params,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatchedEx(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
BytePointer data,
SizeTPointer lengths,
nvjpegImage_t destinations,
nvjpegDecodeParams decode_params,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeBatchedEx(nvjpegHandle handle,
nvjpegJpegState jpeg_handle,
PointerPointer data,
SizeTPointer lengths,
nvjpegImage_t destinations,
nvjpegDecodeParams decode_params,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeJpeg(nvjpegHandle handle,
nvjpegJpegDecoder decoder,
nvjpegJpegState decoder_state,
nvjpegJpegStream jpeg_bitstream,
nvjpegImage_t destination,
nvjpegDecodeParams decode_params,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeJpegDevice(nvjpegHandle handle,
nvjpegJpegDecoder decoder,
nvjpegJpegState decoder_state,
nvjpegImage_t destination,
CUstream_st stream) |
static int |
nvjpeg.nvjpegDecodeJpegTransferToDevice(nvjpegHandle handle,
nvjpegJpegDecoder decoder,
nvjpegJpegState decoder_state,
nvjpegJpegStream jpeg_stream,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeImage(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
nvjpegEncoderParams encoder_params,
nvjpegImage_t source,
int input_format,
int image_width,
int image_height,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeRetrieveBitstream(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
byte[] data,
SizeTPointer length,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeRetrieveBitstream(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
ByteBuffer data,
SizeTPointer length,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeRetrieveBitstream(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
BytePointer data,
SizeTPointer length,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeRetrieveBitstreamDevice(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
byte[] data,
SizeTPointer length,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeRetrieveBitstreamDevice(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
ByteBuffer data,
SizeTPointer length,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeRetrieveBitstreamDevice(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
BytePointer data,
SizeTPointer length,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsCopyHuffmanTables(nvjpegEncoderState encoder_state,
nvjpegEncoderParams encode_params,
nvjpegJpegStream jpeg_stream,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsCopyMetadata(nvjpegEncoderState encoder_state,
nvjpegEncoderParams encode_params,
nvjpegJpegStream jpeg_stream,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsCopyQuantizationTables(nvjpegEncoderParams encode_params,
nvjpegJpegStream jpeg_stream,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsCreate(nvjpegHandle handle,
nvjpegEncoderParams encoder_params,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsSetEncoding(nvjpegEncoderParams encoder_params,
int etype,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsSetOptimizedHuffman(nvjpegEncoderParams encoder_params,
int optimized,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsSetQuality(nvjpegEncoderParams encoder_params,
int quality,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderParamsSetSamplingFactors(nvjpegEncoderParams encoder_params,
int chroma_subsampling,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncoderStateCreate(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
CUstream_st stream) |
static int |
nvjpeg.nvjpegEncodeYUV(nvjpegHandle handle,
nvjpegEncoderState encoder_state,
nvjpegEncoderParams encoder_params,
nvjpegImage_t source,
int chroma_subsampling,
int image_width,
int image_height,
CUstream_st stream) |
static void |
nvToolsExt.nvtxNameCudaStreamA(CUstream_st stream,
BytePointer name)
Annotates a CUDA stream.
|
static void |
nvToolsExt.nvtxNameCudaStreamA(CUstream_st stream,
String name) |
static void |
nvToolsExt.nvtxNameCudaStreamW(CUstream_st stream,
CharPointer name) |
static void |
nvToolsExt.nvtxNameCudaStreamW(CUstream_st stream,
IntPointer name) |
static void |
nvToolsExt.nvtxNameCuStreamA(CUstream_st stream,
BytePointer name)
Annotates a CUDA stream.
|
static void |
nvToolsExt.nvtxNameCuStreamA(CUstream_st stream,
String name) |
static void |
nvToolsExt.nvtxNameCuStreamW(CUstream_st stream,
CharPointer name) |
static void |
nvToolsExt.nvtxNameCuStreamW(CUstream_st stream,
IntPointer name) |
static int |
nccl.pncclAllGather(Pointer sendbuff,
Pointer recvbuff,
long sendcount,
int datatype,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclAllReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclBcast(Pointer buff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclBroadcast(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclRecv(Pointer recvbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclReduceScatter(Pointer sendbuff,
Pointer recvbuff,
long recvcount,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclSend(Pointer sendbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
Modifier and Type | Method and Description |
---|---|
CUstream_st |
NppStreamContext.hStream()
From current Cuda stream ID.
|
Modifier and Type | Method and Description |
---|---|
NppStreamContext |
NppStreamContext.hStream(CUstream_st setter) |
Constructor and Description |
---|
ANSManager(long uncomp_chunk_size,
nvcompBatchedANSOpts_t format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
BitcompManager(long uncomp_chunk_size,
nvcompBatchedBitcompFormatOpts format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
DeflateManager(long uncomp_chunk_size,
nvcompBatchedDeflateOpts_t format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
GdeflateManager(long uncomp_chunk_size,
nvcompBatchedGdeflateOpts_t format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
LZ4Manager(long uncomp_chunk_size,
nvcompBatchedLZ4Opts_t format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
SnappyManager(long uncomp_chunk_size,
nvcompBatchedSnappyOpts_t format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
ZstdManager(long uncomp_chunk_size,
nvcompBatchedZstdOpts_t format_opts,
CUstream_st user_stream,
int device_id,
int checksum_policy) |
Modifier and Type | Method and Description |
---|---|
int |
tPinnedFreeV2.call(Pointer ctx,
Pointer ptr,
long size,
CUstream_st stream) |
int |
tDevFreeV2.call(Pointer ctx,
Pointer ptr,
long size,
CUstream_st stream) |
int |
tPinnedMallocV2.call(Pointer ctx,
PointerPointer ptr,
long size,
CUstream_st stream) |
int |
tDevMallocV2.call(Pointer ctx,
PointerPointer ptr,
long size,
CUstream_st stream) |
Copyright © 2024. All rights reserved.