Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions cuda_core/cuda/core/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,18 +235,22 @@ def _make_restore_args(driver, gpu_mapping: _Mapping[_Any, _Any] | None):
def _as_cuuuid(driver, value, buffers):
    """Convert a GPU UUID string to a ``CUuuid``.

    Accepts a UUID string in the ``"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"``
    format returned by :attr:`Device.uuid`. Hyphens are stripped before the
    remaining characters are decoded as hexadecimal.

    Parameters
    ----------
    driver
        Object exposing a ``CUuuid`` callable that is invoked with the address
        of a 16-byte buffer.
    value
        The UUID string to convert.
    buffers
        List to which the backing ``ctypes`` buffer is appended.

    Returns
    -------
    The object returned by ``driver.CUuuid``.

    Raises
    ------
    TypeError
        If *value* is not a ``str``.
    ValueError
        If *value* is not valid hexadecimal or does not decode to exactly
        16 bytes.
    """
    if not isinstance(value, str):
        raise TypeError("GPU UUID values must be UUID strings")

    try:
        raw = bytes.fromhex(value.replace("-", ""))
    except ValueError:
        # Replace the low-level fromhex() message with one that names the
        # expected format; ``from None`` hides the original traceback.
        raise ValueError(
            f"GPU UUID string must be 32 hex characters (with optional hyphens), got {value!r}"
        ) from None
    if len(raw) != 16:
        raise ValueError(f"GPU UUID string must be 32 hex characters (with optional hyphens), got {value!r}")

    buf = _ctypes.create_string_buffer(raw, 16)
    # The CUuuid is built from the buffer's address, so the caller is handed a
    # reference to the buffer as well (presumably to keep the memory alive
    # while the CUuuid is in use -- confirm against the caller).
    buffers.append(buf)
    return driver.CUuuid(_ctypes.addressof(buf))


__all__ = [
Expand Down
8 changes: 4 additions & 4 deletions cuda_core/docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,10 @@ should be used during restore. For migration workflows, provide mappings for
every GPU visible to the NVIDIA kernel-mode driver at checkpoint time.
User-space masking such as ``CUDA_VISIBLE_DEVICES`` does not reduce this
mapping requirement, so applications that rely on user-space GPU masking may
not be valid migration targets. The mapping should use the UUID strings
returned by :attr:`Device.uuid`. A successful restore returns the process to
the locked state; call ``Process.unlock`` after restore to allow CUDA API
calls to resume.

The CUDA driver requires restore to run from the process restore thread.
Use ``Process.restore_thread_id`` to discover that thread before calling
Expand Down
Loading