Commit ceef6b5

ggml: avoid creating CUDA context during device init (ggml-org#20595)
1 parent 07c6a59 commit ceef6b5

1 file changed: ggml/src/ggml-cuda/ggml-cuda.cu (7 additions, 11 deletions)
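Why the change: cudaMemGetInfo reports memory for the current device only, so the old enumeration loop had to call cudaSetDevice(id) before each query, and the query itself forces a CUDA context to be instantiated on every visible device during init, even for devices that are never touched again. Dropping the free-memory readout from the startup log lets enumeration rely entirely on context-free property queries. The following is a minimal standalone sketch, not part of this commit (error handling reduced to early exits), contrasting the two kinds of queries:

// Standalone sketch, not from this repo: enumerate devices without
// instantiating a CUDA context on each of them.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int count = 0;
    if (cudaGetDeviceCount(&count) != cudaSuccess) {
        return 1;
    }
    for (int id = 0; id < count; ++id) {
        cudaDeviceProp prop;
        // Context-free query: reads static device attributes only.
        if (cudaGetDeviceProperties(&prop, id) != cudaSuccess) {
            continue;
        }
        std::printf("Device %d: %s, VRAM: %zu MiB\n",
                    id, prop.name, (size_t)(prop.totalGlobalMem / (1024 * 1024)));
        // By contrast, a free-memory query binds to the current device and
        // forces a context into existence on it, which is what this commit
        // removes from device init:
        //   cudaSetDevice(id);
        //   size_t free_mem;
        //   cudaMemGetInfo(&free_mem, NULL);
    }
    return 0;
}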
ggml/src/ggml-cuda/ggml-cuda.cu

@@ -254,11 +254,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
         info.devices[id].supports_cooperative_launch = false;
 #endif // !(GGML_USE_MUSA)
 
-        // cudaMemGetInfo returns info for the current device
-        size_t free_mem;
-        CUDA_CHECK(cudaSetDevice(id));
-        CUDA_CHECK(cudaMemGetInfo(&free_mem, NULL));
-
 #if defined(GGML_USE_HIP)
         info.devices[id].smpbo = prop.sharedMemPerBlock;
 
@@ -273,25 +268,25 @@
                 info.devices[id].cc += prop.minor * 0x10;
             }
         }
-        GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB (%zu MiB free)\n",
+        GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB\n",
                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
                       device_vmm ? "yes" : "no", prop.warpSize,
-                      (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
+                      (size_t)(prop.totalGlobalMem / (1024 * 1024)));
 #elif defined(GGML_USE_MUSA)
         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
         info.devices[id].warp_size = 32;
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
         info.devices[id].cc += prop.minor * 0x10;
-        GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n",
+        GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n",
                       id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
-                      (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
+                      (size_t)(prop.totalGlobalMem / (1024 * 1024)));
 #else
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
-        GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n",
+        GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n",
                       id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
-                      (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
+                      (size_t)(prop.totalGlobalMem / (1024 * 1024)));
         std::string device_name(prop.name);
         if (device_name == "NVIDIA GeForce MX450") {
             turing_devices_without_mma.push_back({ id, device_name });
@@ -306,6 +301,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
         // TODO: Check for future drivers the default scheduling strategy and
         // remove this call again when cudaDeviceScheduleSpin is default.
         if (prop.major == 12 && prop.minor == 1) {
+            CUDA_CHECK(cudaSetDevice(id));
             CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
         }
 
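Note the compensating change in the last hunk: with the unconditional cudaSetDevice(id) gone from the enumeration loop, the sm 12.1 scheduling workaround now selects its device explicitly before calling cudaSetDeviceFlags, since the flags apply to whichever device is current. A context is thus only brought up on a device when the flag change actually requires one.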