@@ -254,11 +254,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
         info.devices[id].supports_cooperative_launch = false;
 #endif // !(GGML_USE_MUSA)
 
-        // cudaMemGetInfo returns info for the current device
-        size_t free_mem;
-        CUDA_CHECK(cudaSetDevice(id));
-        CUDA_CHECK(cudaMemGetInfo(&free_mem, NULL));
-
 #if defined(GGML_USE_HIP)
         info.devices[id].smpbo = prop.sharedMemPerBlock;
 
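Note: the lines removed above selected each device and queried its free memory during init only so the amount could appear in the log lines changed below. A minimal sketch of querying free VRAM on demand instead (assuming the repo's CUDA_CHECK macro; ggml's public ggml_backend_cuda_get_device_memory() follows the same pattern):

    #include <cuda_runtime.h>

    // cudaMemGetInfo reports values for the *current* device, so the
    // device must be selected before the query.
    static void get_free_vram(int device, size_t * free_mem, size_t * total_mem) {
        CUDA_CHECK(cudaSetDevice(device));
        CUDA_CHECK(cudaMemGetInfo(free_mem, total_mem));
    }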
@@ -273,25 +268,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
                 info.devices[id].cc += prop.minor * 0x10;
             }
         }
-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB (%zu MiB free)\n",
+        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB\n",
                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
                       device_vmm ? "yes" : "no", prop.warpSize,
-                      (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
+                      (size_t)(prop.totalGlobalMem / (1024 * 1024)));
 #elif defined(GGML_USE_MUSA)
         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
         info.devices[id].warp_size = 32;
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
         info.devices[id].cc += prop.minor * 0x10;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n",
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n",
                       id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
-                      (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
+                      (size_t)(prop.totalGlobalMem / (1024 * 1024)));
 #else
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n",
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n",
                       id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
-                      (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
+                      (size_t)(prop.totalGlobalMem / (1024 * 1024)));
         std::string device_name(prop.name);
         if (device_name == "NVIDIA GeForce MX450") {
             turing_devices_without_mma.push_back({ id, device_name });
@@ -306,6 +301,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
         // TODO: Check for future drivers the default scheduling strategy and
         // remove this call again when cudaDeviceScheduleSpin is default.
         if (prop.major == 12 && prop.minor == 1) {
+            CUDA_CHECK(cudaSetDevice(id));
             CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
         }
 
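The cudaSetDevice(id) added in this hunk is needed because cudaSetDeviceFlags applies to whichever device is current; previously the cudaSetDevice(id) call removed in the first hunk made each device current as a side effect. A short sketch of the per-device pattern, assuming a plain loop over a hypothetical device_count:

    // Flags attach to the current device, so select each device first.
    for (int id = 0; id < device_count; ++id) {
        CUDA_CHECK(cudaSetDevice(id));
        // Spin-wait for results instead of yielding the host thread.
        CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
    }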