Skip to content

Commit b6e029c

Browse files
authored
Add CUDA configuration to firedrake-configure and github actions (#4988)
1 parent 8bd77a8 commit b6e029c

8 files changed

Lines changed: 410 additions & 52 deletions

File tree

.github/actionlint.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# actionlint configuration: declares the non-standard runner labels used by
# the workflows in this repository.
self-hosted-runner:
  labels:
    # Label attached to the GPU-enabled self-hosted runners
    - gpu

.github/workflows/core.yml

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ on:
2323
description: Whether to test using macOS
2424
type: boolean
2525
default: false
26+
test_gpu:
27+
description: Whether to test using CUDA-enabled PETSc
28+
type: boolean
29+
default: false
2630
deploy_website:
2731
description: Whether to deploy the website
2832
type: boolean
@@ -54,6 +58,10 @@ on:
5458
description: Whether to test using macOS
5559
type: boolean
5660
default: false
61+
test_gpu:
62+
description: Whether to test using CUDA-enabled PETSc
63+
type: boolean
64+
default: false
5765
deploy_website:
5866
description: Whether to deploy the website
5967
type: boolean
@@ -465,6 +473,137 @@ jobs:
465473
run: |
466474
find . -delete
467475
476+
test_gpu:
  # Builds a CUDA-enabled PETSc and Firedrake inside an Ubuntu container on a
  # GPU-enabled self-hosted runner, then runs the Firedrake smoke tests.
  # Only runs when the calling workflow sets the 'test_gpu' input.
  name: Build and test Firedrake (Linux CUDA)
  runs-on: [self-hosted, Linux, gpu]
  container:
    image: ubuntu:latest
    options: --gpus all
  if: inputs.test_gpu
  env:
    OMPI_ALLOW_RUN_AS_ROOT: 1
    OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
    OMP_NUM_THREADS: 1
    OPENBLAS_NUM_THREADS: 1
    FIREDRAKE_CI: 1
    PYOP2_SPMD_STRICT: 1
    # Disable fast math as it exposes compiler bugs
    PYOP2_CFLAGS: -fno-fast-math
    # NOTE: One should occasionally update test_durations.json by running
    # 'make test_durations' inside a 'firedrake:latest' Docker image.
    EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50
    PYTEST_MPI_MAX_NPROCS: 8
    # Prevent PETSc from exiting with an error due to using non-GPU aware system MPI
    PETSC_OPTIONS: -use_gpu_aware_mpi 0
    # The workflow input is handed to the shell steps through the environment
    # rather than being interpolated into the scripts, so that its value is
    # always treated as data and can be safely quoted.
    TARGET_BRANCH: ${{ inputs.target_branch }}
  steps:
    - name: Confirm Nvidia GPUs are enabled
      # The presence of the nvidia-smi command indicates that the Nvidia drivers have
      # successfully been imported into the container, there is no point continuing
      # if nvidia-smi is not present
      run: nvidia-smi

    - name: Fix HOME
      # For unknown reasons GitHub actions overwrite HOME to /github/home
      # which will break everything unless fixed
      # (https://github.com/actions/runner/issues/863)
      run: echo "HOME=/root" >> "$GITHUB_ENV"

    # Git is needed for actions/checkout and Python for firedrake-configure
    # curl needed for adding new deb repositories to ubuntu
    - name: Install system dependencies (1)
      run: |
        apt-get update
        apt-get -y install git python3 curl

    - name: Pre-run cleanup
      # Make sure the current directory is empty
      run: find . -delete

    - uses: actions/checkout@v5
      with:
        path: firedrake-repo
        ref: ${{ inputs.source_ref }}

    - name: Add Nvidia CUDA deb repositories
      run: |
        deburl=$( python3 ./firedrake-repo/scripts/firedrake-configure --show-extra-repo-pkg-url --gpu-arch cuda )
        debfile=$( basename "${deburl}" )
        curl -fsSLO "${deburl}"
        dpkg -i "${debfile}"
        apt-get update

    - name: Install system dependencies (2)
      run: |
        apt-get -y install \
          $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-system-packages)
        apt-get -y install python3-venv
        : # Dependencies needed to run the test suite
        apt-get -y install fonts-dejavu graphviz graphviz-dev parallel poppler-utils

    - name: Install PETSc
      env:
        EXTRA_OPTIONS: -use_gpu_aware_mpi 0
      run: |
        if [ "$TARGET_BRANCH" = 'release' ]; then
          git clone --depth 1 \
            --branch "$(python3 ./firedrake-repo/scripts/firedrake-configure --gpu-arch cuda --show-petsc-version)" \
            https://gitlab.com/petsc/petsc.git
        else
          git clone --depth 1 https://gitlab.com/petsc/petsc.git
        fi
        cd petsc
        python3 ../firedrake-repo/scripts/firedrake-configure \
          --arch default --gpu-arch cuda --show-petsc-configure-options | \
          xargs -L1 ./configure --with-make-np=4
        make
        make check
        : # Record the PETSc location from the current directory instead of
        : # hard-coding the workspace path, which depends on the repository name
        petsc_dir="$(pwd)"
        {
          echo "PETSC_DIR=${petsc_dir}"
          echo "PETSC_ARCH=arch-firedrake-default-cuda"
          echo "SLEPC_DIR=${petsc_dir}/arch-firedrake-default-cuda"
        } >> "$GITHUB_ENV"

    - name: Install Firedrake
      id: install
      run: |
        export $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-env)
        python3 -m venv venv
        . venv/bin/activate

        : # Empty the pip cache to ensure that everything is compiled from scratch
        pip cache purge

        if [ "$TARGET_BRANCH" = 'release' ]; then
          EXTRA_PIP_FLAGS=''
        else
          : # Install build dependencies
          pip install "$PETSC_DIR"/src/binding/petsc4py
          pip install -r ./firedrake-repo/requirements-build.txt

          : # We have to pass '--no-build-isolation' to use a custom petsc4py
          EXTRA_PIP_FLAGS='--no-build-isolation'
        fi

        : # EXTRA_PIP_FLAGS is deliberately unquoted so that an empty value
        : # expands to no argument at all
        pip install --verbose $EXTRA_PIP_FLAGS \
          --no-binary h5py \
          './firedrake-repo[check]'

        firedrake-clean
        pip list

    - name: Run smoke tests
      run: |
        . venv/bin/activate
        firedrake-check
      timeout-minutes: 10

    - name: Post-run cleanup
      # Always leave the runner's working directory empty, even on failure
      if: always()
      run: |
        find . -delete
606+
468607
lint:
469608
name: Lint codebase
470609
runs-on: ubuntu-latest

.github/workflows/pr.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,6 @@ jobs:
1212
target_branch: ${{ github.base_ref }}
1313
# Only run macOS tests if the PR is labelled 'macOS'
1414
test_macos: ${{ contains(github.event.pull_request.labels.*.name, 'macOS') }}
15+
# Only run GPU tests if the PR is labelled 'gpu'
16+
test_gpu: ${{ contains(github.event.pull_request.labels.*.name, 'gpu') }}
1517
secrets: inherit

.github/workflows/push.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ jobs:
1313
source_ref: ${{ github.ref_name }}
1414
target_branch: ${{ github.ref_name }}
1515
test_macos: true
16+
test_gpu: true
1617
deploy_website: true
1718
secrets: inherit
1819

firedrake/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,9 @@ class NonUniqueMeshSequenceError(FiredrakeException):
4747
"""Raised when calling `.unique()` on a MeshSequence which contains
4848
non-unique meshes.
4949
"""
50+
51+
52+
class UnrecognisedDeviceError(FiredrakeException):
    """Raised when PETSc has initialised a GPU device of a kind that
    Firedrake does not support.
    """

firedrake/utils.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
from pyop2.datatypes import IntType # noqa: F401
88
from pyop2.datatypes import as_ctypes # noqa: F401
99
from pyop2.mpi import MPI
10+
from petsc4py import PETSc
11+
from functools import cache
12+
from firedrake.exceptions import UnrecognisedDeviceError
1013
import petsctools
1114

1215

@@ -23,6 +26,64 @@
2326
SLATE_SUPPORTS_COMPLEX = False
2427

2528

29+
@cache
30+
def device_matrix_type(warn: bool = True) -> str | None:
31+
r"""Get device matrix type
32+
33+
Attempt to initialise a GPU device and return the PETSc mat_type
34+
compatible with that device, or None if no device is detected.
35+
Typical Usage Example:
36+
mat_type = device_matrix_type(pc.comm.rank == 0)
37+
38+
Parameters
39+
----------
40+
warn
41+
Emit a warning containing the reason a device mat_type
42+
has not been returned. Defaults to False.
43+
44+
Raises
45+
------
46+
UnrecognisedDeviceError
47+
Raised when PETSc initialises a GPU device that
48+
Firedrake does not understand
49+
50+
Returns
51+
-------
52+
str | None
53+
The PETSc mat_type compatible with the GPU device detected on
54+
this system or None
55+
56+
"""
57+
_device_mat_type_map = {"HOST": None, "CUDA": "aijcusparse"}
58+
try:
59+
dev = PETSc.Device.create()
60+
except PETSc.Error:
61+
# Could not initialise device - not a failure condition as this could
62+
# be a GPU-enabled PETSc installation running on a CPU-only host.
63+
if warn:
64+
warnings.warn(
65+
"This installation of Firedrake is GPU-enabled, but no GPU device has been detected"
66+
)
67+
return None
68+
dev_type = dev.getDeviceType()
69+
dev.destroy()
70+
if dev_type not in _device_mat_type_map:
71+
raise UnrecognisedDeviceError(
72+
f"Unknown device type: {dev_type} initialised by PETSc. Firedrake "
73+
f"currently understands {', '.join([k for k in _device_mat_type_map if k != 'HOST'])}"
74+
"devices"
75+
)
76+
77+
if warn:
78+
if dev_type == "HOST":
79+
warnings.warn(
80+
"This installation of Firedrake is not GPU-enabled, to enable GPU functionality "
81+
"PETSc will need to be rebuilt with some GPU capability appropriate for this system "
82+
"(e.g. '--with-cuda=1')."
83+
)
84+
return _device_mat_type_map[dev_type]
85+
86+
2687
def _new_uid(comm):
2788
uid = comm.Get_attr(FIREDRAKE_UID)
2889
if uid is None:

0 commit comments

Comments
 (0)