@@ -683,7 +683,7 @@ void MPMesh::startCommunication(){
683683
684684 MPI_Waitall (requests.size (), requests.data (), MPI_STATUSES_IGNORE);
685685
686- pumipic::RecordTime (" Start Communication" , timer.seconds ());
686+ pumipic::RecordTime (" Start Communication" + std::to_string (self) , timer.seconds ());
687687
688688 if (p_MPs->getOpMode () != polyMPO::MP_DEBUG)
689689 return ;
@@ -727,8 +727,9 @@ void MPMesh::startCommunication(){
727727void MPMesh::reconstruct_coeff_full (){
728728
729729 Kokkos::Timer timer;
730- int numProcsTot;
730+ int self, numProcsTot;
731731 MPI_Comm comm = p_MPs->getMPIComm ();
732+ MPI_Comm_rank (comm, &self);
732733 MPI_Comm_size (comm, &numProcsTot);
733734
734735 // Mesh Information
@@ -782,13 +783,12 @@ void MPMesh::reconstruct_coeff_full(){
782783 };
783784 p_MPs->parallel_for (assemble, " assembly" );
784785
785- pumipic::RecordTime (" Assemble Matrix Per Process" , timer.seconds ());
786+ pumipic::RecordTime (" Assemble Matrix Per Process" + std::to_string (self) , timer.seconds ());
786787 // Mode 0 is Gather: Halos Send to Owners
787788 // Mode 1 is Scatter: Owners Send to Halos
788789 // Op 0 is addition
789790 // Op 1 is replacement
790-
791- Kokkos::Timer timer2;
791+ timer.reset ();
792792 int mode = 0 ;
793793 int op = 0 ;
794794 if (numProcsTot >1 ){
@@ -797,14 +797,19 @@ void MPMesh::reconstruct_coeff_full(){
797797 op=1 ;
798798 communicate_and_take_halo_contributions (vtxMatrices, numVertices, numEntriesMatrix, mode, op);
799799 }
800- pumipic::RecordTime (" Communicate Matrix Values" , timer2 .seconds ());
800+ pumipic::RecordTime (" Communicate Matrix Values" + std::to_string (self), timer .seconds ());
801801
802802 solveMatrix (vtxMatrices, radius, scaling);
803803}
804804
805805void MPMesh::solveMatrix (const Kokkos::View<double **>& vtxMatrices, double & radius, bool scaling){
806+
806807 Kokkos::Timer timer;
807-
808+
809+ int self;
810+ MPI_Comm comm = p_MPs->getMPIComm ();
811+ MPI_Comm_rank (comm, &self);
812+
808813 auto dual_triangle_area=p_mesh->getMeshField <MeshF_DualTriangleArea>();
809814 int nVertices = p_mesh->getNumVertices ();
810815
@@ -836,15 +841,16 @@ void MPMesh::solveMatrix(const Kokkos::View<double**>& vtxMatrices, double& radi
836841 });
837842 this ->precomputedVtxCoeffs = VtxCoeffs;
838843
839- pumipic::RecordTime (" polyMPOsolveMatrixCoeffCompute " , timer.seconds ());
844+ pumipic::RecordTime (" SolveMatrix " + std::to_string (self) , timer.seconds ());
840845}
841846
842847template <MeshFieldIndex meshFieldIndex>
843848void MPMesh::reconstruct_full () {
844849 Kokkos::Timer timer;
845850
846- int numProcsTot;
851+ int self, numProcsTot;
847852 MPI_Comm comm = p_MPs->getMPIComm ();
853+ MPI_Comm_rank (comm, &self);
848854 MPI_Comm_size (comm, &numProcsTot);
849855
850856 auto VtxCoeffs=this ->precomputedVtxCoeffs ;
@@ -894,15 +900,21 @@ void MPMesh::reconstruct_full() {
894900 }
895901 };
896902 p_MPs->parallel_for (reconstruct, " reconstruct" );
897- pumipic::RecordTime (" Assemble Field per process" , timer.seconds ());
903+ pumipic::RecordTime (" Assemble Field per process" + std::to_string (self) , timer.seconds ());
898904
899- Kokkos::Timer timer2;
905+ timer. reset ();
900906 if (numProcsTot>1 )
901907 communicate_and_take_halo_contributions (meshField, numVertices, numEntries, 0 , 0 );
902- pumipic::RecordTime (" Communicate Field Values" , timer2 .seconds ());
908+ pumipic::RecordTime (" Communicate Field Values" + std::to_string (self), timer .seconds ());
903909}
904910
905911void MPMesh::communicate_and_take_halo_contributions (const Kokkos::View<double **>& meshField, int nEntities, int numEntries, int mode, int op){
912+
913+ int self;
914+ MPI_Comm comm = p_MPs->getMPIComm ();
915+ MPI_Comm_rank (comm, &self);
916+
917+ Kokkos::Timer timer;
906918 auto reconVals_host = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), meshField);
907919 std::vector<std::vector<double >> fieldData (nEntities, std::vector<double >(numEntries, 0.0 ));
908920 for (int i = 0 ; i < nEntities; ++i) {
@@ -913,10 +925,14 @@ void MPMesh::communicate_and_take_halo_contributions(const Kokkos::View<double**
913925
914926 std::vector<std::vector<int >> recvIDVec;
915927 std::vector<std::vector<double >> recvDataVec;
928+ pumipic::RecordTime (" Communication-GPU to CPU-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
929+
930+ timer.reset ();
916931 communicateFields (fieldData, nEntities, numEntries, mode, recvIDVec, recvDataVec);
932+ pumipic::RecordTime (" Communication-InterProcess-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
917933
934+ timer.reset ();
918935 int numProcsTot = recvIDVec.size ();
919-
920936 // Flatten IDs
921937 int totalSize = 0 ;
922938 std::vector<int > offsets (numProcsTot, 0 );
@@ -954,21 +970,21 @@ void MPMesh::communicate_and_take_halo_contributions(const Kokkos::View<double**
954970 for (int i=0 ; i<numProcsTot; i++){
955971 assert (recvDataVec[i].size () == recvIDVec[i].size () * numEntries);
956972 }
957-
973+ pumipic::RecordTime (" Communication-CPU to GPU-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
974+
958975 // Take contributions from other procs
976+ timer.reset ();
959977 Kokkos::parallel_for (" halo contribution" , recvIDGPU.size (), KOKKOS_LAMBDA (const int i){
960978 int vertex = recvIDGPU (i);
961979 for (int k=0 ; k<numEntries; k++){
962980 if (op==0 ) Kokkos::atomic_add (&meshField (vertex,k), recvDataGPU (i*numEntries+k));
963981 if (op==1 ) meshField (vertex, k) = recvDataGPU (i * numEntries + k);
964982 }
965983 });
966-
984+ pumipic::RecordTime (" Communication-GPU reduction-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
985+
967986 if (p_MPs->getOpMode () != polyMPO::MP_DEBUG)
968987 return ;
969- int self;
970- MPI_Comm comm = p_MPs->getMPIComm ();
971- MPI_Comm_rank (comm, &self);
972988 if (self==1 ){
973989 for (int i=0 ; i< totalSize; i++){
974990 if (flatDataVec[i*numEntries]==0 ) continue ;