@@ -78,7 +78,8 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
7878 cl_short lut11to16[2048 ];
7979 cl_float x_table[512 * 424 ];
8080 cl_float z_table[512 * 424 ];
81- cl_float3 p0_table[512 * 424 ];
81+ cl_float3 p0_sin_table[512 * 424 ];
82+ cl_float3 p0_cos_table[512 * 424 ];
8283 libfreenect2::DepthPacketProcessor::Config config;
8384 DepthPacketProcessor::Parameters params;
8485
@@ -105,7 +106,8 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
105106 size_t buf_packet_size;
106107
107108 cl::Buffer buf_lut11to16;
108- cl::Buffer buf_p0_table;
109+ cl::Buffer buf_p0_sin_table;
110+ cl::Buffer buf_p0_cos_table;
109111 cl::Buffer buf_x_table;
110112 cl::Buffer buf_z_table;
111113 cl::Buffer buf_packet;
@@ -200,9 +202,12 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
200202 oss << " -D AB_MULTIPLIER_PER_FRQ2=" << params.ab_multiplier_per_frq [2 ] << " f" ;
201203 oss << " -D AB_OUTPUT_MULTIPLIER=" << params.ab_output_multiplier << " f" ;
202204
203- oss << " -D PHASE_IN_RAD0=" << params.phase_in_rad [0 ] << " f" ;
204- oss << " -D PHASE_IN_RAD1=" << params.phase_in_rad [1 ] << " f" ;
205- oss << " -D PHASE_IN_RAD2=" << params.phase_in_rad [2 ] << " f" ;
205+ oss << " -D PHASE_IN_RAD0_SIN=" << std::sin (-params.phase_in_rad [0 ]) << " f" ;
206+ oss << " -D PHASE_IN_RAD0_COS=" << std::cos (params.phase_in_rad [0 ]) << " f" ;
207+ oss << " -D PHASE_IN_RAD1_SIN=" << std::sin (-params.phase_in_rad [1 ]) << " f" ;
208+ oss << " -D PHASE_IN_RAD1_COS=" << std::cos (params.phase_in_rad [1 ]) << " f" ;
209+ oss << " -D PHASE_IN_RAD2_SIN=" << std::sin (-params.phase_in_rad [2 ]) << " f" ;
210+ oss << " -D PHASE_IN_RAD2_COS=" << std::cos (params.phase_in_rad [2 ]) << " f" ;
206211
207212 oss << " -D JOINT_BILATERAL_AB_THRESHOLD=" << params.joint_bilateral_ab_threshold << " f" ;
208213 oss << " -D JOINT_BILATERAL_MAX_EDGE=" << params.joint_bilateral_max_edge << " f" ;
@@ -382,7 +387,9 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
382387
383388 buf_lut11to16 = cl::Buffer (context, CL_READ_ONLY_CACHE, buf_lut11to16_size, NULL , &err);
384389 CHECK_CL_ERROR (err, " cl::Buffer" );
385- buf_p0_table = cl::Buffer (context, CL_READ_ONLY_CACHE, buf_p0_table_size, NULL , &err);
390+ buf_p0_sin_table = cl::Buffer (context, CL_READ_ONLY_CACHE, buf_p0_table_size, NULL , &err);
391+ CHECK_CL_ERROR (err, " cl::Buffer" );
392+ buf_p0_cos_table = cl::Buffer (context, CL_READ_ONLY_CACHE, buf_p0_table_size, NULL , &err);
386393 CHECK_CL_ERROR (err, " cl::Buffer" );
387394 buf_x_table = cl::Buffer (context, CL_READ_ONLY_CACHE, buf_x_table_size, NULL , &err);
388395 CHECK_CL_ERROR (err, " cl::Buffer" );
@@ -430,17 +437,19 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
430437 CHECK_CL_ERROR (err, " setArg" );
431438 err = kernel_processPixelStage1.setArg (1 , buf_z_table);
432439 CHECK_CL_ERROR (err, " setArg" );
433- err = kernel_processPixelStage1.setArg (2 , buf_p0_table);
440+ err = kernel_processPixelStage1.setArg (2 , buf_p0_sin_table);
441+ CHECK_CL_ERROR (err, " setArg" );
442+ err = kernel_processPixelStage1.setArg (3 , buf_p0_cos_table);
434443 CHECK_CL_ERROR (err, " setArg" );
435- err = kernel_processPixelStage1.setArg (3 , buf_packet);
444+ err = kernel_processPixelStage1.setArg (4 , buf_packet);
436445 CHECK_CL_ERROR (err, " setArg" );
437- err = kernel_processPixelStage1.setArg (4 , buf_a);
446+ err = kernel_processPixelStage1.setArg (5 , buf_a);
438447 CHECK_CL_ERROR (err, " setArg" );
439- err = kernel_processPixelStage1.setArg (5 , buf_b);
448+ err = kernel_processPixelStage1.setArg (6 , buf_b);
440449 CHECK_CL_ERROR (err, " setArg" );
441- err = kernel_processPixelStage1.setArg (6 , buf_n);
450+ err = kernel_processPixelStage1.setArg (7 , buf_n);
442451 CHECK_CL_ERROR (err, " setArg" );
443- err = kernel_processPixelStage1.setArg (7 , buf_ir);
452+ err = kernel_processPixelStage1.setArg (8 , buf_ir);
444453 CHECK_CL_ERROR (err, " setArg" );
445454
446455 kernel_filterPixelStage1 = cl::Kernel (program, " filterPixelStage1" , &err);
@@ -484,14 +493,16 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
484493 err = kernel_filterPixelStage2.setArg (3 , buf_filtered);
485494 CHECK_CL_ERROR (err, " setArg" );
486495
487- cl::Event event0, event1, event2, event3;
496+ cl::Event event0, event1, event2, event3, event4 ;
488497 err = queue.enqueueWriteBuffer (buf_lut11to16, CL_FALSE, 0 , buf_lut11to16_size, lut11to16, NULL , &event0);
489498 CHECK_CL_ERROR (err, " enqueueWriteBuffer" );
490- err = queue.enqueueWriteBuffer (buf_p0_table , CL_FALSE, 0 , buf_p0_table_size, p0_table , NULL , &event1);
499+ err = queue.enqueueWriteBuffer (buf_p0_sin_table , CL_FALSE, 0 , buf_p0_table_size, p0_sin_table , NULL , &event1);
491500 CHECK_CL_ERROR (err, " enqueueWriteBuffer" );
492- err = queue.enqueueWriteBuffer (buf_x_table , CL_FALSE, 0 , buf_x_table_size, x_table , NULL , &event2);
501+ err = queue.enqueueWriteBuffer (buf_p0_cos_table , CL_FALSE, 0 , buf_p0_table_size, p0_cos_table , NULL , &event2);
493502 CHECK_CL_ERROR (err, " enqueueWriteBuffer" );
494- err = queue.enqueueWriteBuffer (buf_z_table, CL_FALSE, 0 , buf_z_table_size, z_table, NULL , &event3);
503+ err = queue.enqueueWriteBuffer (buf_x_table, CL_FALSE, 0 , buf_x_table_size, x_table, NULL , &event3);
504+ CHECK_CL_ERROR (err, " enqueueWriteBuffer" );
505+ err = queue.enqueueWriteBuffer (buf_z_table, CL_FALSE, 0 , buf_z_table_size, z_table, NULL , &event4);
495506 CHECK_CL_ERROR (err, " enqueueWriteBuffer" );
496507
497508 err = event0.wait ();
@@ -502,6 +513,8 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
502513 CHECK_CL_ERROR (err, " wait" );
503514 err = event3.wait ();
504515 CHECK_CL_ERROR (err, " wait" );
516+ err = event4.wait ();
517+ CHECK_CL_ERROR (err, " wait" );
505518 }
506519
507520 programInitialized = true ;
@@ -606,16 +619,24 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
606619 {
607620 for (int r = 0 ; r < 424 ; ++r)
608621 {
609- cl_float3 *it = &p0_table[r * 512 ];
622+ cl_float3 *itS = &p0_sin_table[r * 512 ];
623+ cl_float3 *itC = &p0_cos_table[r * 512 ];
610624 const uint16_t *it0 = &p0table->p0table0 [r * 512 ];
611625 const uint16_t *it1 = &p0table->p0table1 [r * 512 ];
612626 const uint16_t *it2 = &p0table->p0table2 [r * 512 ];
613- for (int c = 0 ; c < 512 ; ++c, ++it , ++it0, ++it1, ++it2)
627+ for (int c = 0 ; c < 512 ; ++c, ++itS, ++itC , ++it0, ++it1, ++it2)
614628 {
615- it->s [0 ] = -((float ) * it0) * 0.000031 * M_PI;
616- it->s [1 ] = -((float ) * it1) * 0.000031 * M_PI;
617- it->s [2 ] = -((float ) * it2) * 0.000031 * M_PI;
618- it->s [3 ] = 0 .0f ;
629+ const float x = ((float )*it0) * 0.000031 * M_PI;
630+ const float y = ((float )*it1) * 0.000031 * M_PI;
631+ const float z = ((float )*it2) * 0.000031 * M_PI;
632+ itS->s [0 ] = std::sin (x);
633+ itS->s [1 ] = std::sin (y);
634+ itS->s [2 ] = std::sin (z);
635+ itS->s [3 ] = 0 .0f ;
636+ itC->s [0 ] = std::cos (-x);
637+ itC->s [1 ] = std::cos (-y);
638+ itC->s [2 ] = std::cos (-z);
639+ itC->s [3 ] = 0 .0f ;
619640 }
620641 }
621642 }
0 commit comments