@@ -294,30 +294,6 @@ static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl:
294294 }
295295}
296296
297- template <typename T>
298- static void upscale (const T *x, T *dst, const int nb00, const int nb01,
299- const int nb02, const int nb03, const int ne10, const int ne11,
300- const int ne12, const int ne13, const float sf0, const float sf1,
301- const float sf2, const float sf3, const sycl::nd_item<1 > &item_ct1) {
302- int index = item_ct1.get_local_id (0 ) +
303- item_ct1.get_group (0 ) * item_ct1.get_local_range (0 );
304- if (index >= ne10 * ne11 * ne12 * ne13) {
305- return ;
306- }
307- // operation
308- int i10 = index % ne10;
309- int i11 = (index / ne10) % ne11;
310- int i12 = (index / (ne10 * ne11)) % ne12;
311- int i13 = (index / (ne10 * ne11 * ne12)) % ne13;
312-
313- int i00 = static_cast <int >(i10 / sf0);
314- int i01 = static_cast <int >(i11 / sf1);
315- int i02 = static_cast <int >(i12 / sf2);
316- int i03 = static_cast <int >(i13 / sf3);
317-
318- dst[index] = *(const T *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
319- }
320-
321297template <typename T>
322298static void clamp (const T * x, T * dst, const float min, const float max, const int k,
323299 const sycl::nd_item<1 > &item_ct1) {
@@ -392,20 +368,6 @@ static void arange_kernel(T * dst, const int k, T start, T step,
392368 }
393369}
394370
395- template <typename T>
396- static void upscale_sycl (const T *x, T *dst, const int nb00, const int nb01,
397- const int nb02, const int nb03, const int ne10, const int ne11,
398- const int ne12, const int ne13, const float sf0, const float sf1,
399- const float sf2, const float sf3, queue_ptr stream) {
400- int dst_size = ne10 * ne11 * ne12 * ne13;
401- int num_blocks = ceil_div (dst_size, SYCL_UPSCALE_BLOCK_SIZE);
402- sycl::range<1 > gridDim (num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
403- stream->parallel_for (
404- sycl::nd_range<1 >(gridDim, sycl::range<1 >(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
405- upscale (x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1);
406- });
407- }
408-
409371template <typename KernelInvoker, typename ... Args>
410372static inline void dispatch_ggml_sycl_op_unary (ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) {
411373 GGML_ASSERT (dst->src [0 ]->type == GGML_TYPE_F32 || dst->src [0 ]->type == GGML_TYPE_F16);
@@ -505,42 +467,6 @@ static inline void dispatch_ggml_sycl_op_fused_glu(ggml_backend_sycl_context & c
505467 }
506468}
507469
508- template <typename KernelInvoker, typename ... Args>
509- static inline void dispatch_ggml_sycl_op_upscale (ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) {
510- GGML_ASSERT (dst->src [0 ]->type == GGML_TYPE_F32 || dst->src [0 ]->type == GGML_TYPE_F16);
511- GGML_ASSERT (dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
512-
513- GGML_ASSERT (dst->src [0 ]->type == dst->type );
514-
515- dpct::queue_ptr main_stream = ctx.stream ();
516- SYCL_CHECK (ggml_sycl_set_device (ctx.device ));
517-
518- const float sf0 = (float ) dst->ne [0 ] / dst->src [0 ]->ne [0 ];
519- const float sf1 = (float ) dst->ne [1 ] / dst->src [0 ]->ne [1 ];
520- const float sf2 = (float ) dst->ne [2 ] / dst->src [0 ]->ne [2 ];
521- const float sf3 = (float ) dst->ne [3 ] / dst->src [0 ]->ne [3 ];
522- switch (dst->type ) {
523- case GGML_TYPE_F16:
524- {
525- auto data_pts = cast_data<sycl::half>(dst);
526- kernel_invoker (data_pts.src , data_pts.dst , (int )dst->src [0 ]->nb [0 ], (int )dst->src [0 ]->nb [1 ], (int )dst->src [0 ]->nb [2 ],
527- (int )dst->src [0 ]->nb [3 ], (int )dst->ne [0 ], (int )dst->ne [1 ], (int )dst->ne [2 ], (int )dst->ne [3 ], sf0, sf1, sf2, sf3,
528- main_stream, std::forward<Args>(args)...);
529- break ;
530- }
531- case GGML_TYPE_F32:
532- {
533- auto data_pts = cast_data<float >(dst);
534- kernel_invoker (data_pts.src , data_pts.dst , (int )dst->src [0 ]->nb [0 ], (int )dst->src [0 ]->nb [1 ], (int )dst->src [0 ]->nb [2 ],
535- (int )dst->src [0 ]->nb [3 ], (int )dst->ne [0 ], (int )dst->ne [1 ], (int )dst->ne [2 ], (int )dst->ne [3 ], sf0, sf1, sf2, sf3,
536- main_stream, std::forward<Args>(args)...);
537- break ;
538- }
539- default :
540- GGML_ABORT (" GGML tensor type not supported!\n " );
541- }
542- }
543-
544470template <typename F>
545471static inline void ggml_sycl_op_unary (
546472 ggml_backend_sycl_context & ctx, ggml_tensor * dst, F func) {
@@ -784,15 +710,6 @@ static inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor
784710 });
785711}
786712
787- static inline void ggml_sycl_op_upscale (ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
788- ggml_sycl_detail::dispatch_ggml_sycl_op_upscale (ctx, dst,
789- [](const auto * src, auto * dst_ptr, int nb00, int nb01, int nb02, int nb03,
790- int ne10, int ne11, int ne12, int ne13, float sf0, float sf1, float sf2, float sf3,
791- queue_ptr stream) {
792- ggml_sycl_detail::upscale_sycl (src, dst_ptr, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, stream);
793- });
794- }
795-
796713static inline void ggml_sycl_op_clamp (ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
797714 float min_val;
798715 float max_val;
@@ -1131,12 +1048,6 @@ void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
11311048 ggml_sycl_op_sqr (ctx, dst);
11321049}
11331050
1134- void ggml_sycl_upscale (ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1135- scope_op_debug_print scope_dbg_print (__func__, dst, /* num_src=*/ 1 );
1136- ggml_sycl_op_upscale (ctx, dst);
1137- }
1138-
1139-
11401051void ggml_sycl_clamp (ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
11411052 scope_op_debug_print scope_dbg_print (__func__, dst, /* num_src=*/ 1 );
11421053 ggml_sycl_op_clamp (ctx, dst);
0 commit comments