@@ -2362,6 +2362,27 @@ static inline size_t init_cpy_req(htp_general_req * req, dspqueue_buffer * bufs,
23622362 return n_bufs;
23632363}
23642364
2365+ static inline size_t init_cont_req (htp_general_req * req, dspqueue_buffer * bufs, const ggml_tensor * t) {
2366+ // CONT is just a contiguous copy — reuse CPY op
2367+ req->op = HTP_OP_CPY;
2368+
2369+ size_t n_bufs = 0 ;
2370+ n_bufs += htp_req_buff_init (&req->src0 , &bufs[n_bufs], t->src [0 ], DSPQBUF_TYPE_CPU_WRITE_DSP_READ);
2371+ n_bufs += htp_req_buff_init (&req->dst , &bufs[n_bufs], t, DSPQBUF_TYPE_DSP_WRITE_CPU_READ);
2372+
2373+ return n_bufs;
2374+ }
2375+
2376+ static inline size_t init_repeat_req (htp_general_req * req, dspqueue_buffer * bufs, const ggml_tensor * t) {
2377+ req->op = HTP_OP_REPEAT;
2378+
2379+ size_t n_bufs = 0 ;
2380+ n_bufs += htp_req_buff_init (&req->src0 , &bufs[n_bufs], t->src [0 ], DSPQBUF_TYPE_CPU_WRITE_DSP_READ);
2381+ n_bufs += htp_req_buff_init (&req->dst , &bufs[n_bufs], t, DSPQBUF_TYPE_DSP_WRITE_CPU_READ);
2382+
2383+ return n_bufs;
2384+ }
2385+
23652386static inline size_t init_get_rows_req (htp_general_req * req, dspqueue_buffer * bufs, const ggml_tensor * t) {
23662387 req->op = HTP_OP_GET_ROWS;
23672388
@@ -2449,12 +2470,33 @@ static inline size_t init_unary_req(htp_general_req * req, dspqueue_buffer * buf
24492470 break ;
24502471
24512472 case GGML_OP_UNARY:
2452- if (ggml_get_unary_op (t) == GGML_UNARY_OP_SILU) {
2473+ switch (ggml_get_unary_op (t)) {
2474+ case GGML_UNARY_OP_SILU:
24532475 req->op = HTP_OP_UNARY_SILU;
24542476 supported = true ;
2455- } else if (ggml_get_unary_op (t) == GGML_UNARY_OP_GELU) {
2477+ break ;
2478+ case GGML_UNARY_OP_GELU:
24562479 req->op = HTP_OP_UNARY_GELU;
24572480 supported = true ;
2481+ break ;
2482+ case GGML_UNARY_OP_SIGMOID:
2483+ req->op = HTP_OP_UNARY_SIGMOID;
2484+ supported = true ;
2485+ break ;
2486+ case GGML_UNARY_OP_NEG:
2487+ req->op = HTP_OP_UNARY_NEG;
2488+ supported = true ;
2489+ break ;
2490+ case GGML_UNARY_OP_EXP:
2491+ req->op = HTP_OP_UNARY_EXP;
2492+ supported = true ;
2493+ break ;
2494+ case GGML_UNARY_OP_SOFTPLUS:
2495+ req->op = HTP_OP_UNARY_SOFTPLUS;
2496+ supported = true ;
2497+ break ;
2498+ default :
2499+ break ;
24582500 }
24592501 break ;
24602502
@@ -2640,16 +2682,28 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
26402682 ggml_hexagon_dispatch_op<init_sum_rows_req>(sess, node, flags);
26412683 break ;
26422684 case GGML_OP_UNARY:
2643- if ((ggml_get_unary_op (node) == GGML_UNARY_OP_SILU) ||
2644- (ggml_get_unary_op (node) == GGML_UNARY_OP_GELU)) {
2645- ggml_hexagon_dispatch_op<init_unary_req>(sess, node, flags);
2685+ switch (ggml_get_unary_op (node)) {
2686+ case GGML_UNARY_OP_NEG:
2687+ case GGML_UNARY_OP_EXP:
2688+ case GGML_UNARY_OP_SIGMOID:
2689+ case GGML_UNARY_OP_SOFTPLUS:
2690+ case GGML_UNARY_OP_SILU:
2691+ case GGML_UNARY_OP_GELU:
2692+ ggml_hexagon_dispatch_op<init_unary_req>(sess, node, flags);
2693+ break ;
2694+ default :
2695+ break ;
26462696 }
26472697 break ;
26482698 case GGML_OP_GLU:
2649- if ((ggml_get_glu_op (node) == GGML_GLU_OP_SWIGLU) ||
2650- (ggml_get_glu_op (node) == GGML_GLU_OP_SWIGLU_OAI) ||
2651- (ggml_get_glu_op (node) == GGML_GLU_OP_GEGLU)) {
2652- ggml_hexagon_dispatch_op<init_unary_req>(sess, node, flags);
2699+ switch (ggml_get_glu_op (node)) {
2700+ case GGML_GLU_OP_SWIGLU:
2701+ case GGML_GLU_OP_SWIGLU_OAI:
2702+ case GGML_GLU_OP_GEGLU:
2703+ ggml_hexagon_dispatch_op<init_unary_req>(sess, node, flags);
2704+ break ;
2705+ default :
2706+ break ;
26532707 }
26542708 break ;
26552709 case GGML_OP_SOFT_MAX:
@@ -2676,6 +2730,14 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
26762730 ggml_hexagon_dispatch_op<init_cpy_req>(sess, node, flags);
26772731 break ;
26782732
2733+ case GGML_OP_CONT:
2734+ ggml_hexagon_dispatch_op<init_cont_req>(sess, node, flags);
2735+ break ;
2736+
2737+ case GGML_OP_REPEAT:
2738+ ggml_hexagon_dispatch_op<init_repeat_req>(sess, node, flags);
2739+ break ;
2740+
26792741 case GGML_OP_ARGSORT:
26802742 ggml_hexagon_dispatch_op<init_argsort_req>(sess, node, flags);
26812743 break ;
@@ -3006,6 +3068,39 @@ static bool ggml_hexagon_supported_cpy(const struct ggml_hexagon_session * sess,
30063068 return true ;
30073069}
30083070
3071+ static bool ggml_hexagon_supported_cont (const struct ggml_hexagon_session * sess, const struct ggml_tensor * op) {
3072+ GGML_UNUSED (sess);
3073+ const struct ggml_tensor * src0 = op->src [0 ];
3074+
3075+ // CONT is same-type only, supports f32 and f16
3076+ if (src0->type != GGML_TYPE_F32 && src0->type != GGML_TYPE_F16) return false ;
3077+
3078+ return true ;
3079+ }
3080+
3081+ static bool ggml_hexagon_supported_repeat (const struct ggml_hexagon_session * sess, const struct ggml_tensor * op) {
3082+ GGML_UNUSED (sess);
3083+ const struct ggml_tensor * src0 = op->src [0 ];
3084+ const struct ggml_tensor * dst = op;
3085+
3086+ // Support f32 and f16
3087+ if (src0->type != GGML_TYPE_F32 && src0->type != GGML_TYPE_F16) return false ;
3088+
3089+ // src and dst must be the same type
3090+ if (src0->type != dst->type ) return false ;
3091+
3092+ // dst dims must be multiples of src dims
3093+ if (dst->ne [0 ] % src0->ne [0 ] != 0 ) return false ;
3094+ if (dst->ne [1 ] % src0->ne [1 ] != 0 ) return false ;
3095+ if (dst->ne [2 ] % src0->ne [2 ] != 0 ) return false ;
3096+ if (dst->ne [3 ] % src0->ne [3 ] != 0 ) return false ;
3097+
3098+ // require contiguous tensors (no transposition)
3099+ if (ggml_is_transposed (src0) || ggml_is_transposed (dst)) return false ;
3100+
3101+ return true ;
3102+ }
3103+
30093104static bool ggml_backend_hexagon_device_supports_op (ggml_backend_dev_t dev, const struct ggml_tensor * op) {
30103105 auto sess = static_cast <ggml_hexagon_session *>(dev->context );
30113106
@@ -3063,21 +3158,32 @@ static bool ggml_backend_hexagon_device_supports_op(ggml_backend_dev_t dev, cons
30633158 break ;
30643159
30653160 case GGML_OP_UNARY:
3066- {
3067- const auto unary_op = ggml_get_unary_op (op);
3068- if (unary_op == GGML_UNARY_OP_SILU || unary_op == GGML_UNARY_OP_GELU) {
3161+ switch (ggml_get_unary_op (op)) {
3162+ case GGML_UNARY_OP_NEG:
3163+ case GGML_UNARY_OP_EXP:
3164+ case GGML_UNARY_OP_SIGMOID:
3165+ case GGML_UNARY_OP_SOFTPLUS:
3166+ supp = ggml_hexagon_supported_unary (sess, op);
3167+ break ;
3168+ case GGML_UNARY_OP_SILU:
3169+ case GGML_UNARY_OP_GELU:
30693170 supp = ggml_hexagon_supported_activations (sess, op);
3070- }
3071- break ;
3171+ break ;
3172+ default :
3173+ break ;
30723174 }
3175+ break ;
30733176 case GGML_OP_GLU:
3074- {
3075- const auto glu_op = ggml_get_glu_op (op);
3076- if ((glu_op == GGML_GLU_OP_SWIGLU) || (glu_op == GGML_GLU_OP_SWIGLU_OAI) || (glu_op == GGML_GLU_OP_GEGLU)) {
3177+ switch (ggml_get_glu_op (op)) {
3178+ case GGML_GLU_OP_SWIGLU:
3179+ case GGML_GLU_OP_SWIGLU_OAI:
3180+ case GGML_GLU_OP_GEGLU:
30773181 supp = ggml_hexagon_supported_activations (sess, op);
3078- }
3079- break ;
3182+ break ;
3183+ default :
3184+ break ;
30803185 }
3186+ break ;
30813187 case GGML_OP_ROPE:
30823188 supp = ggml_hexagon_supported_rope (sess, op);
30833189 break ;
@@ -3098,6 +3204,14 @@ static bool ggml_backend_hexagon_device_supports_op(ggml_backend_dev_t dev, cons
30983204 supp = ggml_hexagon_supported_cpy (sess, op);
30993205 break ;
31003206
3207+ case GGML_OP_CONT:
3208+ supp = ggml_hexagon_supported_cont (sess, op);
3209+ break ;
3210+
3211+ case GGML_OP_REPEAT:
3212+ supp = ggml_hexagon_supported_repeat (sess, op);
3213+ break ;
3214+
31013215 case GGML_OP_ARGSORT:
31023216 supp = ggml_hexagon_supported_argsort (sess, op);
31033217 break ;
0 commit comments