TurboQuant TQ4 KV cache compression for Qwen 3.5 MoE (pytorch#18687)
Add TurboQuant (arXiv 2504.19874) KV cache compression to the CUDA
backend, reducing KV cache memory by 3.8x by storing nibble-packed uint8
indices + bf16 norms instead of bf16 tensors. A fused Triton SDPA
kernel decompresses K/V per tile in the attention inner loop, so the
full cache is never materialized.
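For orientation, a minimal sketch of the compression layout this describes: the per-row norm is factored out in bf16, each element is mapped to a 16-entry Lloyd-Max codebook index, and two indices are packed per byte. The nibble order and helper names here are assumptions, not the module's actual API:

```python
import torch

def tq4_compress(x: torch.Tensor, codebook: torch.Tensor):
    """Sketch of TQ4 compression for one [N, D] block of K or V rows.

    codebook: [16] bf16 Lloyd-Max centroids fit to unit-norm values.
    Returns nibble-packed uint8 indices [N, D//2] and per-row bf16 norms [N].
    """
    norms = x.float().norm(dim=-1, keepdim=True).clamp_min(1e-6)
    unit = x.float() / norms                            # factor the norm out
    # Nearest-centroid assignment gives a 4-bit index per element.
    idx = (unit.unsqueeze(-1) - codebook.float()).abs().argmin(-1).to(torch.uint8)
    packed = idx[..., 0::2] | (idx[..., 1::2] << 4)     # two nibbles per byte
    return packed, norms.squeeze(-1).to(torch.bfloat16)
```

For D=256 this stores 128 index bytes + 2 norm bytes = 130 bytes per row versus 512 bytes in bf16, i.e., ~3.9x, consistent with the quoted 3.8x once codebook/metadata overhead is counted.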
Components:
- backends/cuda/triton/kernels/tq4_sdpa.py: Fused TQ4 Flash Attention
kernel with the Pack GQA optimization (adapted from the sdpa.py structure),
a precomputed [256]-entry bf16 LUT so centroid gather needs no bit ops,
and norm factoring (norms are multiplied onto the [M,N] QK/P matrices
instead of the [N,D] K/V tiles; see the reference sketch after this
list). NaN-safe softmax guards for sparse masks.
Registered as @triton_op for torch.export + CUDA backend lowering.
- extension/llm/modules/turboquant/: TurboQuantKVCache nn.Module with
the bf16 compression path and a self-contained Lloyd-Max codebook solver
(no hard external dependencies; scipy is imported lazily for codebook
initialization only).
- examples/models/qwen3_5_moe/: --turboquant flag in export.py, and a
branch in FullAttention.forward() that selects between standard SDPA
and tq4_sdpa.
- backends/aoti/: Added aoti_torch_dtype_uint8 shim and Byte ScalarType
to slim headers (required for uint8 KV cache tensors in C++ runtime).
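As referenced in the tq4_sdpa.py item above, here is a plain-PyTorch reference for what the fused kernel's inner loop computes: byte-indexed LUT decode plus norm factoring on the [M, N] matrices. The two-table reading of the [256]-entry LUT and all names below are assumptions; the real kernel does this per tile in Triton with an online softmax:

```python
import torch

def tq4_sdpa_reference(q, k_packed, k_norms, v_packed, v_norms, codebook, scale):
    """Hypothetical reference for the fused kernel's math (not the Triton code).

    Shapes: q [M, D]; *_packed [N, D//2] uint8; *_norms [N] bf16; codebook [16] bf16.
    """
    # Two [256]-entry LUTs map a raw byte straight to each nibble's centroid,
    # so decoding needs no shifts or masks inside the kernel.
    byte_vals = torch.arange(256, dtype=torch.uint8)
    lut_lo = codebook[(byte_vals & 0xF).long()]   # centroid of low nibble
    lut_hi = codebook[(byte_vals >> 4).long()]    # centroid of high nibble

    def decode(packed):                           # [N, D//2] uint8 -> [N, D]
        lo, hi = lut_lo[packed.long()], lut_hi[packed.long()]
        return torch.stack((lo, hi), dim=-1).flatten(-2)

    k_unit, v_unit = decode(k_packed), decode(v_packed)
    s = (q.float() @ k_unit.float().T) * scale
    s = s * k_norms.float()         # norm factoring: scale [M, N] scores, not K tiles
    p = torch.softmax(s, dim=-1)
    p = p * v_norms.float()         # fold V norms into [M, N] probs, not V tiles
    return (p @ v_unit.float()).to(q.dtype)
```

Applying the norms to the [M, N] matrices is cheaper than rescaling the decompressed [N, D] tiles whenever the query tile M is smaller than D=256, which is the common case during decode.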
Performance (A100, Qwen 3.5 MoE, B=1, GQA 16:2, D=256, seq=4096):
- TQ4 SDPA kernel: 0.66ms (was 3.74ms before optimizations)
- Baseline bf16 SDPA: 0.45ms (i.e., 1.5x kernel overhead for the 3.8x memory savings)
- Full AOTI path: 0.79ms (inductor fuses the compress ops)
The full Qwen 3.5 MoE model with TurboQuant KV cache compression decodes
at 75% of baseline speed (78 -> 60) with 3.8x memory savings relative to
the standard KV cache, validated end-to-end through the C++ runner. Note
that the full-attention KV cache is only a small part of Qwen 3.5 MoE,
since 3/4 of its layers use recurrent states. At 200K context length,
TurboQuant saves about 3GB of KV cache (4GB -> 1GB).
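For context, a back-of-envelope check of that 200K figure. GQA 16:2 and D=256 come from the numbers above; the full-attention layer count is an assumption that happens to reproduce the quoted sizes:

```python
# All constants per the PR except full_attn_layers, which is assumed.
n_kv_heads, head_dim, seq, bf16_bytes = 2, 256, 200_000, 2
full_attn_layers = 10                                    # assumption
# Leading 2 accounts for both K and V.
kv_bf16 = 2 * full_attn_layers * n_kv_heads * head_dim * seq * bf16_bytes
print(f"bf16 KV cache: {kv_bf16 / 2**30:.1f} GiB")       # ~3.8 GiB
print(f"TQ4 (/3.8):    {kv_bf16 / 3.8 / 2**30:.1f} GiB") # ~1.0 GiB
```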
.ci/scripts/test_model_e2e.sh (1 addition, 1 deletion):

```diff
@@ -354,7 +354,7 @@ EOF
         fi
         ;;
     qwen3_5_moe)
-        RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 32"
+        RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0"
```