pytorch · rascani · Feb 27, 2026
@@ -36,6 +36,9 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 #define ARM_NN_Q31_MAX ((int32_t)(0x7FFFFFFFL))
 #define ARM_NN_Q31_MIN ((int32_t)(0x80000000L))
 
+// Scratch-buffer alignment in bytes; 16 suits MVE vector operations.
+constexpr size_t kCortexMMveAlignment = 16;
+
 // Basic tensor type / layout validation and dimension order checking
 inline void validate_cmsis_nn_tensor_requirements(
     const Tensor& input1,

@@ -192,7 +192,7 @@ Tensor& quantized_conv2d_out(
       arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims));
   if (buffer_bytes > 0) {
     auto buffer_or_error =
-        context.allocate_temp(buffer_bytes, alignof(int16_t));
+        context.allocate_temp(buffer_bytes, kCortexMMveAlignment);
     if (!buffer_or_error.ok()) {
       if (buffer_or_error.error() != Error::NotFound) {
         ET_LOG(

@@ -237,7 +237,7 @@ Tensor& quantized_depthwise_conv2d_out(
   }
 
   auto buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(buffer_bytes), alignof(int16_t));
+      static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
   if (!buffer_or_error.ok()) {
     ET_LOG(
         Error,

@@ -192,7 +192,7 @@ Tensor& quantized_transpose_conv2d_out(
   const int32_t buffer_bytes = arm_transpose_conv_s8_get_buffer_size(
       &transpose_conv_params, &input_dims, &filter_dims, &output_dims);
   auto buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(buffer_bytes), alignof(int16_t));
+      static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
   if (!buffer_or_error.ok()) {
     ET_LOG(
         Error,
@@ -209,7 +209,7 @@ Tensor& quantized_transpose_conv2d_out(
       arm_transpose_conv_s8_get_reverse_conv_buffer_size(
           &transpose_conv_params, &input_dims, &filter_dims);
   auto output_buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(output_buffer_bytes), alignof(int16_t));
+      static_cast<size_t>(output_buffer_bytes), kCortexMMveAlignment);
   if (!output_buffer_or_error.ok()) {
     ET_LOG(
         Error,