Use a shared 16-byte alignment constant for cortex_m scratch buffers.

Adds kCortexMMveAlignment (16) to cortex_m_ops_common.h and passes it to
context.allocate_temp() in the quantized conv2d, depthwise conv2d and
transpose conv2d kernels, replacing the previous alignof(int16_t) argument.

diff --git a/backends/cortex_m/ops/cortex_m_ops_common.h b/backends/cortex_m/ops/cortex_m_ops_common.h
index 2f74bbc0e90..51179d834d3 100644
--- a/backends/cortex_m/ops/cortex_m_ops_common.h
+++ b/backends/cortex_m/ops/cortex_m_ops_common.h
@@ -36,6 +36,9 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 #define ARM_NN_Q31_MAX ((int32_t)(0x7FFFFFFFL))
 #define ARM_NN_Q31_MIN ((int32_t)(0x80000000L))
 
+// 16-byte alignment for MVE vector operations.
+constexpr size_t kCortexMMveAlignment = 16;
+
 // Basic tensor type / layout validation and dimension order checking
 inline void validate_cmsis_nn_tensor_requirements(
     const Tensor& input1,
diff --git a/backends/cortex_m/ops/op_quantized_conv2d.cpp b/backends/cortex_m/ops/op_quantized_conv2d.cpp
index f7c66962d56..3eae9507ba7 100644
--- a/backends/cortex_m/ops/op_quantized_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_conv2d.cpp
@@ -192,7 +192,7 @@ Tensor& quantized_conv2d_out(
       arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims));
   if (buffer_bytes > 0) {
     auto buffer_or_error =
-        context.allocate_temp(buffer_bytes, alignof(int16_t));
+        context.allocate_temp(buffer_bytes, kCortexMMveAlignment);
     if (!buffer_or_error.ok()) {
       if (buffer_or_error.error() != Error::NotFound) {
         ET_LOG(
diff --git a/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp b/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
index 87120edf8be..b3cf926c2e1 100644
--- a/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
@@ -237,7 +237,7 @@ Tensor& quantized_depthwise_conv2d_out(
   }
 
   auto buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(buffer_bytes), alignof(int16_t));
+      static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
   if (!buffer_or_error.ok()) {
     ET_LOG(
         Error,
diff --git a/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp b/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
index fbf4f429187..7126a2b2cf7 100644
--- a/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
@@ -192,7 +192,7 @@ Tensor& quantized_transpose_conv2d_out(
   const int32_t buffer_bytes = arm_transpose_conv_s8_get_buffer_size(
       &transpose_conv_params, &input_dims, &filter_dims, &output_dims);
   auto buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(buffer_bytes), alignof(int16_t));
+      static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
   if (!buffer_or_error.ok()) {
     ET_LOG(
         Error,
@@ -209,7 +209,7 @@ Tensor& quantized_transpose_conv2d_out(
       arm_transpose_conv_s8_get_reverse_conv_buffer_size(
           &transpose_conv_params, &input_dims, &filter_dims);
   auto output_buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(output_buffer_bytes), alignof(int16_t));
+      static_cast<size_t>(output_buffer_bytes), kCortexMMveAlignment);
   if (!output_buffer_or_error.ok()) {
     ET_LOG(
         Error,