@@ -117,6 +117,75 @@ namespace {
117117#endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
118118
119119
120+ #if XNN_ENABLE_AVX2 && (XNN_ARCH_X86 || XNN_ARCH_X86_64)
// Benchmark entry point for the 1x8c8 AVX2 "madd" QS8/QC2W GEMM microkernel.
// Forwards `state` to GEMMBenchmark together with the microkernel, a params
// initializer, a weight-packing routine, the tile parameters (mr=1, nr=8,
// kr=8, sr=1) and the xnn_arch_x86_avx2 arch flag. `net` is accepted but not
// referenced in this body.
// NOTE(review): the params initializer is the qc8w variant while the kernel
// is qc2w -- presumably shared on purpose; confirm.
121+ static void qs8_qc2w_gemm_minmax_fp32_ukernel_1x8c8__avx2_madd (benchmark::State& state, const char * net) {
122+ GEMMBenchmark (state,
123+ xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_1x8c8__avx2_madd,
124+ xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
125+ xnn_pack_qs8_to_qu8_qc2w_gemm_goi_w,
126+ /* mr=*/ 1 , /* nr=*/ 8 , /* kr=*/ 8 , /* sr=*/ 1 ,
127+ /* arch_flags=*/ xnn_arch_x86_avx2);
128+ }
129+
// Passes the wrapper above to BENCHMARK_GEMM (macro defined outside this
// view; presumably it registers the benchmark -- confirm against its definition).
130+ BENCHMARK_GEMM (qs8_qc2w_gemm_minmax_fp32_ukernel_1x8c8__avx2_madd)
131+
// Benchmark entry point for the 2x8c8 AVX2 "madd" QS8/QC2W GEMM microkernel.
// Forwards `state` to GEMMBenchmark together with the microkernel, a params
// initializer, a weight-packing routine, the tile parameters (mr=2, nr=8,
// kr=8, sr=1) and the xnn_arch_x86_avx2 arch flag. `net` is accepted but not
// referenced in this body.
// NOTE(review): the params initializer is the qc8w variant while the kernel
// is qc2w -- presumably shared on purpose; confirm.
132+ static void qs8_qc2w_gemm_minmax_fp32_ukernel_2x8c8__avx2_madd(benchmark::State& state, const char * net) {
133+ GEMMBenchmark (state,
134+ xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_2x8c8__avx2_madd,
135+ xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
136+ xnn_pack_qs8_to_qu8_qc2w_gemm_goi_w,
137+ /* mr=*/ 2 , /* nr=*/ 8 , /* kr=*/ 8 , /* sr=*/ 1 ,
138+ /* arch_flags=*/ xnn_arch_x86_avx2);
139+ }
140+
// Passes the wrapper above to BENCHMARK_GEMM (macro defined outside this
// view; presumably it registers the benchmark -- confirm against its definition).
141+ BENCHMARK_GEMM (qs8_qc2w_gemm_minmax_fp32_ukernel_2x8c8__avx2_madd)
142+
// Benchmark entry point for the 3x8c8 AVX2 "madd" QS8/QC2W GEMM microkernel.
// Forwards `state` to GEMMBenchmark together with the microkernel, a params
// initializer, a weight-packing routine, the tile parameters (mr=3, nr=8,
// kr=8, sr=1) and the xnn_arch_x86_avx2 arch flag. `net` is accepted but not
// referenced in this body.
// NOTE(review): the params initializer is the qc8w variant while the kernel
// is qc2w -- presumably shared on purpose; confirm.
143+ static void qs8_qc2w_gemm_minmax_fp32_ukernel_3x8c8__avx2_madd(benchmark::State& state, const char * net) {
144+ GEMMBenchmark (state,
145+ xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_3x8c8__avx2_madd,
146+ xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
147+ xnn_pack_qs8_to_qu8_qc2w_gemm_goi_w,
148+ /* mr=*/ 3 , /* nr=*/ 8 , /* kr=*/ 8 , /* sr=*/ 1 ,
149+ /* arch_flags=*/ xnn_arch_x86_avx2);
150+ }
151+
// Passes the wrapper above to BENCHMARK_GEMM (macro defined outside this
// view; presumably it registers the benchmark -- confirm against its definition).
152+ BENCHMARK_GEMM (qs8_qc2w_gemm_minmax_fp32_ukernel_3x8c8__avx2_madd)
153+
// Benchmark entry point for the 4x8c8 AVX2 "madd" QS8/QC2W GEMM microkernel.
// Forwards `state` to GEMMBenchmark together with the microkernel, a params
// initializer, a weight-packing routine, the tile parameters (mr=4, nr=8,
// kr=8, sr=1) and the xnn_arch_x86_avx2 arch flag. `net` is accepted but not
// referenced in this body.
// NOTE(review): the params initializer is the qc8w variant while the kernel
// is qc2w -- presumably shared on purpose; confirm.
154+ static void qs8_qc2w_gemm_minmax_fp32_ukernel_4x8c8__avx2_madd(benchmark::State& state, const char * net) {
155+ GEMMBenchmark (state,
156+ xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_4x8c8__avx2_madd,
157+ xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
158+ xnn_pack_qs8_to_qu8_qc2w_gemm_goi_w,
159+ /* mr=*/ 4 , /* nr=*/ 8 , /* kr=*/ 8 , /* sr=*/ 1 ,
160+ /* arch_flags=*/ xnn_arch_x86_avx2);
161+ }
162+
// Passes the wrapper above to BENCHMARK_GEMM (macro defined outside this
// view; presumably it registers the benchmark -- confirm against its definition).
163+ BENCHMARK_GEMM (qs8_qc2w_gemm_minmax_fp32_ukernel_4x8c8__avx2_madd)
164+
// Benchmark entry point for the 5x8c8 AVX2 "madd" QS8/QC2W GEMM microkernel.
// Forwards `state` to GEMMBenchmark together with the microkernel, a params
// initializer, a weight-packing routine, the tile parameters (mr=5, nr=8,
// kr=8, sr=1) and the xnn_arch_x86_avx2 arch flag. `net` is accepted but not
// referenced in this body.
// NOTE(review): the params initializer is the qc8w variant while the kernel
// is qc2w -- presumably shared on purpose; confirm.
165+ static void qs8_qc2w_gemm_minmax_fp32_ukernel_5x8c8__avx2_madd(benchmark::State& state, const char * net) {
166+ GEMMBenchmark (state,
167+ xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_5x8c8__avx2_madd,
168+ xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
169+ xnn_pack_qs8_to_qu8_qc2w_gemm_goi_w,
170+ /* mr=*/ 5 , /* nr=*/ 8 , /* kr=*/ 8 , /* sr=*/ 1 ,
171+ /* arch_flags=*/ xnn_arch_x86_avx2);
172+ }
173+
// Passes the wrapper above to BENCHMARK_GEMM (macro defined outside this
// view; presumably it registers the benchmark -- confirm against its definition).
174+ BENCHMARK_GEMM (qs8_qc2w_gemm_minmax_fp32_ukernel_5x8c8__avx2_madd)
175+
// Benchmark entry point for the 6x8c8 AVX2 "madd" QS8/QC2W GEMM microkernel.
// Forwards `state` to GEMMBenchmark together with the microkernel, a params
// initializer, a weight-packing routine, the tile parameters (mr=6, nr=8,
// kr=8, sr=1) and the xnn_arch_x86_avx2 arch flag. `net` is accepted but not
// referenced in this body.
// NOTE(review): the params initializer is the qc8w variant while the kernel
// is qc2w -- presumably shared on purpose; confirm.
176+ static void qs8_qc2w_gemm_minmax_fp32_ukernel_6x8c8__avx2_madd(benchmark::State& state, const char * net) {
177+ GEMMBenchmark (state,
178+ xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_6x8c8__avx2_madd,
179+ xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
180+ xnn_pack_qs8_to_qu8_qc2w_gemm_goi_w,
181+ /* mr=*/ 6 , /* nr=*/ 8 , /* kr=*/ 8 , /* sr=*/ 1 ,
182+ /* arch_flags=*/ xnn_arch_x86_avx2);
183+ }
184+
// Passes the wrapper above to BENCHMARK_GEMM (macro defined outside this
// view; presumably it registers the benchmark -- confirm against its definition).
185+ BENCHMARK_GEMM (qs8_qc2w_gemm_minmax_fp32_ukernel_6x8c8__avx2_madd)
186+ #endif // XNN_ENABLE_AVX2 && (XNN_ARCH_X86 || XNN_ARCH_X86_64)
187+
188+
120189static void qs8_qc2w_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf (benchmark::State& state, const char * net) {
121190 GEMMBenchmark (state,
122191 xnn_qs8_qc2w_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
0 commit comments