[llvm-dev] Vectorization width not correct using #pragma clang loop vectorize_width (original) (raw)

hameeza ahmed via llvm-dev llvm-dev at lists.llvm.org
Thu Sep 20 14:15:55 PDT 2018


Hello, I'm trying to set the vector width using `#pragma clang loop vectorize_width(32)`, but I'm getting a width of 8 for the following kernel:

#define M 128
#define N 128

#define SQRT_FUN(x) sqrtf(x)

int main(int argc, char** argv)
{
/* Variable declaration/allocation. */
double float_n = (double)N;
double data[N*M];
double corr[M*M];
double mean[M];
double stddev[M];
uint32_t i,j,k;

/*Initialize array(s). */

#pragma clang loop vectorize_width(1) //no vectorize
for (i = 0; i < N*M; i++)
{
    data[i] = (50.0)*i;
}

kernel_1:
#pragma clang loop vectorize_width(32)
for (j = 0; j < M; j++)
{
    mean[j] = 0.0;
}
for (i = 0; i < N; i++)
{
    for (j = 0; j < M; j++)
        mean[j] += data[(i*M) + j];
}
for (j = 0; j < M; j++)
{
    mean[j] /= float_n;
}

kernel_2:
for (j = 0; j < M; j++)
{
    stddev[j] = 0.0;
}
for (i = 0; i < N; i++)
{
    for (j = 0; j < M; j++)
    {
        stddev[j] += (data[(i*M) + j] - mean[j]) * (data[(i*M)+j] - mean[j]);
    }
}
for (j = 0; j < M; j++)
{
    stddev[j] /= float_n;
}
for (j = 0; j < M; j++)
{
    stddev[j] = SQRT_FUN(stddev[j]);
}

kernel_3:
for (i = 0; i < N; i++)
{
    for (j = 0; j < M; j++)
    {
        data[(i*M) + j] -= mean[j];
    }
}
for (i = 0; i < N; i++)
{
    for (j = 0; j < M; j++)
    {
        data[(i*M) + j] /= SQRT_FUN(float_n) * stddev[j];
    }
}

kernel_4:

for (i = 0; i < M*M; i++)
{
    corr[i] = 0.0;
}
for (k = 0; k < N; k++)
{
    for (i = 0; i < M-1; i++)
    {
        for (j = i+1; j < M; j++)
        {
            corr[(i*M)+j] += (data[(k*M)+i] *

                             data[(k*M)+j]);
        }
    }
}
printf("Corr[0]: %lf\n",mean[0]);
printf("Corr[0]: %lf\n",mean[M-1]);
printf("Corr[0]: %lf\n",stddev[0]);
printf("Corr[0]: %lf\n",stddev[M-1]);
printf("Corr[0]: %lf\n",corr[0]);
printf("Corr[(M*M)-1]: %lf\n",corr[(M*M)-1]);
printf("Corr[0]: %lf\n",data[0]);
printf("Corr[(M*M)-1]: %lf\n",data[(M*M)-1]);
return 0;
}

I'm getting the following output when I compile:

clang -O3 correlation.c -Rpass=loop-vectorize -emit-llvm -march=knl -S -o 1.ll

correlation.c:38:9: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
        for (j = 0; j < M; j++)
        ^
correlation.c:41:5: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
    for (j = 0; j < M; j++)
    ^
correlation.c:53:9: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
        for (j = 0; j < M; j++)
        ^
correlation.c:58:5: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
    for (j = 0; j < M; j++)
    ^
correlation.c:71:9: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
        for (j = 0; j < M; j++)
        ^
correlation.c:78:9: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
        for (j = 0; j < M; j++)
        ^
correlation.c:98:13: remark: vectorized loop (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
            for (j = i+1; j < M; j++)

why is that so?

However, I am able to set the width to 32 for the example code given on the site.

Why are the pragmas not setting the vector width correctly here in my kernel?

What is the issue?

Please help..

Thank you,
Regards

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20180921/a072e1c9/attachment.html>



More information about the llvm-dev mailing list