Hi all,
the code is like this:
void testfun() { float *buf1 = new float[dataLen]; float *buf2 = new float[dataLen]; for(int i=0; i<dataLen; i++) { buf1[i] = 1; buf2[i] = 1; } __m256 *pp1; __m256 *pp2; __m256 _rresult = _mm256_set1_ps(0.0f); for(int j=0;j<1000000; j++) { pp1 = (__m256 *)buf1; pp2 = (__m256 *)buf2; _rresult = _mm256_sub_ps(_mm256_set_ps(j,0,0,0,0,0,0,0), _rresult); for(int i=0; i<dataLen/8; i++) { _rresult = _mm256_add_ps(_mm256_mul_ps(*pp1, *pp2), _rresult);// There will be an error pp1++; pp2++; cout<<"i"<<i<<endl; } } }
In debug mode there is no error, but in release mode an error occurs every time!
I hope someone can help me. Thanks!