并行计算课程考核实验报告考核题目:QR分解算法的并行实现并行实现方式:OpenMP1.问题概述QR分解法是目前求一般矩阵全部特征值的最有效并广泛应用的方法,一般矩阵先经过正交相似变化成为householder矩阵,然后再应用QR方法求特征值和特征向量。它是将矩阵分解成一个正规正交矩阵Q与上三角形矩阵R,所以称为QR分解法。2.串行代码描述串行主要代码如下:#includestdio.h#includemath.h#includestdlib.h#includetime.h#definen100//maxNvoidMatrix_print(doubleA[n][n]){for(inti=0;in;i++){for(intj=0;jn;j++)printf(%8.4lf\t,A[i][j]);printf(\n);}}doubleMatrix_norm(doublea[n]){doubled=0;for(inti=0;in;i++)d+=a[i]*a[i];returnsqrt(d);}voidMatrix_multiply(doubleA[n][n],doubleB[n][n],doubleC[n][n]){for(inti=0;in;i++)for(intj=0;jn;j++){C[i][j]=0;for(intt=0;tn;t++)C[i][j]+=A[i][t]*B[t][j];}}voidMatrix_copy(doubleA[n][n],doubleB[n][n]){for(inti=0;in;i++)for(intj=0;jn;j++)A[i][j]=B[i][j];}voidhouseholder_trans(doubleA[n][n],intk,doubleQ[n][n]){doublea[n];for(inti=0;in-k;i++)a[i]=0;for(inti=n-k;in;i++)a[i]=A[i][n-k];a[n-k]-=Matrix_norm(a);doubled=Matrix_norm(a);for(inti=0;in;i++)a[i]=a[i]/d;doubleH[n][n];for(inti=0;in;i++){for(intj=0;jn;j++)H[i][j]=-2*a[i]*a[j];H[i][i]++;}//μ?H=I-2vvTdoubletemp[n][n];Matrix_multiply(H,A,temp);Matrix_copy(A,temp);Matrix_multiply(Q,H,temp);Matrix_copy(Q,temp);}voidMatrix_input(doubleA[n][n]){srand((unsigned)time(NULL));for(inti=0;in;i++)for(intj=0;jn;j++){/*printf(a[%d][%d]=,i,j);scanf(%lf,&A[i][j]);*/A[i][j]=rand();}}voidmain(){doubleQ[n][n];doubleA[n][n];Matrix_input(A);printf(A:\n);Matrix_print(A);for(inti=0;in;i++){for(intj=0;jn;j++)Q[i][j]=0;Q[i][i]=1;}clock_tstart=clock();for(inti=n;i=2;i--)householder_trans(A,i,Q);clock_tend=clock();printf(\n\nR:\n);Matrix_print(A);printf(Q:\n);Matrix_print(Q);printf(串行时间time=%.0fms,double(end-start));system(pause);}3.并行化设计思路a.将有for循环且没有数据关系、计算量大的计算进行并行优化;b.将毫不相关且可独立运行的代码块进行并行优化;c.程序主要优化是对householder变换里的乘法和矩阵赋值进行优化。4.并行代码描述#includestdio.h#includemath.h#includestdlib.h#includeomp.h#includetime.h#definen4//maxNvoidMatrix_print(doubleA[n][n]){for(inti=0;in;i++){for(intj=0;jn;j++)printf(%8.4lf\t,A[i][j]);printf(\n);}}voidMatrix_input(doubleA[n][n]){srand(unsigned(time(NULL)));for(inti=0;in;i++)for(intj=0;jn;j++){A[i][j]=rand();}/*for(inti=0;in;i++)for(intj=0;jn;j++){printf(a[%d][%d]=,i,j);scanf(%f,&a[i][j]);}*/}doubleMatrix_norm(doublea[n]){doubled=0;#pragmaompparallelfornum_threads(2)reduction(+:d)for(inti=0;in;i++)d+=a[i]*a[i];returnsqrt(d);}voidMatrix_multiply(doubleA[n][n],doubleB[n][n],doubleC[n][n]){intborder=n*n;inti,j;#pragmaompparallelfornum_threads(2)for(intindex=0;indexborder;index++){i=index/n;j=index%n;C[i][j]=0;for(intt=0;tn;t++)C[i][j]+=A[i][t]*B[t][j];}}voidMatrix_copy(doubleA[n][n],doubleB[n][n]){inti,j;intborder=n*n;#pragmaompparallelfornum_threads(2)for(intindex=0;indexborder;index++){i=index/n;j=index%n;A[i][j]=B[i][j];}}voidhouseholder_trans(doubleA[n][n],intk,doubleQ[n][n]){doublea[n];#pragmaompparallelnum_threads(2){#pragmaompforfor(inti=0;in-k;i++)a[i]=0;#pragmaompforfor(inti=n-k;in;i++)a[i]=A[i][n-k];}a[n-k]-=Matrix_norm(a);doubled=Matrix_norm(a);#pragmaompparallelfornum_threads(2)for(inti=0;in;i++)a[i]=a[i]/d;doubleH[n][n];intborder=n*n;inti,j;#pragmaompparallelfornum_threads(2)for(intindex=0;indexborder;index++){i=index/n;j=index%n;H[i][j]=-2*a[i]*a[j];if(j==n-1){H[i][i]++;}}doubletemp[n][n];#pragmaompparallelsectionsprivate(temp)num_threads(2){#pragmaompsection{Matrix_multiply(H,A,temp);Matrix_copy(A,temp);}#pragmaompsection{Matrix_multiply(Q,H,temp);Matrix_copy(Q,temp);}}}intmain(){doubleQ[n][n];doubleA[n][n];Matrix_input(A);//printf(A:\n);//Matrix_print(A);intborder=n*n;inti,j;#pragmaompparallelfornum_threads(2)for(intindex=0;indexborder;index++){i=index/n;j=index%n;Q[i][j]=0;if(j==n-1){Q[i][i]=1;}}clock_tstart=clock();for(inti=n;i=2;i--)householder_trans(A,i,Q);clock_tend=clock();doubletime=(double)end-start;printf(串行结果\nR:\n);Matrix_print(A);printf(Q:\n);Matrix_print(Q);printf(\n并行时间:time=%.0fms,time);system(pause);return0;}5.实验结果分析1)测试平台描述系统:Windows7旗舰版编译器:VisualStudio2012内存:4GCPU:AMDA6-4400MAPUwithRadeon(tm)HD双核2)测试结果描述(串行运行时间、并行运行时间)数据量串行时间(ms)并行时间(ms)加速比805625151.091100131011861.105120254321531.181140499239781.255160932972851.2813)加速比分析加速比=1/5(1.091+1.105+1.181+1.255+1.281)=1.1826