常用blas函数

整理文档很辛苦,赏杯茶钱您再走!

免费阅读已结束,点击下载阅读编辑剩下 ...

阅读已结束,您可以下载文档离线阅读编辑

资源描述

Y=alpha*X+beta*Ytemplatevoidcaffe_cpu_axpbyfloat(constintN,constfloatalpha,constfloat*X,constfloatbeta,float*Y){cblas_saxpby(N,alpha,X,1,beta,Y,1);}templatevoidcaffe_cpu_axpbydouble(constintN,constdoublealpha,constdouble*X,constdoublebeta,double*Y){cblas_daxpby(N,alpha,X,1,beta,Y,1);}cblas_dscal(N,beta,Y,incY);Y=Y*betacblas_daxpy(N,alpha,X,incX,Y,incY);Y=(alpha*X)+Y)Y=alpha*X+Ytemplatevoidcaffe_axpyfloat(constintN,constfloatalpha,constfloat*X,float*Y){cblas_saxpy(N,alpha,X,1,Y,1);}templatevoidcaffe_axpydouble(constintN,constdoublealpha,constdouble*X,double*Y){cblas_daxpy(N,alpha,X,1,Y,1);}DEFINE_VSL_BINARY_FUNC(Add,y[i]=a[i]+b[i]);DEFINE_VSL_BINARY_FUNC(Sub,y[i]=a[i]-b[i]);DEFINE_VSL_BINARY_FUNC(Mul,y[i]=a[i]*b[i]);DEFINE_VSL_BINARY_FUNC(Div,y[i]=a[i]/b[i]);templatevoidcaffe_addfloat(constintn,constfloat*a,constfloat*b,float*y){vsAdd(n,a,b,y);}templatevoidcaffe_adddouble(constintn,constdouble*a,constdouble*b,double*y){vdAdd(n,a,b,y);}y=x;templatevoidcaffe_copyfloat(constintN,constfloat*X,float*Y){cblas_scopy(N,X,1,Y,1);}templatevoidcaffe_copydouble(constintN,constdouble*X,double*Y){cblas_dcopy(N,X,1,Y,1);}templatevoidcaffe_gpu_copyfloat(constintN,constfloat*X,float*Y){CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(),N,X,1,Y,1));}templatevoidcaffe_gpu_copydouble(constintN,constdouble*X,double*Y){CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(),N,X,1,Y,1));}Computesalpha*x*y'+A.cblas_sgerMultipliesvectorXbythetransformofvectorY,thenaddsmatrixA(singleprecison).MultipliesvectorXbythetransformofvectorY,thenaddsmatrixA(singleprecison).voidcblas_sger(constenumCBLAS_ORDEROrder,constintM,constintN,constfloatalpha,constfloat*X,constintincX,constfloat*Y,constintincY,float*A,constintlda);Y(vetor)←αAX+βYThisfunctionmultipliesA*X(aftertransposingA,ifneeded)andmultipliestheresultingmatrixbyalpha.ItthenmultipliesvectorYbybeta.ItstoresthesumofthesetwoproductsinvectorY.templatevoidcaffe_cpu_gemvfloat(constCBLAS_TRANSPOSETransA,constintM,constintN,constfloatalpha,constfloat*A,constfloat
*x,constfloatbeta,float*y){cblas_sgemv(CblasRowMajor,TransA,M,N,alpha,A,N,x,1,beta,y,1);}C(matrix)←αAB+βCtemplatetypenameTvoidgpu_multmat(T*A,T*B,T*C,intM,intK,intN){constTalpha=1,beta=0;caffe_gpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C);}templatevoidcaffe_cpu_gemmfloat(constCBLAS_TRANSPOSETransA,constCBLAS_TRANSPOSETransB,constintM,constintN,constintK,constfloatalpha,constfloat*A,constfloat*B,constfloatbeta,float*C){intlda=(TransA==CblasNoTrans)?K:M;intldb=(TransB==CblasNoTrans)?N:K;cblas_sgemm(CblasRowMajor,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,N);}A=M*NB=M*KC=A'*BNMKtemplatetypenameTvoidcpu_multTmat(T*A,T*B,T*C,intM,intK,intN){constTalpha=1,beta=0;caffe_cpu_gemm(CblasTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C);//cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,M,B,K,beta,C,M);}A=M*NB=N*KC=A*BMNKtemplatetypenameTvoidcpu_multmat(T*A,T*B,T*C,intM,intK,intN){constTalpha=1,beta=0;caffe_cpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C);//cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,M,B,K,beta,C,M);}

1 / 4
下载文档,编辑使用

©2015-2020 m.777doc.com 三七文档.

备案号:鲁ICP备2024069028号-1 客服联系 QQ:2149211541

×
保存成功