# Worked examples of multivariate analysis in R: clustering, self-organising
# maps, PCA, correspondence analysis, visualisation, logistic regression,
# network analysis and tree-based classification.
#
# The sections run top to bottom and share one workspace (`data`, `Dist`,
# `clusteModel`, ... are reassigned along the way), so statements and
# library() calls are kept in their original order.

# Hierarchical clustering ----
data <- iris[, -5]
means <- vapply(data, mean, numeric(1))
SD <- vapply(data, sd, numeric(1))
# Standardise every column with its own mean and standard deviation.
dataScale <- scale(data, center = means, scale = SD)
Dist <- dist(dataScale, method = "euclidean")
heatmap(as.matrix(Dist), labRow = FALSE, labCol = FALSE)
# NOTE(review): recent R versions call this method "ward.D" and report that
# "ward" was renamed -- confirm which Ward variant is intended.
clusteModel <- hclust(Dist, method = "ward")
result <- cutree(clusteModel, k = 3)
# Cross-tabulate the true species against the three clusters.
table(iris[, 5], result)
plot(clusteModel)

# Fast hierarchical clustering ----
# Same call as above, issued after attaching fastcluster; presumably the
# package supplies its own hclust() -- TODO confirm.
library(fastcluster)
clusteModel <- hclust(Dist, method = "ward")

# Other distance measures ----
# From here on dist() is called with methods ("cosine", "Jaccard") that are
# requested after attaching proxy.
library(proxy)
res <- dist(data, method = "cosine")
x <- c(0, 0, 1, 1, 1, 1)
y <- c(1, 0, 1, 1, 0, 1)
dist(rbind(x, y), method = "Jaccard")

# Distance with missing values: average the absolute differences over the
# coordinate pairs where both values are present (6 of 7 here).
x <- c(0, 0, 1.2, 1, 0.5, 1, NA)
y <- c(1, 0, 2.3, 1, 0.9, 1, 1)
d <- abs(x - y)
Dist <- sum(d, na.rm = TRUE) / sum(!is.na(d))

# k-means clustering ----
clusteModel <- kmeans(dataScale, centers = 3, nstart = 10)
class(clusteModel)

# Partitioning around medoids ----
library(proxy)
library(cluster)
# NOTE(review): confirm that pam() accepts "Mahalanobis" as a metric in the
# installed version of cluster.
clustModel <- pam(dataScale, k = 3, metric = "Mahalanobis")
clustModel$medoids
table(iris$Species, clustModel$clustering)
# Show the two pam plots side by side.
par(mfcol = c(1, 2))
plot(clustModel, which.plots = 2, main = "")
plot(clustModel, which.plots = 1, main = "")

# Choosing the number of clusters ----
library(devtools)
install_github("lijian13/rinds")
rinds::bestCluster(dataScale, 2:6)
library(fpc)
pka <- kmeansruns(
  iris[, 1:4],
  krange = 2:6,
  critout = TRUE,
  runs = 2,
  criterion = "asw"
)

# Density-based clustering ----
# Two noisy curves (a sine arc and a shifted cosine arc), 100 points each.
x1 <- seq(0, pi, length.out = 100)
y1 <- sin(x1) + 0.1 * rnorm(100)
x2 <- 1.5 + seq(0, pi, length.out = 100)
y2 <- cos(x2) + 0.1 * rnorm(100)
data <- data.frame(c(x1, x2), c(y1, y2))
names(data) <- c("x", "y")
model1 <- kmeans(data, centers = 2, nstart = 10)
library("fpc")
model2 <- dbscan(data, eps = 0.3, MinPts = 4)

# Self-organising map ----
library(kohonen)
data <- as.matrix(iris[, -5])
somModel <- som(data, grid = somgrid(15, 10, "hexagonal"))
plot(somModel, ncolors = 10, type = "dist.neighbours")
# Integer codes of the species, used as labels and (offset by 3) as colours.
irisclass <- as.numeric(iris[, 5])
plot(
  somModel,
  type = "mapping",
  labels = irisclass,
  col = irisclass + 3,
  main = "mappingplot"
)

# Principal component analysis ----
library(FactoMineR)
data(decathlon)
head(decathlon, n = 2)
pca1 <- princomp(decathlon[, 1:10])
plot(pca1, type = "lines")
res.pca <- PCA(decathlon, quanti.sup = 11:12, quali.sup = 13)

# Correspondence analysis ----
library(MASS)
data(caith)
biplot(corresp(caith, nf = 2), xlim = c(-0.6, 0.8))

# Visualisation for multivariate analysis ----
library(car)
data(mpg, package = "ggplot2")
# NOTE(review): confirm that the installed version of car still accepts a
# character value for `diagonal`.
scatterplotMatrix(
  mpg[, c("displ", "cty", "hwy")],
  diagonal = "histogram",
  ellipse = TRUE
)
library(corrplot)
data(mtcars)
M <- cor(mtcars)
corrplot(M, order = "hclust")

# Logistic regression ----
# Simulate a binary response from a known linear predictor.
set.seed(1)
b0 <- 1
b1 <- 2
b2 <- 3
x1 <- rnorm(1000)
x2 <- rnorm(1000)
z <- b0 + b1 * x1 + b2 * x2
pr <- 1 / (1 + exp(-z))
y <- rbinom(1000, 1, pr)
plotdata2 <- data.frame(x1, x2, y = factor(y))
library(ggplot2)
p2 <- ggplot(data = plotdata2, aes(x = x1, y = x2, color = y)) +
  geom_point()
print(p2)
data <- data.frame(x1, x2, y)
model <- glm(y ~ ., data = data, family = binomial)
summary(model)
# Line where the fitted linear predictor is zero, written as
# x2 = inter + slope * x1.
w <- coef(model)
inter <- -w[1] / w[3]
slope <- -w[2] / w[3]
plotdata3 <- data.frame(cbind(x1, x2), y = factor(y))
p3 <- ggplot(data = plotdata3, aes(x = x1, y = x2, color = y)) +
  geom_point() +
  geom_abline(intercept = inter, slope = slope)
print(p3)
predict(model, newdata = list(x1 = 1, x2 = 3), type = "response")

# Complex networks ----
snafile <- system.file("examples", "sna", "lijian001.txt", package = "rinds")
snadf <- read.table(snafile, header = FALSE, stringsAsFactors = FALSE)
head(snadf)
library(igraph)
snaobj <- graph.data.frame(snadf, directed = FALSE)
class(snaobj)
vcount(snaobj)
ecount(snaobj)
# Measures for vertex 6.
neighbors(snaobj, 6, mode = "all")
degree(snaobj, v = 6)
betweenness(snaobj, v = 6, directed = FALSE)
closeness(snaobj, v = 6)
page.rank(snaobj, vids = 6)
similarity.dice(snaobj, vids = c(6, 7))
# Community detection; one colour per community.
snaclass <- walktrap.community(snaobj, steps = 5)
cl <- snaclass$membership
V(snaobj)$color <- rainbow(max(cl))[cl]
V(snaobj)$bte <- betweenness(snaobj, directed = FALSE)
# NOTE(review): the comparison operator was lost in the original text
# ("bte=1800"); ">=" is assumed here so that high-betweenness vertices are
# enlarged and labelled -- confirm.
V(snaobj)$size <- 5
V(snaobj)[bte >= 1800]$size <- 15
V(snaobj)$label <- NA
V(snaobj)[bte >= 1800]$label <- V(snaobj)[bte >= 1800]$name
# NOTE(review): no `cex` vertex attribute is set in the visible code, so
# vertex.label.cex receives whatever V(snaobj)$cex yields -- confirm.
plot(
  snaobj,
  layout = layout.fruchterman.reingold,
  vertex.size = V(snaobj)$size,
  vertex.color = V(snaobj)$color,
  vertex.label = V(snaobj)$label,
  vertex.label.cex = V(snaobj)$cex,
  edge.color = grey(0.5),
  edge.arrow.mode = "-"
)

# Data cleaning with caret and prediction with a classification tree ----
set.seed(1)
data(PimaIndiansDiabetes2, package = "mlbench")
data <- PimaIndiansDiabetes2
library(caret)
# Column 9 is left out of the preprocessing and re-attached as `class` below.
# Step 1: centre and scale the predictors.
preProcValues <- preProcess(data[, -9], method = c("center", "scale"))
scaleddata <- predict(preProcValues, data[, -9])
# Step 2: Yeo-Johnson transformation.
preProcbox <- preProcess(scaleddata, method = c("YeoJohnson"))
boxdata <- predict(preProcbox, scaleddata)
# Step 3: impute with the "bagImpute" method.
preProcimp <- preProcess(boxdata, method = "bagImpute")
procdata <- predict(preProcimp, boxdata)
procdata$class <- data[, 9]

library(rpart)
# Grow the full tree (cp = 0), then prune at the cp value that minimises
# cross-validated error plus its standard error.
rpartModel <- rpart(class ~ ., data = procdata, control = rpart.control(cp = 0))
cptable <- as.data.frame(rpartModel$cptable)
cptable$errsd <- cptable$xerror + cptable$xstd
cpvalue <- cptable[which.min(cptable$errsd), "CP"]
pruneModel <- prune(rpartModel, cp = cpvalue)
library(rpart.plot)
rpart.plot(pruneModel)
pre <- predict(pruneModel, procdata, type = "class")
preTable <- table(pre, procdata$class)
accuracy <- sum(diag(preTable)) / sum(preTable)

# Round-trip the data through a CSV file. The same path is reused, so the
# second write.table() overwrites the first.
# stringsAsFactors = TRUE keeps text columns (e.g. `class`) as factors when
# they are read back, regardless of the R version's default.
csvPath <- "C:/ProgramFiles/R/zhangfuchang.csv"
write.table(iris, file = csvPath, sep = ",")
data <- read.table(file = csvPath, sep = ",", stringsAsFactors = TRUE)
write.table(procdata, file = csvPath, sep = ",")
procdata <- read.table(file = csvPath, sep = ",", stringsAsFactors = TRUE)

# Classification tree with an asymmetric loss matrix ----
# The loss matrix is filled column-wise: off-diagonal costs are 5 and 1.
rpartModel <- rpart(
  class ~ .,
  data = procdata,
  control = rpart.control(cp = 0.01),
  parms = list(loss = matrix(c(0, 5, 1, 0), 2))
)
pre <- predict(rpartModel, procdata, type = "class")
preTable <- table(pre, procdata$class)
accuracy <- sum(diag(preTable)) / sum(preTable)

# Classification with CART and 10-fold cross-validation ----
procdata <- read.table(file = csvPath, sep = ",", stringsAsFactors = TRUE)
num=sample(1:10,nrow(procdata),r