昆明理工大学信息工程与自动化学院学生实验报告(—学年第1学期)课程名称:人工智能开课实验室:信自楼504年12月24日年级、专业、班学号姓名成绩实验项目名称天气决策树指导教师吴霖教师评语该同学是否了解实验原理:A.了解□B.基本了解□C.不了解□该同学的实验能力:A.强□B.中等□C.差□该同学的实验是否达到要求:A.达到□B.基本达到□C.未达到□实验报告是否规范:A.规范□B.基本规范□C.不规范□实验过程是否详细记录:A.详细□B.一般□C.没有□教师签名:年月日一、上机目的及内容1.上机内容根据下列给定的14个数据,运用InformationGain构造一个天气决策树。例子编号属性分类天况温度湿度风况1晴热大无N2晴热大有N3多云热大无P4雨中大无P5雨冷正常无P6雨冷正常有N7多云冷正常有P8晴中大无N9晴冷正常无P10雨中正常无P11晴中正常有P12多云中大有P13多云热正常无P14雨中大有N2.上机目的(1)学习用InformationGain构造决策树的方法;(2)在给定的例子上,构造出正确的决策树;(3)理解并掌握构造决策树的技术要点。二、实验原理及基本技术路线图(方框原理图或程序流程图)(1)设计并实现程序,构造出正确的决策树;(2)对所设计的算法采用大O符号进行时间复杂性和空间复杂性分析;实验考虑到几个属性:天况——晴、雨、多云;温度——热、中、冷;湿度——大、正常;风况——有、无;然后根据每个属性来算出信息增益,接下来我们选择信息增益最大的属性来进行划分。根据问题设计算法,建立数据结构,设计需要用的类,然后通过编程实现问题求解。了解和求解最大信息增益和最小熵:选择平均熵最小的属性作为根节点,用同样的方法选择其他节点直至形成整个决策树。dataset就是具体的划分过程,首先找到可用的划分项目,在第一次划分之后再根据相关的数据来计算熵。Main函数流程图Dataset函数主要流程图Basefun函数流程图Attributevalue函数流程图DataPoint函数流程图三、所用仪器、材料(设备名称、型号、规格等或使用软件)1台PC及VISUALC++软件四、实验方法、步骤(或:程序代码或操作过程)源代码:main函数:#includefstream#includeiostream#includelist#includesstream#includestring#includevector#include#include#includeDataPointprocessLine(std::stringconst&sLine){std::istringstreamisLine(sLine,std::istringstream::in);std::vectorAttributeValueattributes;while(()){std::stringrawfield;isLinerawfield;(AttributeValue(rawfield));}AttributeValuev=();();booltype=();returnDataPoint(attributes,type);}voidmain(){std::ifstreamifs(,std::ifstream::in);DataSetinitDataset;while(()){std::stringsLine;std::getline(ifs,sLine);(processLine(sLine));}std::listDataSetprocessQ;std::vectorDataSetfinishedDataSet;(initDataset);while(()0){std::vectorDataSetsplittedDataSets;DataSetdataset=();(splittedDataSets);();for(inti=0;i();++i){floatprob=splittedDataSets[i].getPositiveProb();if(prob==||prob=={(splittedDataSets[i]);}else{(splittedDataSets[i]);}}}std::coutThedicisiontreeis:std::endl;for(inti=0;i();++i){finishedDataSet[i].display();}}DataSet函数:#includeiostream#includemap#include#includevoidSplitAttributeValue::display(){std::cout\tSplitattributeID(m_attributeIndex)\t;std::coutSplitattribu
tevalue(())std::endl;}voidDataSet::addDataPoint(DataPointconst&datapoint){(datapoint);}floatDataSet::getPositiveProb(){floatnPositive=0;for(inti=0;i();++i){if(m_data[i].isPositive()){nPositive++;}}returnnPositive/();}structStat{intnPos;intnNeg;intid;};voidDataSet::splitDataSet(std::vectorDataSet&splittedSets){etNAttributes();inti,j;std::vectorboolsplittingAttributeBV;(nAttributes);for(i=0;inAttributes;++i){splittingAttributeBV[i]=true;}for(i=0;i();++i){splittingAttributeBV[m_splitAttributes[i].getAttributeIndex()]=false;}std::vectorintsplittingAttributeIds;for(i=0;inAttributes;++i){if(true==splittingAttributeBV[i]){(i);}}typedefstd::mapAttributeValue,Stat,AttributeValueCmpAttributeValueStat;typedefstd::mapAttributeValue,Stat,AttributeValueCmp::iteratorAttributeValueStat_iterator;typedefstd::mapAttributeValue,Stat,AttributeValueCmp::const_iteratorAttributeValueStat_const_iterator;etAttribute(splittingAttributeIds[j]);AttributeValueStat_iteratorit=splittingStats[j].find(v);if(splittingStats[j].end()==it){Statstat;if(m_data[i].isPositive()){=1;=0;=0;}else{=0;=1;=0;}splittingStats[j].insert(std::pairAttributeValue,Stat(v,stat));}else{if(m_data[i].isPositive()){it-++;}else{it-++;}}}}egin();it!=splittingStats[j].end();++it){std::cout\tit-()\tit-\tit-std::endl;}}egin();it!=splittingStats[j].end();++it){intnSamples=it-+it-;floatp=it-;p/=nSamples;entropy+=calEntropy(p)*nSamples/n;}if(entropyminEntropy||-1==splitAttributeId){minEntropy=entropy;splitAttributeId=j;}}std::coutSplitatattribute(splittingAttributeIds[splitAttributeId])std::endlstd::endl;egin();it!=splittingStats[splitAttributeId].end();++it){it-=k++;}(k);for(i=0;ik;++i){for(j=0;j();++j){splittedSets[i].(m_splitAttributes[j]);}}for(AttributeValueStat_iteratoritt=splittingStats[splitAttributeId].begin();itt!=splittingStats[splitAttributeId].end();++itt){splittedSets[itt-].(SplitAttributeValue(itt-first,attrId));}for(i=0;i();++i){AttributeValueconst&v=m_data[i].getAttribute(attrId);AttributeValueStat_const_iteratorit=
(v);if(()!=it){splittedSets[it-].addDataPoint(m_data[i]);}else{throwDataErrException();}}}voidDataSet::display(){inti;std::coutDataset(this)std::endl;for(i=0;i();++i){m_splitAttributes[i].display();}std::coutData:std::endl;for(i=0;i();++i){m_data[i].display();}std::coutstd::endl;}DataPoint函数:#includefloatlog2(floatx){return/log10(2)*log10(x);}floatcalEntropy(floatprob){floatsum=0;if(prob==0||prob==1){return0;}sum-=prob*log2(prob);sum-=(1-prob)*log2(1-prob);returnsum;}Basefun函数:#includefloatlog2(floatx){return/log10(2)*log10(x);}floatcalEntropy(floatprob){floatsum=0;if(prob==0||prob==1){return0;}sum-=prob*log2(prob);sum-=(1-prob)*log2(1-prob);returnsum;}AttributeValue函数:#include#includeAttributeValue::AttributeValue(std::stringconst&instring):m_value(instring){}boolAttributeValue::GetType(){if(m_value==P){returntrue;}elseif(m_value==N){returnfalse;}else{throwDataE