朴素贝叶斯算法及截图(共12页).docx
// naiveBayesClassify.cpp : defines the entry point for the console application.
//
// Naive Bayes classifier for a small table of categorical attributes.
// Workflow (see _tmain): count value occurrences per class (getConditionStat),
// turn the counts into smoothed conditional probabilities (getConditionProb),
// then score a sample against every class (getClassification).

// NOTE(review): if the MSVC project builds with /Yu"stdafx.h", the precompiled
// header include may need to be the very first directive - confirm against the
// project settings before relying on this guard.
#ifdef _MSC_VER
#include "stdafx.h"
#else
// Stand-in so the entry-point signature below still compiles off MSVC; there,
// _tmain is an ordinary function and a main() must forward to it.
typedef char _TCHAR;
#endif

#include <cstddef>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;

// Training table: row 0 holds the column names, every later row is one record,
// and the last column is the class ("aim") attribute.
typedef vector<vector<string>> strDVect;
// class value -> attribute name -> attribute value -> count; the counts are
// replaced in place by conditional probabilities in getConditionProb.
typedef map<string,map<string,map<string,double>>> thriMap;
// Inner levels of thriMap.
typedef map<string,map<string,double>> doubleMap;
typedef map<string,double> strMap;
// class value -> number of training records carrying that class value
typedef map<string,int> aimMap;
// attribute name (class attribute included) -> number of distinct values
typedef map<string,int> attMap;
// Smoothing constant added to numerators and, weighted by the number of
// distinct values, to denominators. _tmain resets it before use.
double K = 1;
// One sample to classify: attribute values in column order, without the class.
typedef vector<string> sampleVect;

// Returns the number of distinct values registered for attName, or `fallback`
// when attKind has no entry for it, so a missing entry never dereferences end().
static int kindCountOr(const attMap &attKind, const string &attName, int fallback)
{
    const attMap::const_iterator found = attKind.find(attName);
    return found != attKind.end() ? found->second : fallback;
}

// Reports how deep the key path (aimValue, attName, attValue) exists in prob:
//   1 - aimValue is missing
//   2 - aimValue exists, attName is missing under it
//   3 - aimValue and attName exist, attValue is missing
//   4 - the complete path exists
int existRecord(const thriMap &prob, const string &aimValue, const string &attName, const string &attValue)
{
    const thriMap::const_iterator iterThri = prob.find(aimValue);
    if (iterThri == prob.end())
        return 1;

    const doubleMap::const_iterator iterDouble = iterThri->second.find(attName);
    if (iterDouble == iterThri->second.end())
        return 2;

    const strMap::const_iterator iterSingle = iterDouble->second.find(attValue);
    if (iterSingle == iterDouble->second.end())
        return 3;

    return 4;
}

// Prints every entry of prob as "class attribute value number", one per line.
void show(const thriMap &prob)
{
    for (thriMap::const_iterator cls = prob.begin(); cls != prob.end(); ++cls)
    {
        for (doubleMap::const_iterator att = cls->second.begin(); att != cls->second.end(); ++att)
        {
            for (strMap::const_iterator val = att->second.begin(); val != att->second.end(); ++val)
            {
                cout << cls->first << " " << att->first << " "
                     << val->first << " " << val->second << endl;
            }
        }
    }
}

// Collects the raw statistics from the training table.
//   datas  - training table; row 0 is the header, last column is the class
//   prob   - receives prob[class][attribute][value] = occurrence count
//   aimNum - receives aimNum[class] = number of records with that class
// An empty table, or one with an empty header row, leaves both outputs
// untouched. Rows shorter than the header are skipped instead of being indexed
// out of range.
void getConditionStat(const strDVect &datas, thriMap &prob, aimMap &aimNum)
{
    if (datas.empty() || datas[0].empty())
        return;

    const vector<string> &header = datas[0];
    const size_t columns = header.size();
    const size_t classCol = columns - 1;

    for (size_t row = 1; row < datas.size(); ++row)
    {
        const vector<string> &record = datas[row];
        if (record.size() < columns)
            continue;

        const string &classValue = record[classCol];
        // operator[] value-initialises a missing count to 0, so one statement
        // covers both "first occurrence" and "seen before".
        for (size_t col = 0; col < classCol; ++col)
            prob[classValue][header[col]][record[col]] += 1;

        ++aimNum[classValue];
    }
}

// Converts the counts in prob, in place, into smoothed conditional
// probabilities:
//   P(value | class) = (count + K) / (kinds(attribute) * K + aimNum[class])
// Call it once per set of counts: a second call would treat the probabilities
// as counts.
//   aimNum  - records per class value
//   attKind - distinct-value count per attribute; if an attribute is not
//             listed, the number of values observed for it under the class is
//             used instead
// A class that has no entry in aimNum is left unchanged because its
// denominator is unknown.
void getConditionProb(thriMap &prob, const aimMap &aimNum, const attMap &attKind)
{
    for (thriMap::iterator cls = prob.begin(); cls != prob.end(); ++cls)
    {
        const aimMap::const_iterator classCount = aimNum.find(cls->first);
        if (classCount == aimNum.end())
            continue;

        for (doubleMap::iterator att = cls->second.begin(); att != cls->second.end(); ++att)
        {
            const int kinds = kindCountOr(attKind, att->first,
                                          static_cast<int>(att->second.size()));
            const double denominator = kinds * K + classCount->second;

            for (strMap::iterator val = att->second.begin(); val != att->second.end(); ++val)
                val->second = (val->second + K) / denominator;
        }
    }
}

// Scores sampleIns against every class in aimNum and prints the class with the
// highest posterior together with its normalised probability.
//   datas     - training table; only the header row (column names) is read
//   prob      - conditional probabilities produced by getConditionProb
//   aimNum    - records per class value
//   attKind   - distinct-value count per attribute, class attribute included
//   sampleIns - attribute values of the sample, in column order
//   records   - total number of training records
void getClassification(const strDVect &datas, const thriMap &prob, const aimMap &aimNum,
                       const attMap &attKind, const sampleVect &sampleIns, int records)
{
    const vector<string> noHeader;
    const vector<string> &header = datas.empty() ? noHeader : datas[0];

    // Never read past the attribute columns, whatever length the sample has.
    size_t attributes = header.empty() ? 0 : header.size() - 1;
    if (sampleIns.size() < attributes)
        attributes = sampleIns.size();

    // Weight of the class attribute in the prior's denominator. It is looked up
    // by the class column's name; taking the last key of attKind would depend
    // on alphabetical key order.
    const int observedClasses = static_cast<int>(aimNum.size());
    const int classKinds = header.empty()
        ? observedClasses
        : kindCountOr(attKind, header.back(), observedClasses);

    double sum = 0;
    double best = 0;
    string classKind;

    for (aimMap::const_iterator cls = aimNum.begin(); cls != aimNum.end(); ++cls)
    {
        // Smoothed prior probability of the class.
        const double prior = (cls->second + K) / (classKinds * K + records);

        // Likelihood of the sample given the class (product over attributes).
        double likelihood = 1;
        const thriMap::const_iterator classStats = prob.find(cls->first);
        for (size_t i = 0; i < attributes; ++i)
        {
            const string &attName = header[i];
            const strMap *values = 0;
            if (classStats != prob.end())
            {
                const doubleMap::const_iterator att = classStats->second.find(attName);
                if (att != classStats->second.end())
                    values = &att->second;
            }

            strMap::const_iterator val;
            if (values != 0 && (val = values->find(sampleIns[i])) != values->end())
            {
                likelihood *= val->second;
                continue;
            }

            cout << "there is no value of attrubute " << attName << endl;
            // A value never seen with this class has count 0, so its smoothed
            // probability is K / (kinds * K + classCount) - the same formula
            // getConditionProb applies to seen values.
            const int observedValues = values != 0 ? static_cast<int>(values->size()) : 0;
            const int kinds = kindCountOr(attKind, attName, observedValues + 1);
            likelihood *= K / (kinds * K + cls->second);
        }

        // Unnormalised posterior; keep the maximum (MAP hypothesis).
        const double posterior = prior * likelihood;
        if (posterior > best)
        {
            best = posterior;
            classKind = cls->first;
        }
        sum += posterior;
    }

    // Normalise; with no scored class the sum is 0 and the probability is 0.
    const double probability = sum > 0 ? best / sum : 0;
    cout << "the the most class is " << classKind
         << ", and the probability is " << probability << endl;
}

// Builds the training table, trains the classifier, prints the table and the
// probability table, then classifies one sample.
int _tmain(int argc, _TCHAR* argv[])
{
    (void)argc;
    (void)argv;

    // m: number of table rows (header + records), n: number of columns
    const int m = 15;
    const int n = 5;

    // First row holds the attribute names; the last column is the class.
    static const char *const table[m][n] = {
        { "age",   "inco", "student", "credit_rating", "class:buys_computer" },
        { "<=30",  "high", "no",      "fair",          "no"  },
        { "<=30",  "high", "no",      "excellent",     "no"  },
        { "31.40", "high", "no",      "fair",          "yes" },
        { ">40",   "medi", "no",      "fair",          "yes" },
        { ">40",   "low",  "yes",     "fair",          "yes" },
        { ">40",   "low",  "yes",     "excellent",     "no"  },
        { "31.40", "low",  "yes",     "excellent",     "yes" },
        { "<=30",  "medi", "no",      "fair",          "no"  },
        { "<=30",  "low",  "yes",     "fair",          "yes" },
        { ">40",   "medi", "yes",     "fair",          "yes" },
        { "<=30",  "medi", "yes",     "excellent",     "yes" },
        { "31.40", "medi", "no",      "excellent",     "yes" },
        { "31.40", "high", "yes",     "fair",          "yes" },
        { ">40",   "medi", "no",      "excellent",     "no"  }
    };

    strDVect datas(m);
    for (int i = 0; i < m; ++i)
        datas[i].assign(table[i], table[i] + n);

    for (int i = 0; i < m; ++i)
    {
        for (int j = 0; j < n; ++j)
        {
            cout.width(15);
            cout << setiosflags(ios::left) << datas[i][j];
        }
        cout << endl;
    }

    thriMap prob;
    aimMap aimNum;
    attMap attKind;
    attKind.insert(make_pair("age", 3));
    attKind.insert(make_pair("inco", 3));
    attKind.insert(make_pair("student", 2));
    attKind.insert(make_pair("credit_rating", 2));
    attKind.insert(make_pair("class:buys_computer", 2));

    getConditionStat(datas, prob, aimNum);

    // init K: one over the number of training records
    K = (double)1 / (m - 1);

    getConditionProb(prob, aimNum, attKind);
    show(prob);

    sampleVect sampleIns;
    sampleIns.push_back("31.40");
    sampleIns.push_back("high");
    sampleIns.push_back("no");
    sampleIns.push_back("fair");

    getClassification(datas, prob, aimNum, attKind, sampleIns, m - 1);
    return 0;
}