基于pawlak属性重要度的属性约简算法源码代码本科学位论文.doc
大概一年前写代码实现了这个算法,今天有人问我要,在这里贴出来好了。具体算法思路我也忘了,但是代码应该还算清晰。只需要事先了解下STL。view plaincopy to clipboardprint?/* 测试数据 21/10 X1 X2 X3 X4 X5 X6 X7 X8 X9 y U1 c 6 y E m h h a m m U2 c 6 n E m m h ma m m U3 c 6 n E m h h ma m m U4 c 4 y E m h h ma l h U5 c 6 n E m m m ma m m U6 c 6 n B m m m a he lo U7 c 6 n E m m h ma he lo U8 s 4 n B sm h lo ma l h U9 c 4 n B sm h lo ma m m U10 c 4 n B sm h m a m m U11 s 4 n E sm h lo ma l h U12 s 4 n E m m m ma m h U13 c 4 n B m m m ma m m U14 s 4 y E sm h h ma m h U15 s 4 n B sm m lo ma m h U16 c 4 y E m m h ma m m U17 c 6 n E m m h a m m U18 c 4 n E m m h a m m U19 s 4 n E sm h m ma m h U20 c 4 n E sm h m ma m h U21 c 4 n B sm h m ma m m */ #include <iostream> #include <fstream> #include <vector> #include <set> #include <string> #include <iomanip> using namespace std; #define DATA_FILE_NAME "./data.txt" #define DEF_ATTR_CNT 10 #define DEF_OBJ_CNT 21 int g_attr_cnt = 0; int g_obj_cnt = 0; class object; vector<string> g_attr_set; vector<bool> g_mask; vector<object *> g_obj_set; vector<string> g_core_set; vector<string> g_b_set; vector<vector<object *> * > g_ind_c_set; vector<vector<object *> * > g_ind_b_set; void clr_mask(const string &attr) int i = 0; for (i = 0; i < g_attr_cnt; i+) if (attr = g_attr_seti) g_maski = false; else g_maski = true; void set_mask() int i = 0; for (i = 0; i < g_attr_cnt; i+) g_maski = true; void set_mask_set(vector<string> &str_set) int i = 0, j = 0; bool found = false; for (i = 0; i < g_attr_cnt; i+) found = false; for (j = 0; j < str_set.size(); j+) if (g_attr_seti = str_setj) found = true; break; if (found) g_maski = true; else g_maski = false; class object public: vector<string> attr_val; string obj_name; public: object(const string &name) : obj_name(name) attr_val.reserve(DEF_ATTR_CNT); object() bool operator = (const object &r_obj) const bool res = true; int i = 0; for (i = 0; i < g_attr_cnt; i+) if (g_maski) res &= (attr_vali = r_obj.attr_vali); if (false = res) break; return res; ; void read_data() string strTmp; ifstream data_in(DATA_FILE_NAME,ifstream:in); int i = 0, j = 0; object *p_obj = NULL; g_attr_set.reserve(DEF_ATTR_CNT); g_mask.reserve(DEF_ATTR_CNT); g_obj_set.reserve(DEF_OBJ_CNT); / read object count, and attr count data_in >> strTmp; g_obj_cnt = atoi(strTmp.substr(0, strTmp.find("/").c_str(); g_attr_cnt = atoi(strTmp.substr(strTmp.find("/") + 1, strTmp.length().c_str(); for (i = 0; i < g_attr_cnt; i+) data_in >> strTmp; g_attr_set.push_back(strTmp); g_mask.push_back(true); for (i = 0; i < g_obj_cnt; i+) data_in >> strTmp; p_obj = new object(strTmp); for (j = 0; j < g_attr_cnt; j+) data_in >> strTmp; p_obj->attr_val.push_back(strTmp); / add object g_obj_set.push_back(p_obj); / close file data_in.close(); void print_data() int i = 0, j = 0; cout << g_obj_cnt << "/" << g_attr_cnt << " " for (i = 0; i < g_attr_cnt; i+) cout << setw(4) << setiosflags(ios:left) << g_attr_seti; if (i < g_attr_cnt - 1) cout << " " cout << endl; cout << "-" << endl; for (i = 0; i < g_obj_cnt; i+) cout << setw(3) << g_obj_seti->obj_name << " " for (j = 0; j < g_attr_cnt; j+) cout << setw(3) << setiosflags(ios:left) << g_obj_seti->attr_valj; if (j < g_attr_cnt - 1) cout << setw(3) << " " cout << endl; void clear_data() int i = 0; for (i = 0; i < g_obj_cnt; i+) delete g_obj_seti; g_obj_seti = NULL; void clear_ind_set(vector<vector<object *> * > &ind_set) vector<object *> *tmp = NULL; while (!ind_set.empty() tmp = ind_set.back(); delete tmp; tmp = NULL; ind_set.pop_back(); void app_exit() clear_ind_set(g_ind_c_set); clear_ind_set(g_ind_b_set); void add_obj_to_ind_set(vector<vector<object *> * > &ind_set, object *p_obj) vector<object *> *vec_tmp = NULL; int i = 0; bool found = false; for (i = 0; i < ind_set.size(); i+) if (*p_obj) = *(*ind_seti)0) found = true; ind_seti->push_back(p_obj); break; if (!found) vec_tmp = new vector<object *> vec_tmp->push_back(p_obj); ind_set.push_back(vec_tmp); bool obj_set_equal(vector<object *> &l_obj_set, vector<object *> &r_obj_set) if (l_obj_set.size() != r_obj_set.size() return false; bool found = false; int i = 0, j = 0; for (i = 0; i < l_obj_set.size(); i+) found = false; for (j = 0; j < r_obj_set.size(); j+) if (l_obj_seti = r_obj_setj) found = true; break; if (!found) return false; return true; bool ind_set_equal(vector<vector<object *> * > &l_ind_set, vector<vector<object *> * > &r_ind_set) if (l_ind_set.size() != r_ind_set.size() return false; bool found = false; int i = 0, j = 0; for (i = 0; i < l_ind_set.size(); i+) found = false; for (j = 0; j < r_ind_set.size(); j+) if (obj_set_equal(*l_ind_seti, *r_ind_setj) found = true; break; if (!found) return false; return true; void core() / get IND(C) int i = 0; vector<vector<object *> * > ind_set_tmp; set_mask(); for (i = 0; i < g_obj_cnt; i+) add_obj_to_ind_set(g_ind_c_set, g_obj_seti); int j = 0; for (i = 0; i < g_attr_cnt; i+) clear_ind_set(ind_set_tmp); clr_mask(g_attr_seti); / get IND(C-xi) for (j = 0; j < g_obj_cnt; j+) add_obj_to_ind_set(ind_set_tmp, g_obj_setj); if (!ind_set_equal(g_ind_c_set, ind_set_tmp) g_core_set.push_back(g_attr_seti); clear_ind_set(ind_set_tmp); / already get core cout << "-" << endl; cout << "core:n" for (i = 0; i < g_core_set.size(); i+) /copy to B g_b_set.push_back(g_core_seti); cout << g_core_seti; if (i < g_core_set.size() - 1) cout << ", " cout << "n" bool attr_set_equal(vector<string> &l_attr_set, vector<string> &r_attr_set) if (l_attr_set.size() != r_attr_set.size() return false; int i = 0, j = 0; bool found = false; for (i = 0; i < l_attr_set.size(); i+) found = false; for (j = 0; j < r_attr_set.size(); j+) if (l_attr_seti = r_attr_setj) found = true; break; if (!found) return false; return true; void attr_set_sub(vector<string> &l_attr_set, vector<string> &r_attr_set, vector<string> &res_set) int i = 0, j = 0; bool found = false; for (i = 0; i < l_attr_set.size(); i+) found = false; for (j = 0; j < r_attr_set.size(); j+) if (l_attr_seti = r_attr_setj) found = true; break; if (!found) res_set.push_back(l_attr_seti); void get_ind_set(vector<string> &mask_set, vector<vector<object *> * > &res_ind_set) int i = 0; / get IND set set_mask_set(mask_set); for (i = 0; i < g_obj_cnt; i+) add_obj_to_ind_set(res_ind_set, g_obj_seti); void attr_set_copy(vector<string> &dst_attr_set, vector<string> &src_attr_set) int i = 0; dst_attr_set.clear(); for (i = 0; i < src_attr_set.size(); i+) dst_attr_set.push_back(src_attr_seti); void reduction() vector<string> b_set_tmp; vector<string> c_sub_b; vector<int> max_sig; vector<vector<object *> * > ind_set_tmp; int max = 0, max_idx = 0, max_cnt = 0; int i = 0, j = 0; / get IND(B) get_ind_set(g_b_set, g_ind_b_set); while (!ind_set_equal(g_ind_c_set, g_ind_b_set) if (attr_set_equal(g_b_set, g_attr_set) break; c_sub_b.clear(); attr_set_sub(g_attr_set, g_b_set, c_sub_b); max_sig.clear(); for (i = 0; i < c_sub_b.size(); i+) attr_set_copy(b_set_tmp, g_b_set); b_set_tmp.push_back(c_sub_bi); clear_ind_set(ind_set_tmp); get_ind_set(b_set_tmp, ind_set_tmp); max_sig.push_back(ind_set_tmp.size() - g_ind_b_set.size(); / now, process max_sig max = max_sig0; max_idx = 0; max_cnt = 0; for (j = 1; j < max_sig.size(); j+) if (max_sigj > max) max = max_sigj; max_idx = j; max_cnt = 1; else if (max = max_sigj) max_cnt+; g_b_set.push_back(c_sub_bmax_idx); if (attr_set_equal(g_b_set, g_attr_set) break; / get IND(B) clear_ind_set(g_ind_b_set); get_ind_set(g_b_set, g_ind_b_set); clear_ind_set(ind_set_tmp); / get reduction now cout << "-" << endl; cout << "reduction set:n" for (i = 0; i < g_b_set.size(); i+) cout << g_b_seti; if (i < g_b_set.size() - 1) cout << ", " cout << "n" void print_red_data() int i = 0, j = 0; set_mask_set(g_b_set); cout << "-" << endl; cout << " " for (i = 0; i < g_attr_cnt; i+) if (g_maski) cout << setw(4) << setiosflags(ios:left) << g_attr_seti; if (i < g_attr_cnt - 1) cout << " " cout << endl; cout << "-" << endl; for (i = 0; i < g_obj_cnt; i+) cout << setw(3) << g_obj_seti->obj_name << " " for (j = 0; j < g_attr_cnt; j+) if (g_maskj) cout << setw(3) << setiosflags(ios:left) << g_obj_seti->attr_valj; if (j < g_attr_cnt - 1) cout << setw(3) << " " cout << endl; int main() read_data(); print_data(); core(); reduction(); print_red_data(); clear_data(); app_exit(); /* 测试数据21/10 X1 X2 X3 X4 X5 X6 X7 X8 X9 yU1 c 6 y E m h h a m mU2 c 6 n E m m h ma m mU3 c 6 n E m h h ma m mU4 c 4 y E m h h ma l hU5 c 6 n E m m m ma m mU6 c 6 n B m m m a he loU7 c 6 n E m m h ma he loU8 s 4 n B sm h lo ma l hU9 c 4 n B sm h lo ma m mU10 c 4 n B sm h m a m mU11 s 4 n E sm h lo ma l hU12 s 4 n E m m m ma m hU13 c 4 n B m m m ma m mU14 s 4 y E sm h h ma m hU15 s 4 n B