00001
00002
00003 #ifndef DISET_H
00004 #define DISET_H
00005
00006
00007 #include <string>
00008 #include <map>
00009
00010
00011 #include <utility>
00012 #include <fstream>
00013 #include <sstream>
00014
00015
00016 #include <set>
00017 #include <vector>
00018 #include <iostream>
00019 #include <cstdlib>
00020
00021
00022
00031
00032
00033
00034
00035 using namespace std;
00036
00037
00038 template<class T> struct node
00039 {
00040 node() : rank(0), key() { parent=this; }
00041 node(const T &k) : rank(0), key(k) { parent=this; }
00042 node(const T &k, node* p) : parent(p), rank(0), key(k) { }
00043
00044 node(const node &n) { key = n.key; parent = this; rank = n.rank; }
00045
00046 ~node() { }
00047 bool operator==(const node& n) const { return key==n.key; }
00048 bool operator<(const node &n) const { return key < n.key; }
00049
00050 node& operator=(const node &n) { if (&n != this) { key = n.key; parent = this; rank = n.rank; } return *this; }
00051
00052 T key;
00053 node<T>* parent;
00054 int rank;
00055 };
00056
00057
00059
00060 template<class T> node<T>* getRoot(node<T> *x) {
00061 if (x != x->parent) x->parent = getRoot(x->parent);
00062 return x->parent;
00063 }
00064 template<class T> void mergeRoot(node<T> *&r1, node<T> *&r2) {
00065 if(r1 == r2) return;
00066 if (r1->rank > r2->rank) r2->parent = r1;
00067 else {
00068 r1->parent = r2;
00069 if (r1->rank == r2->rank) r2->rank++;
00070 }
00071 }
00072 template<class T> void join(node<T> &x, node<T> &y) {
00073 node<T> *r1 = getRoot(&x);
00074 node<T> *r2 = getRoot(&y);
00075 mergeRoot(r1, r2);
00076
00077 }
00078 template<class T> void join(node<T>* x, node<T>* y) {
00079 node<T> *r1 = getRoot(x);
00080 node<T> *r2 = getRoot(y);
00081 mergeRoot(r1, r2);
00082
00083 }
00084
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00107
00115 template<class T> class hatrees
00116 {
00117 public:
00118 hatrees() {};
00119 ~hatrees();
00120 void clear() { nodes.clear(); result.clear(); }
00122 void readFromMap(const std::multimap<T, T> &m);
00123 void readFromFile(const string &file);
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136 void showStore(ostream &os) const;
00137
00138
00139 int getNodeCount() const { return nodes.size(); }
00142 void showCluster(ostream &os, bool reverse= true);
00147 void showClusterIntId(ostream &ous, int &id);
00148
00150 int showClusterByLine(ostream &os);
00151 set<T> keyset() const;
00152 vector<T> keyarray() const;
00153
00156 void clusterArray(vector<set<T> > &vecset);
00157
00163 std::multimap<T,T>& getCluster() { if (result.empty()) transform(); return result; }
00164
00165 #ifdef USE_HASH_MAP
00166
00167
00168 typedef typename hash_map<T, node<T>* >::iterator niterator;
00169 typedef typename hash_map<T, node<T>* >::const_iterator const_niterator;
00170 #else
00171 typedef typename map<T, node<T>* >::iterator niterator;
00172 typedef typename map<T, node<T>* >::const_iterator const_niterator;
00173
00174
00175 #endif
00176
00177 private:
00178 #ifdef USE_HASH_MAP
00179 hash_map<T, node<T>* > nodes;
00180 #else
00181 map<T, node<T>* > nodes;
00182 #endif
00183
00186
00187 std::multimap<T, T> result;
00188
00189
00190
00191 void transform();
00192 };
00193
00195 template<class T> hatrees<T>::~hatrees() {
00196 niterator MI = nodes.begin();
00197 while (MI != nodes.end()) {
00198 delete MI->second;
00199 MI++;
00200 }
00201 }
00202
00203 template<class T> void hatrees<T>::readFromMap(const std::multimap<T, T> &m) {
00204 pair<niterator, bool> p1, p2;
00205 T f1, f2;
00206 typename std::multimap<T, T>::const_iterator imi = m.begin();
00207
00208 while (imi != m.end()) {
00209 f1 = imi->first;
00210 f2 = imi->second;
00211 p1 = nodes.insert(pair<T, node<T>* >(f1, new node<T>(f1)));
00212 p2 = nodes.insert(pair<T, node<T>* >(f2, new node<T>(f2)));
00213 join(p1.first->second, p2.first->second);
00214 imi++;
00215 }
00216 }
00217
00218 template<class T> void hatrees<T>::readFromFile(const string &file) {
00219 ifstream IN(file.c_str());
00220 if (IN.fail()) {
00221 cerr << "reading from " << file << "failed\n";
00222 exit(1);
00223 }
00224 string ln;
00225 pair<niterator, bool> p1, p2;
00226 T f1, f2;
00227
00228
00229
00230 int id=0;
00231
00232 getline(IN, ln);
00233 while (!IN.eof()) {
00234 istringstream ist(ln);
00235 ist >> f1 >> f2;
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 p1 = nodes.insert(pair<T, node<T>* >(f1, new node<T>(f1)));
00247 p2 = nodes.insert(pair<T, node<T>* >(f2, new node<T>(f2)));
00248 join(p1.first->second, p2.first->second);
00249 getline(IN, ln);
00250 }
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273 }
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342 template<class T> void hatrees<T>::showStore(ostream &os) const {
00343 const_niterator it = nodes.begin();
00344 node<T>* root;
00345 os << "key\tcluster_representative\n";
00346 while (it != nodes.end()) {
00347 root = getRoot(it->second->parent);
00348 os << it->first << "\t" << root->key << endl;
00349 it++;
00350 }
00351 os << "Total number of unique keys is " << nodes.size() << endl;
00352 cerr << "Total number of unique keys is " << nodes.size() << endl;
00353 }
00354
00355
00356 template<class T> set<T> hatrees<T>::keyset() const {
00357 set<T> tmpset;
00358 const_niterator it = nodes.begin();
00359 while (it != nodes.end()) {
00360 tmpset.insert(it->first);
00361 it++;
00362 }
00363 return tmpset;
00364 }
00365
00366 template<class T> vector<T> hatrees<T>::keyarray() const {
00367 vector<T> tmp;
00368 const_niterator it = nodes.begin();
00369 while (it != nodes.end()) {
00370
00371 tmp.push_back(it->first);
00372 it++;
00373 }
00374 return tmp;
00375 }
00376
00377
00378 template<class T> void hatrees<T>::clusterArray(vector< set<T> > &vecset) {
00379 if (result.empty()) transform();
00380 vecset.clear();
00381
00382 typename std::multimap<T, T>::const_iterator mi = result.begin();
00383 while (mi != result.end()) {
00384 set<T> tmpset;
00385 tmpset.insert(mi->second);
00386
00387 T cluster_id = mi->first;
00388 mi++;
00389 while (mi != result.end() && mi->first == cluster_id) {
00390
00391 tmpset.insert(mi->second);
00392 mi++;
00393 }
00394 vecset.push_back(tmpset);
00395
00396 }
00397 }
00398
00399
00400
00401 template<class T> void hatrees<T>::showClusterIntId(ostream &ous, int &id) {
00402 if (result.empty()) transform();
00403 typename std::multimap<T, T>::const_iterator mi = result.begin();
00404 while (mi != result.end()) {
00405 set<T> tmpset;
00406 ous << mi->second << '\t' << id << endl;
00407
00408 T cluster_id = mi->first;
00409 mi++;
00410 while (mi != result.end() && mi->first == cluster_id) {
00411
00412 ous << mi->second << '\t' << id << endl;
00413 mi++;
00414 }
00415
00416
00417 ++id;
00418 }
00419 }
00420
00421
00422
00423
00424 template<class T> void hatrees<T>::showCluster(ostream &os, bool reverse) {
00425 if (result.empty()) transform();
00426
00427 typename std::multimap<T, T>::const_iterator mi = result.begin();
00428 if (reverse) {
00429 while (mi != result.end()) {
00430 os << mi->second << '\t' << mi->first << endl;
00431 mi++;
00432 }
00433 }
00434 else {
00435 while (mi != result.end()) {
00436 os << mi->first << '\t' << mi->second << endl;
00437 mi++;
00438 }
00439 }
00440
00441
00442 cerr << "Total members: " << result.size() << endl;
00443 }
00444
00445 template<class T> void hatrees<T>::transform() {
00446 niterator it = nodes.begin();
00447 node<T>* root;
00448 while (it != nodes.end()) {
00449 root = getRoot(it->second->parent);
00450 result.insert(pair<T, T>(root->key, it->first));
00451 it++;
00452 }
00453 }
00454
00457 template<class T> int hatrees<T>::showClusterByLine(ostream &os) {
00458 if (result.empty()) transform();
00459 os << "Output format: one cluster per line\n\n";
00460 int clusterCnt=0;
00461 typename std::multimap<T, T>::const_iterator mi = result.begin();
00462 while (mi != result.end()) {
00463 clusterCnt++;
00464 os << mi->second;
00465 T cluster_id = mi->first;
00466 mi++;
00467 while (mi != result.end() && mi->first == cluster_id) {
00468 os << '\t' << mi->second;
00469 mi++;
00470 }
00471 os << "\n\n";
00472 }
00473 os << "Total " << clusterCnt << " clusters\n";
00474 return clusterCnt;
00475 }
00476
00477 #endif