#include <CInvertedFileChunk.h>

Public Member Functions | |
| double | getCollectionFrequency (size_t inSize) const |
| void | addElement (TID inDocumentID, double inDocumentFrequency) |
| bool | writeBinary (ostream &, TID inFeatureID, size_t inSize) const |
Definition at line 59 of file CInvertedFileChunk.h.
double CInvertedFileChunk::getCollectionFrequency(size_t inSize) const
Returns the percentage of documents that contain this feature.
Definition at line 79 of file CInvertedFileChunk.cc.
Referenced by CAcIFFileSystem::newGenerateInvertedFile().
void CInvertedFileChunk::addElement(TID inDocumentID, double inDocumentFrequency)
Adds one document to the list for one feature.
Definition at line 58 of file CInvertedFileChunk.cc.
Referenced by CAcIFFileSystem::newGenerateInvertedFile().
00059 { 00060 00061 //cout << "a" << flush; 00062 00063 insert(make_pair(inDocumentID,inDocumentFrequency)); 00064 };
bool CInvertedFileChunk::writeBinary(ostream &outStream, TID inFeatureID, size_t inSize) const
Writes the binary part of the list for one feature.
Definition at line 96 of file CInvertedFileChunk.cc.
References CDocumentFrequencyList::begin(), CDocumentFrequencyList::end(), and CDocumentFrequencyList::writeBinary().
Referenced by CAcIFFileSystem::newGenerateInvertedFile().
00098 { 00099 00100 bool lRetVal=true; 00101 00102 { 00103 TID lFeatureID(inFeatureID); 00104 lRetVal = lRetVal && outStream.write((char*)&lFeatureID, 00105 sizeof(lFeatureID)); 00106 } 00107 00108 { 00109 float lCollectionFrequency=(float(size()) 00110 /inSize); 00111 if((lCollectionFrequency >1) 00112 || 00113 (lCollectionFrequency < 0)){ 00114 cout << "The collection frequency is: " 00115 << lCollectionFrequency 00116 << endl; 00117 assert(!"collection frequency out of range"); 00118 } 00119 lRetVal = lRetVal && outStream.write((char*)&lCollectionFrequency, 00120 sizeof(lCollectionFrequency)); 00121 } 00122 00123 int lSize(size()); 00124 { 00125 // writing this twice means: there is 00126 // no space left in this list 00127 lRetVal = lRetVal && outStream.write((char*)&lSize, 00128 sizeof(lSize)); 00129 // writing this twice means: there is 00130 // no space left in this list 00131 lRetVal = lRetVal && outStream.write((char*)&lSize, 00132 sizeof(lSize)); 00133 cout << "writing chunk of size " << lSize <<endl; 00134 } 00135 00136 00137 CDocumentFrequencyList lList(lSize); 00138 00139 #ifndef _CDocumentFrequencyListIsList 00140 CDocumentFrequencyList::iterator j=lList.begin(); 00141 #endif 00142 00143 for(const_iterator i=begin(); 00144 i!=end(); 00145 i++){ 00146 #ifndef _CDocumentFrequencyListIsList 00147 *(j++)=CDocumentFrequencyElement((*i).first, 00148 (*i).second); 00149 #else 00150 lList.push_back(CDocumentFrequencyElement((*i).first, 00151 (*i).second) 00152 ); 00153 #endif 00154 } 00155 #ifdef _CDocumentFrequencyListIsArray 00156 //lList.setEnd(j);// superfluous, because lList.begin()+lSize==j anyway 00157 assert(j==lList.begin()+lSize); 00158 #endif 00159 00160 #ifndef _CDocumentFrequencyListIsList 00161 sort(lList.begin(), 00162 lList.end(), 00163 CSortByDocumentFrequency_DFE()); 00164 #else 00165 00166 lList.sort(CSortByDocumentFrequency_DFE()); 00167 #endif 00168 00169 for(CDocumentFrequencyList::const_iterator i=lList.begin(); 00170 i!=lList.end(); 
00171 i++){ 00172 lRetVal=lRetVal && i->writeBinary(outStream); 00173 } 00174 00175 return lRetVal; 00176 }
1.5.6