CInvertedFileChunk Class Reference

#include <CInvertedFileChunk.h>

Inheritance diagram for CInvertedFileChunk:

CMapInvertedFileChunk

List of all members.

Public Member Functions

double getCollectionFrequency (size_t inSize) const
void addElement (TID inDocumentID, double inDocumentFrequency)
bool writeBinary (ostream &, TID inFeatureID, size_t inSize) const


Detailed Description

A class that does the administrative work needed to generate inverted files. It holds the list of documents that contain a given feature.

Definition at line 59 of file CInvertedFileChunk.h.


Member Function Documentation

double CInvertedFileChunk::getCollectionFrequency ( size_t  inSize  )  const

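Returns the fraction of documents that contain this feature, computed as the number of entries in this chunk divided by inSize. The result is a ratio (expected to lie between 0 and 1), not a percentage scaled to 100.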

Definition at line 79 of file CInvertedFileChunk.cc.

Referenced by CAcIFFileSystem::newGenerateInvertedFile().

00079                                                                    {
00080   return double(size())/inSize;
00081 }

void CInvertedFileChunk::addElement ( TID  inDocumentID,
double  inDocumentFrequency 
)

Adds one document, together with its document frequency, to the list for one feature.

Definition at line 58 of file CInvertedFileChunk.cc.

Referenced by CAcIFFileSystem::newGenerateInvertedFile().

00059                                        {
00060 
00061   //cout << "a" << flush;
00062 
00063   insert(make_pair(inDocumentID,inDocumentFrequency));
00064 };

bool CInvertedFileChunk::writeBinary ( ostream &  outStream,
TID  inFeatureID,
size_t  inSize 
) const

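Writes the binary part of the list for one feature to outStream: first the feature ID, then the collection frequency (stored as a float), then the number of entries (written twice), and finally the document-frequency elements after sorting them with CSortByDocumentFrequency_DFE. The return value is true only if all of the writes succeeded.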

Definition at line 96 of file CInvertedFileChunk.cc.

References CDocumentFrequencyList::begin(), CDocumentFrequencyList::end(), and CDocumentFrequencyList::writeBinary().

Referenced by CAcIFFileSystem::newGenerateInvertedFile().

00098                                 {
00099   
00100   bool lRetVal=true;
00101 
00102   {
00103     TID lFeatureID(inFeatureID);
00104     lRetVal = lRetVal && outStream.write((char*)&lFeatureID,
00105            sizeof(lFeatureID));
00106   }
00107   
00108   {
00109     float lCollectionFrequency=(float(size())
00110         /inSize);
00111     if((lCollectionFrequency >1)
00112        ||
00113        (lCollectionFrequency < 0)){
00114       cout << "The collection frequency is: "
00115      << lCollectionFrequency
00116      << endl;
00117       assert(!"collection frequency out of range");
00118     }
00119     lRetVal = lRetVal && outStream.write((char*)&lCollectionFrequency,
00120         sizeof(lCollectionFrequency));
00121   }
00122   
00123   int lSize(size());
00124   {
00125     // writing this twice means: there is
00126     // no space left in this list
00127     lRetVal = lRetVal && outStream.write((char*)&lSize,
00128            sizeof(lSize));
00129     // writing this twice means: there is
00130     // no space left in this list
00131     lRetVal = lRetVal && outStream.write((char*)&lSize,
00132            sizeof(lSize));
00133     cout << "writing chunk of size " << lSize <<endl;
00134   }
00135   
00136   
00137   CDocumentFrequencyList lList(lSize);
00138   
00139 #ifndef _CDocumentFrequencyListIsList
00140   CDocumentFrequencyList::iterator j=lList.begin();
00141 #endif
00142   
00143   for(const_iterator i=begin();
00144       i!=end();
00145       i++){
00146 #ifndef _CDocumentFrequencyListIsList
00147     *(j++)=CDocumentFrequencyElement((*i).first,
00148              (*i).second);
00149 #else
00150     lList.push_back(CDocumentFrequencyElement((*i).first,
00151                 (*i).second)
00152         );
00153 #endif
00154   }
00155 #ifdef _CDocumentFrequencyListIsArray
00156   //lList.setEnd(j);// superfluous, because lList.begin()+lSize==j anyway
00157   assert(j==lList.begin()+lSize);
00158 #endif
00159   
00160 #ifndef _CDocumentFrequencyListIsList
00161   sort(lList.begin(),
00162        lList.end(),
00163        CSortByDocumentFrequency_DFE());
00164 #else
00165   
00166   lList.sort(CSortByDocumentFrequency_DFE());
00167 #endif
00168   
00169   for(CDocumentFrequencyList::const_iterator i=lList.begin();
00170       i!=lList.end();
00171       i++){
00172     lRetVal=lRetVal && i->writeBinary(outStream);
00173   }
00174   
00175   return lRetVal;
00176 }


The documentation for this class was generated from the following files:

- CInvertedFileChunk.h
- CInvertedFileChunk.cc

Generated on Tue Jan 6 00:31:12 2009 for Gift by  doxygen 1.5.6