CQInvertedFile Class Reference

#include <CQInvertedFile.h>

Inheritance diagram for CQInvertedFile:

CQuery CMagic

List of all members.

Public Member Functions

void finishInit ()
 CQInvertedFile (CAccessorAdminCollection &inAccessorAdminCollection, CAlgorithm &inAlgorithm)
 ~CQInvertedFile ()
virtual bool setAlgorithm (CAlgorithm &inAlgorithm)
virtual CIDRelevanceLevelPairList * fastQuery (const CXMLElement &inQuery, int inNumberOfInterestingImages, double inDifferenceToBest)
 a query which returns ID/RelevanceLevel pairs instead of URL/RelevanceLevel pairs; this is faster for merging tasks (which explains the name). Queries for URLs are answered by query (inherited).
virtual CIDRelevanceLevelPairList * fastQueryByFeature (const CWeightingFunctionPointerList &inQuery, int inNumberOfInterestingImages, double inDifferenceToBest)
double DIDToScore (TID inDID, const CWeightingFunctionPointerHash &inQuery) const
double URLToScore (const string &inURL, const CWeightingFunctionPointerHash &inQuery) const
double FeatureListToScore (TID inDID, const CDocumentFrequencyList &inFeatureList, const CWeightingFunctionPointerHash &inQuery) const
void buildNormalizedQueryHash (const CRelevanceLevel &inQuery, CWeightingFunctionPointerHash &outQueryFeatureWeighters) const
void activateBlockingFeatures ()
void releaseBlockingFeatures ()
bool featuresBlocked () const
void blockFeatureGroup (const int featureNumber)
void unblockFeatureGroup (const int featureNumber)
bool isBlocked (const int featureNumber) const
void releaseAllPrunings ()
void useFeaturePruning (double percentage)
void releaseFeaturePruning ()
void useTimePruning (double inTimeCutoffPoint)
void releaseTimePruning ()
void useScoreBoardPruning (double inCutAfterFraction, double inReduceToFactor)
void releaseScoreBoardPruning ()
void useEvaluateAfterPruning ()
void releaseEvaluateAfterPruning ()
CWeightergetWeighter ()

Public Attributes

bool mBlockingOn
bool mBlockingArray [MAXIMUMNUMBEROFEATUREGROUPS]
bool mPruningUsed
bool mScoreBoardPruningUsed
parameterPruningType mScoreBoardPruningArray [MAX_SCOREBOARD_PRUNINGS]
int mNumberofUsedScoreBoardPrunings
bool mFeaturePruningUsed
double mPercentageofFeatures
bool mTimePruningUsed
double mStoppingTime
bool mEvaluateAfterPruning

Static Public Attributes

static const int MAXIMUMNUMBEROFEATUREGROUPS = 50
static const int MAX_SCOREBOARD_PRUNINGS = 10

Protected Member Functions

double keepScore (CScoreBoard &inoutScoreBoard, const CWeightingFunctionPointerList &inFeatures, bool lPositive) const
double keepScorePruning (CScoreBoard &inoutScoreBoard, const CWeightingFunctionPointerList &inFeatures, bool inPositive, int inDesiredNumberOfDocuments) const
void buildQueryHash (CRelevanceLevelList &inQuery, CWeightingFunctionPointerHash &outQueryFeatureWeighters) const
 Creates a list of all the features which are in one or more of the query images. Should be replaced by a version which operates on IDs, not URLs.
void buildNormalizedQueryList (double inPositiveRelevanceSum, double inNegativeRelevanceSum, CWeightingFunctionPointerHash &inQFW, CWeightingFunctionPointerList &outQFW) const
void buildNormalizedQueryHash (double inPositiveRelevanceSum, double inNegativeRelevanceSum, CWeightingFunctionPointerHash &inQFW) const
void init ()

Protected Attributes

CAlgorithm * mDeb
int mModulo
int mModuloClass
CWeighterFactory mWeighterFactory
CSelfDestroyPointer< CWeighter > mWeighter
CSelfDestroyPointer< CWeightingFunctionPointerHash > mQueryFeatureWeighters

Classes

struct  parameterPruningType


Detailed Description

The Query manager for Queries on inverted Files

Definition at line 96 of file CQInvertedFile.h.


Constructor & Destructor Documentation

CQInvertedFile::CQInvertedFile ( CAccessorAdminCollection & inAccessorAdminCollection,
CAlgorithm & inAlgorithm 
)

New constructor, taking as parameter the algorithm structure, which contains all the algorithm configuration

Definition at line 245 of file CQInvertedFile.cc.

References CNoDelete::activate(), init(), CQuery::mAccessor, CQuery::mAccessorAdmin, CQuery::mAlgorithm, mDeb, my_diagnose, my_throw, CAccessorAdmin::openAccessor(), and CXMLElement::stringReadAttribute().

00247               :
00248   CQuery(inAccessorAdminCollection,
00249    inAlgorithm){
00250   assert(&inAccessorAdminCollection);
00251   assert(&inAlgorithm);
00252   assert(&inAlgorithm==mAlgorithm);
00253   mDeb=mAlgorithm;
00254   mAlgorithm->activate();
00255   my_diagnose(&inAlgorithm);
00256 
00257   // mproxy has been filled in a reasonable way 
00258   // by CQuery::CQuery
00259   
00260   pair<bool,string> lSubType=inAlgorithm.stringReadAttribute("cui-sub-type");
00261 
00262   if(lSubType.first){
00263     if(lSubType.second=="mysql"){
00264       mAccessor=mAccessorAdmin->openAccessor("if_mysql");
00265     }else{
00266       mAccessor=mAccessorAdmin->openAccessor("inverted_file");
00267     }
00268   }else{
00269     mAccessor=mAccessorAdmin->openAccessor("inverted_file");
00270   }
00271   cout << "1st Checking accessor" << endl;
00272   //  mAccessor->checkNPrint();
00273 
00274   //there is something wrong between the generated 
00275   //accessor and what we want
00276   if(!mAccessor){
00277     cout << "throwing: "
00278    << (VEWrongAccessor("InvertedFileQuery"))
00279    << endl
00280    << flush;
00281     try{
00282       my_throw(VEWrongAccessor("InvertedFileQuery"));
00283     }
00284     catch(...){
00285       cout << "locally caught and rethrown"
00286      << flush;
00287       my_throw(VEWrongAccessor("InvertedFileQuery"));
00288     }
00289   }
00290 
00291   //here do additional things with the algorithm, if wanted and needed
00292   //which probably is the case
00293   init();
00294   cout << "++Accessor: " << mAccessor << flush << endl;
00295   cout << "Checking Accessor II"
00296        << endl;
00297   //mAccessor->checkNPrint();
00298   
00299   cout << "CQInvertedFile THIS:" << this << endl;
00300 };
CQInvertedFile::~CQInvertedFile(){

CQInvertedFile::~CQInvertedFile (  ) 

destructor

Definition at line 301 of file CQInvertedFile.cc.

References CXMLElement::check(), CAccessorAdmin::closeAccessor(), CNoDelete::deActivate(), CQuery::mAccessorAdmin, CQuery::mAlgorithm, mDeb, and CXMLElement::stringReadAttribute().

00301                                {
00302 
00303   cout << "begin KILLING INVERTED FILE QUERY" << endl;
00304   assert(mDeb==mAlgorithm);
00305   if(mAlgorithm){
00306     mAlgorithm->check();
00307     pair<bool,string> lSubType=mAlgorithm->stringReadAttribute("cui-sub-type");
00308     if(lSubType.first){
00309       if(lSubType.second=="mysql"){
00310   mAccessorAdmin->closeAccessor("if_mysql");
00311       }else{
00312   mAccessorAdmin->closeAccessor("inverted_file");
00313       }
00314     }else{
00315       mAccessorAdmin->closeAccessor("inverted_file");
00316     }
00317   }
00318   mAlgorithm->deActivate();
00319   cout << "end KILLING INVERTED FILE QUERY" << endl;
00320 }


Member Function Documentation

double CQInvertedFile::keepScore ( CScoreBoard & inoutScoreBoard,
const CWeightingFunctionPointerList & inFeatures,
bool  lPositive 
) const [protected]

calculates the score for all the images based on a list of features

Author:
Wolfgang Müller

Definition at line 1030 of file CQInvertedFile.cc.

References CDocumentFrequencyList::begin(), CDocumentFrequencyList::end(), and CQuery::mAccessor.

Referenced by fastQueryByFeature().

01032                                   {
01033   double lQueryScore=0;
01034   int lMaximumNumberofEvaluatedFeatures=int (inFeatures.size()*0.9);
01035 
01036   //For all query features
01037   int lCount=0;
01038   for(CWeightingFunctionPointerList::const_iterator i=inFeatures.begin();
01039       (i!=inFeatures.end()); 
01040       i++)
01041     {
01042     
01043       lCount++;
01044 
01045 
01046 
01047 #ifndef _NO_FIDPRINT    
01048       cout << "[FID"
01049      << dec
01050      << (*i)->getID()
01051      << "]"
01052      << flush;
01053 #endif
01054 
01055       /* adjusts the query score for one more feature, this does it for the
01056          query image itself to have a number to normalize with */
01057       lQueryScore+=
01058   (*i)->applyOnThis();
01059     
01060       //load list of documents which contain features
01061       CDocumentFrequencyList* lOneFeatureResult=
01062   ((CAcInvertedFile*)mAccessor)->FeatureToList((*i)->getID());
01063 
01064       //then adjust the score for the documents
01065       if(lOneFeatureResult)
01066   {
01067     for(CDocumentFrequencyList::iterator j=lOneFeatureResult->begin();
01068         j!=lOneFeatureResult->end();
01069         j++)
01070       {
01071         inoutScoreBoard(*(*i),*j);//one could say inoutScoreBoard.adjust(.,.)
01072       };
01073   }
01074       else{
01075   cerr << "FAILED:OneFeatureResult" 
01076        << endl 
01077        << flush;
01078       }
01079       delete lOneFeatureResult;
01080 
01081     }
01082 
01083   return lQueryScore;
01084 };

double CQInvertedFile::keepScorePruning ( CScoreBoard & inoutScoreBoard,
const CWeightingFunctionPointerList & inFeatures,
bool  inPositive,
int  inDesiredNumberOfDocuments 
) const [protected]

calculates the results for the images using some sort of pruning

Author:
Henning Müller

Definition at line 1100 of file CQInvertedFile.cc.

References CDocumentFrequencyList::begin(), CDocumentFrequencyList::end(), CScoreBoard::limitNumberTo(), CQuery::mAccessor, mFeaturePruningUsed, mNumberofUsedScoreBoardPrunings, mPercentageofFeatures, mScoreBoardPruningArray, mScoreBoardPruningUsed, mStoppingTime, mTimePruningUsed, CQInvertedFile::parameterPruningType::reduceTo, CScoreBoard::setIgnore(), and timeReached().

Referenced by fastQueryByFeature().

01104                     {
01105   double lQueryScore=0;
01106   int lNumberofFeatures=inFeatures.size();
01107   int lMaximumNumberofEvaluatedFeatures=int (lNumberofFeatures*mPercentageofFeatures/100);
01108 
01109   cout << "Pruning: I will evaluate "
01110        << lMaximumNumberofEvaluatedFeatures
01111        << " Features."
01112        << flush
01113        << endl;
01114 
01115   int lNextScoreBoardNumber=0;
01116 
01117   //For all query features
01118   int lCount=0;
01119   for(CWeightingFunctionPointerList::const_iterator i=inFeatures.begin();
01120       (i!=inFeatures.end())//FIXME for loop comparison
01121   && (!(mTimePruningUsed 
01122         && (timeReached(mStoppingTime)) 
01123         && (lCount>100)))
01124   && (!(mFeaturePruningUsed 
01125         && (lCount==lMaximumNumberofEvaluatedFeatures))); 
01126       i++)
01127     {
01128  
01129       lCount++;
01130       
01131       if(mScoreBoardPruningUsed)
01132   {
01133     if(mNumberofUsedScoreBoardPrunings>lNextScoreBoardNumber)
01134       {
01135         if(lCount== 
01136      (int (mScoreBoardPruningArray[lNextScoreBoardNumber]
01137            .stopAfterFeature*lNumberofFeatures)))
01138     {
01139 #ifndef NOSCREENOUTPUT
01140       cout << mScoreBoardPruningArray[lNextScoreBoardNumber].reduceTo;
01141       cout<<"\n";
01142 #endif
01143       inoutScoreBoard
01144         .limitNumberTo(int(mScoreBoardPruningArray
01145                [lNextScoreBoardNumber]
01146                .reduceTo
01147                *inElementsToRetrieve));
01148       inoutScoreBoard.setIgnore();
01149       lNextScoreBoardNumber++;
01150     }
01151       }
01152   }
01153 
01154 #ifndef _NO_FIDPRINT    
01155       cout << "[FID"
01156      << dec
01157      << (*i)->getID()
01158      << "]"
01159      << flush;
01160 #endif
01161 
01162       /* adjusts the query score for one more feature, this does it for the
01163          query image itself to have a number to normalize with */
01164       lQueryScore+=
01165   (*i)->applyOnThis();
01166     
01167       //load list of documents which contain features
01168       CDocumentFrequencyList* lOneFeatureResult=
01169   ((CAcInvertedFile*)mAccessor)->FeatureToList((*i)->getID());
01170 
01171       //then adjust the score for the documents
01172       if(lOneFeatureResult)
01173   {
01174     for(CDocumentFrequencyList::iterator j=lOneFeatureResult->begin();
01175         j!=lOneFeatureResult->end();
01176         j++)
01177       {
01178         inoutScoreBoard(*(*i),
01179             *j);//one could say inoutScoreBoard.adjust(.,.)
01180       };
01181   }
01182       else
01183   cerr << "FAILED:OneFeatureResult" 
01184        << endl 
01185        << flush;
01186       
01187       delete lOneFeatureResult;
01188 
01189     }
01190   /* this is only to reduce the list to a special number at the end */
01191   /*      inoutScoreBoard.limitNumberTo(20);*/
01192 
01193 
01194   return lQueryScore;
01195 };

void CQInvertedFile::buildQueryHash ( CRelevanceLevelList & inQuery,
CWeightingFunctionPointerHash & outQueryFeatureWeighters 
) const [protected]

Creates a list of all the features which are in one or more of the query images. Should be replaced by a version which operates on IDs, not URLs.

Author:
Wolfgang Müller + Pruning: Henning Müller/reprogrammed WM 09-10-00

Definition at line 528 of file CQInvertedFile.cc.

References CWeightingFunctionPointerHash::addFeature(), CDocumentFrequencyList::begin(), CDocumentFrequencyList::end(), featuresBlocked(), isBlocked(), CQuery::mAccessor, mModulo, and mModuloClass.

Referenced by buildNormalizedQueryHash(), and fastQuery().

00530                                           {
00531   
00532 
00533   /* checks all the relevant images which were selected */
00534   for(CRelevanceLevelList::const_iterator i=inQuery.begin();
00535       i!=inQuery.end();
00536       i++){
00537     //Get the feature list for the URL of *i (that means: for each input image)
00538 #ifndef NOSCREENOUTPUT
00539     cout<< "input image to URL: _"
00540   << (*i).getURL() 
00541   << "_"
00542   << endl
00543   << flush;
00544 #endif
00545     
00546     CDocumentFrequencyList* lReadFeatures=
00547       ((CAcInvertedFile*)mAccessor)->URLToFeatureList((*i).getURL());
00548 
00549     /* 
00550        Build a new feature list without blocked features.
00551        There are several cases
00552     */
00553     if(featuresBlocked() && !mModulo){// adding features which are not blocked
00554       for(CDocumentFrequencyList::iterator 
00555       j=lReadFeatures->begin();
00556     j!=lReadFeatures->end();
00557     j++
00558     ){
00559   if(isBlocked(((CAcInvertedFile*)mAccessor)->getFeatureDescription(j->getID()))==false){
00560     outQueryFeatureWeighters.addFeature((*i).getRelevanceLevel(),
00561                 *j);
00562   }
00563       } 
00564     }// END: adding features which are not blocked
00565     if(featuresBlocked() && mModulo){// adding features which are not blocked,
00566       // and for which the feature number has the right modulo
00567       // use of this: dispatching multiple query processors
00568       for(CDocumentFrequencyList::iterator 
00569       j=lReadFeatures->begin();
00570     j!=lReadFeatures->end();
00571     j++
00572     ){
00573   if((isBlocked(((CAcInvertedFile*)mAccessor)->getFeatureDescription(j->getID()))==false)
00574      && ((j->getID()) % mModulo == mModuloClass)){
00575     outQueryFeatureWeighters.addFeature((*i).getRelevanceLevel(),
00576                 *j);
00577   } /* end of the loop for all the features of one image */
00578       } /* end of removing the blocked features from the list */
00579     }
00580     if(!featuresBlocked() && mModulo){
00581       for(CDocumentFrequencyList::iterator 
00582       j=lReadFeatures->begin();
00583     j!=lReadFeatures->end();
00584     j++
00585     ){
00586   if((j->getID()) % mModulo == mModuloClass){// adding features with the right modulo
00587     /* adds one of the features for a special image to the list */
00588     outQueryFeatureWeighters.addFeature((*i).getRelevanceLevel(),
00589                 *j);
00590   } /* end of the loop for all the features of one image */
00591       } /* end of removing the blocked features from the list */
00592     }
00593     if(!featuresBlocked() && !mModulo){// adding all features
00594       for(CDocumentFrequencyList::iterator 
00595       j=lReadFeatures->begin();
00596     j!=lReadFeatures->end();
00597     j++
00598     ){
00599   /* adds one of the features for a special image to the list */
00600   outQueryFeatureWeighters.addFeature((*i).getRelevanceLevel(),
00601               *j);
00602       }
00603     }
00604     
00605 #ifndef NOSCREENOUTPUT
00606     cout << "gotten HERE"
00607    << flush
00608    << endl;
00609 #endif  
00610 
00611    
00612 #ifdef PRINT
00613     cout << endl
00614    << (*i).getURL()
00615    << " "
00616    << lRelevanceLevel
00617    << endl;
00618 #endif
00619     delete(lReadFeatures);    
00620   }/* end of the loop for every image in the query */
00621 }

void CQInvertedFile::buildNormalizedQueryList ( double  inPositiveRelevanceSum,
double  inNegativeRelevanceSum,
CWeightingFunctionPointerHash & inQFW,
CWeightingFunctionPointerList & outQFW 
) const [protected]

Building a list of normalized WF from a hash of Weighting functions

Definition at line 677 of file CQInvertedFile.cc.

References buildNormalizedQueryHash(), CQuery::mAccessor, mWeighter, and stampTime().

Referenced by fastQuery().

00681 {
00682   //Just to make sure...
00683   mWeighter->getQueryNormalizer()->reset();
00684   mWeighter->getDocumentNormalizer()->reset();
00685 
00686 
00687   buildNormalizedQueryHash(inPositiveRelevanceSum,
00688          inNegativeRelevanceSum,
00689          inoutQFW);
00690 
00691 #ifndef NOSCREENOUTPUT
00692   cout<<"normalized List build\n";
00693   stampTime();
00694 #endif
00695 
00696   for(CWeightingFunctionPointerHash::const_iterator i=inoutQFW.begin();
00697       i!=inoutQFW.end();
00698       i++){
00699     outQFW.push_back((*i).second);
00700   }
00701 
00702   
00703   outQFW.sort(CSortPointers_WF <CSortByAbsQueryFactor_WF>());
00704   outQFW.reverse();
00705 
00706 
00707   /* test to print out the list with the sorted features */
00708 #ifndef _NO_PRINT_QFW
00709   cout << "--------------------"
00710        << "The query feature weighters: "
00711        << endl;
00712 
00713   for(CWeightingFunctionPointerList::const_iterator j=outQFW.begin();
00714       j!=outQFW.end();
00715       j++)
00716       {
00717       cout<<(*j)->getID();
00718       cout<<";";
00719       cout<<((CAcInvertedFile*)mAccessor)->FeatureToCollectionFrequency((*j)->getID());
00720       
00721       cout<<";";
00722       cout<<(*j)->getTermFrequency();
00723       cout<<";";
00724       cout<<((CAcInvertedFile*)mAccessor)->getFeatureDescription((*j)->getID());
00725       cout<<"\n";
00726     }
00727 #endif
00728 }

void CQInvertedFile::buildNormalizedQueryHash ( double  inPositiveRelevanceSum,
double  inNegativeRelevanceSum,
CWeightingFunctionPointerHash & inQFW 
) const [protected]

Building a list of normalized WF from a hash of Weighting functions

Definition at line 637 of file CQInvertedFile.cc.

References CQueryNormalizer::considerQueryFeature(), and mWeighter.

Referenced by buildNormalizedQueryHash(), and buildNormalizedQueryList().

00639                                                      {
00640   //Just to make sure...
00641   mWeighter->getQueryNormalizer()->reset();
00642   mWeighter->getDocumentNormalizer()->reset();
00643   
00644   /* for every weighting function, meaning for every element in the list of important features */
00645 
00646 
00647   {//for limiting the scope of the following variable definitions
00648     CQueryNormalizer* lDocumentNormalizer=mWeighter->getDocumentNormalizer();
00649     CQueryNormalizer* lQueryNormalizer=mWeighter->getQueryNormalizer();
00650     for(CWeightingFunctionPointerHash::const_iterator j=inoutQFW.begin();
00651   j!=inoutQFW.end();
00652   j++)
00653       {
00654   /* calculates the relevant values for the normalizing of the function */
00655   lQueryNormalizer->considerQueryFeature(*(*j).second);
00656   lDocumentNormalizer->considerQueryFeature(*(*j).second);
00657   
00658   (*j).second->setRelevanceSum(inPositiveRelevanceSum,
00659              inNegativeRelevanceSum);
00660   (*j).second->preCalculate();
00661     }
00662   }
00663 }

void CQInvertedFile::init (  )  [protected, virtual]

Initializer, used by both constructors

Implements CQuery.

Definition at line 361 of file CQInvertedFile.cc.

References blockFeatureGroup(), CXMLElement::boolReadAttribute(), COL_HST, COL_POS, mrml_const::cui_block_color_blocks, mrml_const::cui_block_color_histogram, mrml_const::cui_block_texture_blocks, mrml_const::cui_block_texture_histogram, mrml_const::cui_pr_modulo, mrml_const::cui_pr_modulo_class, mrml_const::cui_pr_percentage_of_features, mrml_const::cui_pr_score_board_reduced_at, mrml_const::cui_pr_score_board_reduced_to, mrml_const::cui_pr_time_cutoff_point, mrml_const::cui_weighting_function, CXMLElement::doubleReadAttribute(), GABOR_HST, GABOR_POS, CXMLElement::longReadAttribute(), CQuery::mAccessor, CQuery::mAlgorithm, MAX_SCOREBOARD_PRUNINGS, MAXIMUMNUMBEROFEATUREGROUPS, mBlockingArray, mBlockingOn, mEvaluateAfterPruning, mFeaturePruningUsed, mModulo, mModuloClass, mNumberofUsedScoreBoardPrunings, mPercentageofFeatures, mPruningUsed, mQueryFeatureWeighters, mScoreBoardPruningArray, mScoreBoardPruningUsed, mStoppingTime, mTimePruningUsed, mWeighter, mWeighterFactory, my_diagnose, CWeighterFactory::newWeighter(), CQInvertedFile::parameterPruningType::reduceTo, releaseBlockingFeatures(), CQInvertedFile::parameterPruningType::stopAfterFeature, CXMLElement::stringReadAttribute(), CXMLElement::toXML(), useFeaturePruning(), useScoreBoardPruning(), and useTimePruning().

Referenced by CQInvertedFile().

00361                          {
00362   int i;
00363 
00364   if(mAlgorithm){
00365     string lString("");
00366     mAlgorithm->toXML(lString,5);
00367     
00368     cout << "çççççççççççççççççççççççççççççççççççççççç" << endl
00369    << lString
00370    << "çççççççççççççççççççççççççççççççççççççççç" << endl
00371    << flush;
00372   }
00373     
00374   if(mAlgorithm){
00375     mWeighter=mWeighterFactory
00376       .newWeighter(mAlgorithm
00377        ->stringReadAttribute(mrml_const::cui_weighting_function).second);
00378   }
00379 
00380   mQueryFeatureWeighters= 
00381     new CWeightingFunctionPointerHash(*mWeighter->getWeightingFunction());
00382 
00383 
00384   mWeighter->setAccessor((*(CAcInvertedFile*)mAccessor));
00385 
00386 
00387   /* parameter for the seperate normalization of positive and 
00388      negative feedback */
00389   /*  mSeperateNormalizationofPositiveandNegativeImages=false;*/
00390 
00391   /* turns off the blocking of features */
00392   mBlockingOn=false; 
00393 
00394   /* clears the file with the selective blocking, all features
00395      need to be blocked actively, if wanted */
00396   for(i=0;i<MAXIMUMNUMBEROFEATUREGROUPS;i++)
00397     {
00398       mBlockingArray[i]=false;
00399     }
00400   
00401   /* parameters for pruning (Henning)*/
00402   mPruningUsed=false;
00403   mScoreBoardPruningUsed=false;
00404   mNumberofUsedScoreBoardPrunings=0;
00405   mFeaturePruningUsed=false;
00406   mTimePruningUsed=false;
00407   mStoppingTime=0;
00408   mPercentageofFeatures=0;
00409   for(i=0;i<MAX_SCOREBOARD_PRUNINGS;i++)
00410     {
00411       mScoreBoardPruningArray[i].stopAfterFeature=0;
00412       mScoreBoardPruningArray[i].reduceTo=0;
00413    }
00414   mEvaluateAfterPruning=false;
00415 
00416 
00417   /* now set the pruning parameters 
00418      this is made by me, WOLFGANG, so there can be some
00419      BUGS.
00420   */
00421   if(mAlgorithm){
00422 
00423     string lString("");
00424     mAlgorithm->toXML(lString,5);
00425     my_diagnose(lString);
00426 
00427 
00428     releaseBlockingFeatures();
00429     {
00430       pair<bool,bool> lBlock=
00431   mAlgorithm->boolReadAttribute(string(mrml_const::cui_block_color_histogram));
00432 
00433       cout << "%%%%%%%%%%%%%%%%%%%%"
00434      << mAlgorithm->stringReadAttribute(string(mrml_const::cui_block_color_histogram)).second
00435      << endl;
00436       if(lBlock.first && lBlock.second){
00437   blockFeatureGroup(COL_HST);
00438       }
00439     }
00440     {
00441       pair<bool,bool> lBlock=
00442   mAlgorithm->boolReadAttribute(string(mrml_const::cui_block_texture_histogram));
00443       if(lBlock.first && lBlock.second){
00444   blockFeatureGroup(GABOR_HST);
00445       }
00446     }
00447     {
00448       pair<bool,bool> lBlock=
00449   mAlgorithm->boolReadAttribute(string(mrml_const::cui_block_color_blocks));
00450       if(lBlock.first && (lBlock.second)){
00451   blockFeatureGroup(COL_POS);
00452       }
00453     }
00454     {
00455       pair<bool,bool> lBlock=
00456   mAlgorithm->boolReadAttribute(string(mrml_const::cui_block_texture_blocks));
00457       if(lBlock.first && (lBlock.second)){
00458   blockFeatureGroup(GABOR_POS);
00459       }
00460     }
00461     {
00462       pair<bool,double> lTimeCutoffPoint=
00463   mAlgorithm->doubleReadAttribute(string(mrml_const::cui_pr_time_cutoff_point));
00464       if(lTimeCutoffPoint.first){
00465   useTimePruning(lTimeCutoffPoint.second);
00466       }
00467     }
00468     {
00469       pair<bool,double> lPercentageOfFeatures=
00470   mAlgorithm->doubleReadAttribute(string(mrml_const::cui_pr_percentage_of_features));
00471       if(lPercentageOfFeatures.first){
00472   useFeaturePruning(lPercentageOfFeatures.second);
00473       }
00474     }
00475     {
00476       pair<bool,double> lScoreBoardReducedAt=
00477   mAlgorithm->doubleReadAttribute(string(mrml_const::cui_pr_score_board_reduced_at));
00478 
00479       pair<bool,int> lScoreBoardReducedTo=
00480   mAlgorithm->longReadAttribute(string(mrml_const::cui_pr_score_board_reduced_to));
00481 
00482       if(lScoreBoardReducedAt.first
00483    &&
00484    lScoreBoardReducedTo.first){
00485   useScoreBoardPruning(lScoreBoardReducedAt.second,
00486            lScoreBoardReducedTo.second);
00487       }
00488     }
00489     {
00490       pair<bool,int> lModulo=
00491   mAlgorithm->longReadAttribute(string(mrml_const::cui_pr_modulo));
00492 
00493       pair<bool,int> lModuloClass=
00494   mAlgorithm->longReadAttribute(string(mrml_const::cui_pr_modulo_class));
00495       
00496       if(lModulo.first){
00497   mModulo=lModulo.second;
00498   if(lModuloClass.first){
00499     mModuloClass=lModuloClass.second;
00500   }else{
00501     mModuloClass=0;
00502   }
00503       }else{
00504   mModulo=0;
00505   mModuloClass=0;
00506       }
00507     }
00508   }
00509   cout << "Init finished " 
00510        << flush
00511        << endl;
00512 };

void CQInvertedFile::finishInit (  )  [virtual]

Finish the initialisation phase: make the weighting function know who its normalizers are.

Reimplemented from CQuery.

Definition at line 1673 of file CQInvertedFile.cc.

References CQuery::mAccessor, mWeighter, and my_throw.

01673                                {
01674   CAcInvertedFile* lAccessor=
01675     dynamic_cast<CAcInvertedFile*>(mAccessor);
01676   
01677   if(!lAccessor){
01678     my_throw(VEWrongAlgorithm("finishInit"));
01679   }
01680 
01681   mWeighter->setAccessor(*lAccessor);
01682 
01683   
01684 }

bool CQInvertedFile::setAlgorithm ( CAlgorithm & inAlgorithm  )  [virtual]

Set the algorithm. This follows the same scheme as setCollection.

Reimplemented from CQuery.

Definition at line 328 of file CQInvertedFile.cc.

References CAccessorAdmin::closeAccessor(), CAlgorithm::getCollectionID(), CAccessorAdminCollection::getProxy(), CQuery::mAccessor, CQuery::mAccessorAdmin, CQuery::mAccessorAdminCollection, CQuery::mAlgorithm, CAccessorAdmin::openAccessor(), and CQuery::setAlgorithm().

// Listing of the body of CQInvertedFile::setAlgorithm(); the leading five-digit
// numbers are the line numbers of CQInvertedFile.cc.
// Purpose: switch this query processor to the collection named by inAlgorithm.
// Returns true immediately when the collection ID is unchanged; otherwise
// returns true only if the new "inverted_file" accessor could be opened AND
// CQuery::setAlgorithm(inAlgorithm) succeeded.
00328                                                         {
00329   cout << "SETALGORITHM" << endl;
00330 
00331   if(mAlgorithm->getCollectionID()==inAlgorithm.getCollectionID()){
00332     
00333     return true; // NOTE(review): same collection, so CQuery::setAlgorithm() is not called on this path - confirm that is intended
00334     
00335   }else{
00336     cout << "OPENACCESSOR" << endl;
00337     //close the old collection
00338     mAccessorAdmin->closeAccessor("inverted_file");
00339 
00340     // switch to the accessor admin of the new collection and open its inverted file
00341     mAccessorAdmin=&mAccessorAdminCollection->getProxy(inAlgorithm.getCollectionID());
00342     mAccessor=mAccessorAdmin->openAccessor("inverted_file");
00343 
00344     // && short-circuits: the base-class call is skipped when no accessor was obtained
00345     return (mAccessor && CQuery::setAlgorithm(inAlgorithm));
00346   }
00347 }; // NOTE(review): the ';' after the closing brace is an empty declaration

CIDRelevanceLevelPairList * CQInvertedFile::fastQuery ( const CXMLElement & inQuery,
int  inNumberOfInterestingImages,
double  inDifferenceToBest 
) [virtual]

A query which returns ID/RelevanceLevel pairs instead of URL/RelevanceLevel pairs; this is faster for merging tasks (which explains the name). Queries for URLs are answered by query (inherited).

Implements CQuery.

Definition at line 747 of file CQInvertedFile.cc.

References buildNormalizedQueryList(), buildQueryHash(), CXMLElement::child_list_begin(), CXMLElement::child_list_end(), fastQueryByFeature(), CQuery::getRandomIDs(), mrml_const::image_location, CQuery::mAccessor, mQueryFeatureWeighters, mWeighter, stampTime(), mrml_const::user_relevance, mrml_const::user_relevance_element, and mrml_const::user_relevance_element_list.

// Listing of the body of CQInvertedFile::fastQuery(); the leading five-digit
// numbers are the line numbers of CQInvertedFile.cc.
// Purpose: walk the children of inQuery, collect
//   (a) explicit features from "cui-inverted-file-query-by-feature" elements and
//   (b) rated images from user-relevance-element-list elements,
// then build the weighted feature list and delegate to fastQueryByFeature().
// Returns: getRandomIDs(inNumberOfInterestingImages) when no rated image was
// found, otherwise the result of fastQueryByFeature(). The returned pointer is
// heap-allocated in the visible paths; presumably the caller owns it - TODO confirm.
00749                                     {
00750 
00751   cout << "--Accessor: " 
00752        << flush 
00753        << mAccessor 
00754        << flush 
00755        << endl;
00756 
00757   mQueryFeatureWeighters->clearFeatures(); // drop the features of any previous query
00758 
00759 
00760   double lPositiveSum(0); // sum of relevance levels > 0
00761   double lNegativeSum(0); // sum of the remaining (non-positive) relevance levels
00762 
00763 
00764   CRelevanceLevelList lQuery; // (URL, relevance level) pairs taken from the query tree
00765 
00766   // In the query tree I got, i simply look for 
00767   // the element containing a user-relevance-list
00768   
00769   bool lIsEmpty(true);// is there no image in the query?
00770   for(list<CXMLElement*>::const_iterator i=inQuery.child_list_begin();
00771       i!=inQuery.child_list_end();
00772       i++){
00773 
00774 //     cout << "I:The name of this tree element: "
00775 //   << endl
00776 //   << (*i)->getName()
00777 //   << endl;
00778     if((*i)->getName()=="cui-inverted-file-query-by-feature"){ // NOTE(review): this branch never clears lIsEmpty, so a query made only of features takes the random-ID path at 00828
00779       for(list<CXMLElement*>::const_iterator j=(*i)->child_list_begin();
00780     j!=(*i)->child_list_end();
00781     j++){
00782   if((*j)->getName()=="cui-term-frequency-element"){
00783     pair<bool,int> lFeatureID((*j)->longReadAttribute("feature-id"));
00784     pair<bool,double> lTermFrequency((*j)->doubleReadAttribute("term-frequency"));
00785     
00786     if(lFeatureID.first){ // elements without a feature-id are ignored
00787       if(lTermFrequency.first){
00788           mQueryFeatureWeighters->addFeature(lTermFrequency.second,CDocumentFrequencyElement(lFeatureID.second,
00789                            1));
00790       }else{ // no term-frequency attribute: 1 is passed in its place
00791         mQueryFeatureWeighters->addFeature(1,CDocumentFrequencyElement(lFeatureID.second,
00792                        1));
00793       }
00794     }
00795   }
00796       }
00797     }
00798     if((*i)->getName()==mrml_const::user_relevance_element_list){
00799       for(list<CXMLElement*>::const_iterator j=(*i)->child_list_begin();
00800     j!=(*i)->child_list_end();
00801     j++){
00802 //  cout << "J:The name of this tree element: "
00803 //       << endl
00804 //       << (*j)->getName()
00805 //       << endl;
00806   if((*j)->getName()==mrml_const::user_relevance_element){
00807     if(((*j)->stringReadAttribute(mrml_const::image_location).first) // both attributes must be present
00808        &&
00809        ((*j)->stringReadAttribute(mrml_const::user_relevance).first)){
00810       
00811       if(fabs((*j)->doubleReadAttribute(mrml_const::user_relevance).second)>0.001){ // skip images whose relevance is (almost) zero
00812         lQuery.push_back(CRelevanceLevel((*j)->stringReadAttribute(mrml_const::image_location).second,
00813                  (*j)->doubleReadAttribute(mrml_const::user_relevance).second));
00814         if(lQuery.back().getRelevanceLevel()>0){
00815     lIsEmpty=false;
00816     lPositiveSum+=lQuery.back().getRelevanceLevel();
00817         }else{
00818     lIsEmpty=false;
00819     lNegativeSum+=lQuery.back().getRelevanceLevel();
00820         }
00821       }
00822     }
00823   }
00824       }
00825     }
00826   }
00827   
00828   if(lIsEmpty){
00829     cout << "THE QUERY IS EMPTY" << endl;
00830     //which means, random images are requested
00831     return getRandomIDs(inNumberOfInterestingImages);
00832     cout << "RETURNING" << endl; // NOTE(review): unreachable, it follows the return above
00833   }
00834 
00835 
00836 
00837   //... but more successfully
00838   cout << "Query number of documents: "
00839        << lQuery.size()
00840        << "The document: ";
00841   if(lQuery.size()){
00842     cout << lQuery.front().getRelevanceLevel()
00843    << ","
00844    << lQuery.front().getURL()
00845    << "\n"
00846    << endl;
00847   }else{
00848     cout << "NO DOCUMENT"
00849    << endl;
00850   }
00851 #ifndef NOSCREENOUTPUT
00852 #endif
00853 
00854   //in case of an empty query
00855   if(!lQuery.size()){ // NOTE(review): lIsEmpty is only cleared right after a push_back, so this looks unreachable once 00828 has been passed
00856     return new CIDRelevanceLevelPairList();
00857   }
00858 
00859   double lRelevanceSum=0; /* NOTE(review): not used anywhere in this listing */
00860   
00861   //resets the normalizer for the query
00862   mWeighter->getQueryNormalizer()->reset();
00863   mWeighter->getDocumentNormalizer()->reset();
00864 
00865 #ifndef NOSCREENOUTPUT
00866   cout << "building query" 
00867        << endl 
00868        << flush;
00869   stampTime();
00870 #endif  
00871 
00872   /* creates a list of a all the features in all the input images */
00873   buildQueryHash(lQuery,
00874      *mQueryFeatureWeighters);
00875   
00876 #ifndef NOSCREENOUTPUT
00877   stampTime();
00878   cout << "..finished:left:" << mQueryFeatureWeighters->size() << endl;
00879 #endif
00880   
00881   /* this is a list with all the weightings for every feature */
00882   CWeightingFunctionPointerList lParameters;
00883   
00884   /* and the list for every feature and its weighting is created here */
00885   buildNormalizedQueryList(lPositiveSum,
00886          lNegativeSum,
00887          *mQueryFeatureWeighters,
00888          lParameters);
00889 
00890 
00891   return fastQueryByFeature(lParameters,
00892           inNumberOfInterestingImages,
00893           inThreshold); // NOTE(review): the signature shown for this member names the third parameter inDifferenceToBest; the definition evidently calls it inThreshold
00894 }

CIDRelevanceLevelPairList * CQInvertedFile::fastQueryByFeature ( const CWeightingFunctionPointerList & inQuery,
int  inNumberOfInterestingImages,
double  inThreshold 
) [virtual]

Assuming that a correct CWeightingFunctionPointerHash has been built by fastQuery (or another function), this function will do the rest

Definition at line 902 of file CQInvertedFile.cc.

References keepScore(), keepScorePruning(), mPruningUsed, and stampTime().

Referenced by fastQuery().

// Listing of the body of CQInvertedFile::fastQueryByFeature(); the leading
// five-digit numbers are the line numbers of CQInvertedFile.cc.
// Purpose: score documents for the already-weighted feature list inQuery,
// normalise the scores by the query's own score, sort them, keep at most
// inNumberOfInterestingImages entries and, if inThreshold>0.0001, drop the
// entries whose relevance level is below inThreshold.
// Returns: a list allocated with new; presumably the caller owns it - TODO confirm.
00904                                        {
00905   
00906 
00907 #ifndef NOSCREENOUTPUT
00908   cout << "query." << flush << endl;
00909   stampTime();
00910 #endif
00911 
00912   //A score for each document, 
00913   //each score is initialized with zero
00914   CScoreBoard lScoreBoard;
00915   
00916   double lQueryScore; // assigned in both branches below
00917   
00918   /* calculates the value for the answer images */
00919   if (mPruningUsed)
00920     {
00921       cout << "Pruning used!"
00922      << endl;
00923 
00924 
00925       lQueryScore=(keepScorePruning(lScoreBoard,
00926             inQuery,
00927             true,
00928             inNumberOfInterestingImages
00929             ));
00930     }
00931   else
00932     {
00933       cout << "Pruning NOT used!"
00934      << endl;
00935 
00936       lQueryScore=(keepScore(lScoreBoard,
00937            inQuery,
00938            true));
00939     }
00940 
00941 #ifndef NOSCREENOUTPUT
00942   stampTime();
00943   cout << ".query" << flush << endl;
00944 #endif
00945 
00946   //Normalizing scores by query score and building a list from it
00947   CIDRelevanceLevelPairList* lReturnValue=new CIDRelevanceLevelPairList(); // list for the contents of the ScoreBoard
00948   for(CScoreBoard::iterator i=lScoreBoard.begin();
00949       i!=lScoreBoard.end();
00950       i++){
00951     (*i).second/=fabs(lQueryScore); // NOTE(review): no guard against lQueryScore being 0
00952     lReturnValue->push_back(CIDRelevanceLevelPair(i->first,
00953               i->second));
00954   }
00955   
00956 #ifndef NOSCREENOUTPUT
00957   cout << "presort" << flush << endl;
00958   stampTime();
00959 #endif
00960   
00961   lReturnValue->sort(); // ascending in the ordering of CIDRelevanceLevelPair (presumably by relevance level - TODO confirm)
00962   lReturnValue->reverse(); // ...turned into descending order
00963   
00964   //Iterate over the wanted elements
00965   CIDRelevanceLevelPairList::iterator iWantedElements=lReturnValue->begin(); 
00966 
00967   //skip inNumberOfInterestingImages and delete the rest of the
00968   //output list
00969   for(int lNumberOfSkippedImages=0;
00970       (lNumberOfSkippedImages
00971        <
00972        inNumberOfInterestingImages)
00973   && 
00974   (iWantedElements!=lReturnValue->end());
00975       iWantedElements++,
00976   lNumberOfSkippedImages++
00977       ){
00978   } // empty body: the loop header only advances the iterator
00979   
00980   if(iWantedElements!=lReturnValue->end()){
00981     lReturnValue->erase(iWantedElements,lReturnValue->end());
00982   }
00983   
00984   
00985   //Delete all results below threshold if threshold>0.0
00986   if(inThreshold>0.0001){
00987     for(CIDRelevanceLevelPairList::iterator i=lReturnValue->begin();
00988   i!=lReturnValue->end();){ // no increment here: it happens in the body, depending on whether an element is erased
00989       if((*i).getRelevanceLevel()<inThreshold){
00990   cout << "eliminating:" 
00991        << inNumberOfInterestingImages
00992        << ":"
00993        << inThreshold
00994        << ">"
00995        << (*i).getRelevanceLevel()
00996        << endl;
00997   
00998   lReturnValue->erase(i++); // i is advanced before the old position is erased; assumes list-like iterator stability - TODO confirm container type
00999       }else{
01000   i++;
01001       }
01002     }
01003   }
01004 
01005 #ifndef NOSCREENOUTPUT
01006   stampTime();
01007   cout << "postsort" 
01008        << flush 
01009        << endl;
01010 #endif
01011   return lReturnValue;
01012 }

double CQInvertedFile::DIDToScore ( TID  inDID,
const CWeightingFunctionPointerHash & inQuery 
) const

Returns t