include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* Copyright 1999,2000,2001 BrightStation PLC
00005  * Copyright 2001,2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026 
00027 #include <string>
00028 
00029 #include <xapian/base.h>
00030 #include <xapian/deprecated.h>
00031 #include <xapian/error.h>
00032 #include <xapian/types.h>
00033 #include <xapian/termiterator.h>
00034 #include <xapian/visibility.h>
00035 
00036 namespace Xapian {
00037 
// Forward declarations of classes named by the declarations below.
// MSetIterator and Weight are defined later in this header; the others are
// only forward-declared here.
00038 class Database;
00039 class Document;
00040 class ErrorHandler;
00041 class ExpandDecider;
00042 class MSetIterator;
00043 class Query;
00044 class Weight;
00045 
/** Handle onto a set of match results, with an STL-container-like interface
 *  (size(), empty(), begin(), end(), operator[] and the usual typedefs).
 *
 *  `internal` is the only data member; every member function except
 *  max_size() is defined outside this header.  Remarks marked "presumably"
 *  are inferred from names and signatures only - NOTE(review): confirm them
 *  against the implementation.
 */
00049 class XAPIAN_VISIBILITY_DEFAULT MSet {
00050     public:
00051         class Internal;
              /// Handle to the Internal implementation object.
00053         Xapian::Internal::RefCntPtr<Internal> internal;
00054
              /// Wrap an existing Internal object.  Explicit, so a raw pointer is
              /// never converted to an MSet implicitly.
00056         explicit MSet(MSet::Internal * internal_);
00057
              /// Default constructor.
00059         MSet();
00060
              /// Destructor.
00062         ~MSet();
00063
              /// Copy constructor.
00065         MSet(const MSet & other);
00066
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00068         void operator=(const MSet &other);
00069
              /// fetch() overload taking a pair of iterators.
00085         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00086
              /// fetch() overload taking a single iterator.
00089         void fetch(const MSetIterator &item) const;
00090
              /// fetch() overload taking no arguments (presumably applies to the
              /// whole set - TODO confirm).
00093         void fetch() const;
00094
              /// Map a weight to a Xapian::percent value.
00099         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00100
              /// Overload taking an iterator instead of a weight.
00102         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00103
              /// Look up a Xapian::doccount for the term named `tname`.
00111         Xapian::doccount get_termfreq(const std::string &tname) const;
00112
              /// Look up a Xapian::weight for the term named `tname`.
00120         Xapian::weight get_termweight(const std::string &tname) const;
00121
              /// Offset that MSetIterator::get_rank() adds to an iterator's index
              /// to obtain a rank.
00129         Xapian::doccount get_firstitem() const;
00130
              // The next three accessors are, judging by their names, a lower
              // bound, an estimate and an upper bound of one quantity; what is
              // being counted is not visible in this header - TODO confirm.
00140         Xapian::doccount get_matches_lower_bound() const;
00141
00154         Xapian::doccount get_matches_estimated() const;
00155
00165         Xapian::doccount get_matches_upper_bound() const;
00166
              // Weight statistics.  NOTE(review): the exact meaning of
              // "possible" versus "attained" is not visible in this header.
00172         Xapian::weight get_max_possible() const;
00173
00187         Xapian::weight get_max_attained() const;
00188
              /// STL-style size (presumably the number of items held).
00190         Xapian::doccount size() const;
00191
              /// Defined inline as exactly size().
00193         Xapian::doccount max_size() const { return size(); }
00194
              /// STL-style emptiness test.
00196         bool empty() const;
00197
              /// STL-style swap with `other`.
00199         void swap(MSet & other);
00200
              /// Iterator accessors.  All of them return an MSetIterator by value.
00202         MSetIterator begin() const;
00203
00205         MSetIterator end() const;
00206
              /// Note: returns an iterator, not a reference to an element as
              /// back() does on standard containers.
00208         MSetIterator back() const;
00209
              /// Indexing also yields an iterator rather than an element.
00219         MSetIterator operator[](Xapian::doccount i) const;
00220
00222
              // STL container typedefs.  Both `reference` and `const_reference`
              // are declared as `MSetIterator &`.
00223         typedef MSetIterator value_type; // FIXME: not assignable...
00224         typedef MSetIterator iterator;
00225         typedef MSetIterator const_iterator;
00226         typedef MSetIterator & reference; // Hmm
00227         typedef MSetIterator & const_reference;
00228         typedef MSetIterator * pointer; // Hmm
00229         typedef Xapian::doccount_diff difference_type;
00230         typedef Xapian::doccount size_type;
00232
              /// Returns a std::string describing this object (presumably for
              /// debugging output).
00236         std::string get_description() const;
00237 };
00238 
/** Bidirectional iterator over an MSet.
 *
 *  An iterator is a position (`index`) together with its own copy of the
 *  MSet it refers to.  Increment and decrement only adjust `index`; no range
 *  checking is done here.
 *
 *  NOTE(review): std::bidirectional_iterator_tag (used below) is declared in
 *  <iterator>, but the only standard header this file includes directly is
 *  <string>, so it relies on <iterator> being pulled in indirectly.
 */
00242 class XAPIAN_VISIBILITY_DEFAULT MSetIterator {
00243     private:
00244         friend class MSet;
00245         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00246         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00247
              /// Positioned constructor.  Private, so only the friends declared
              /// above can create an iterator at a chosen index.
00248         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00249             : index(index_), mset(mset_) { }
00250
              /// Position within `mset`.
00251         Xapian::doccount index;
              /// The set being iterated, held by value.
00252         MSet mset;
00253
00254     public:
              /// Default constructor: index 0 and a default-constructed MSet.
00258         MSetIterator() : index(0), mset() { }
00259
00260         ~MSetIterator() { }
00261
              /// Copy constructor.  The members are copy-initialised directly
              /// instead of being default-constructed and then assigned, which
              /// avoids a needless MSet() construction.
00263         MSetIterator(const MSetIterator &other)
00264             : index(other.index), mset(other.mset) { }
00267
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00269         void operator=(const MSetIterator &other) {
00270             index = other.index;
00271             mset = other.mset;
00272         }
00273
              /// Pre-increment: advance, then return this iterator.
00275         MSetIterator & operator++() {
00276             ++index;
00277             return *this;
00278         }
00279
              /// Post-increment: advance, but return a copy of the old position.
00281         MSetIterator operator++(int) {
00282             MSetIterator tmp = *this;
00283             ++index;
00284             return tmp;
00285         }
00286
              /// Pre-decrement: step back, then return this iterator.
00288         MSetIterator & operator--() {
00289             --index;
00290             return *this;
00291         }
00292
              /// Post-decrement: step back, but return a copy of the old position.
00294         MSetIterator operator--(int) {
00295             MSetIterator tmp = *this;
00296             --index;
00297             return tmp;
00298         }
00299
              /// Dereferencing yields a Xapian::docid by value.
00301         Xapian::docid operator*() const;
00302
              /// Returns a Xapian::Document by value (presumably the document at
              /// the current position - TODO confirm).
00319         Xapian::Document get_document() const;
00320
              /// Rank of the current position: the MSet's get_firstitem() plus
              /// this iterator's index.
00327         Xapian::doccount get_rank() const {
00328             return mset.get_firstitem() + index;
00329         }
00330
              /// Weight for the current position.
00332         Xapian::weight get_weight() const;
00333
              // Collapse information for the current position.
              // NOTE(review): presumably related to Enquire::set_collapse_key();
              // confirm against the implementation.
00336         std::string get_collapse_key() const;
00337
00354         Xapian::doccount get_collapse_count() const;
00355
              /// Returns a Xapian::percent for the current position.
00361         Xapian::percent get_percent() const;
00362
              /// Returns a std::string describing this object (presumably for
              /// debugging output).
00366         std::string get_description() const;
00367
00369
              // Member typedefs in the style required of STL iterators.
00370         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00371         typedef Xapian::docid value_type;
00372         typedef Xapian::doccount_diff difference_type;
00373         typedef Xapian::docid * pointer;
00374         typedef Xapian::docid & reference;
00376 };
00377 
/// Equality for MSetIterators: true when both hold the same index.  The MSet
/// each iterator refers to is not compared.
00378 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00379 {
00380     return a.index == b.index;
00381 }
00382 
/// Inequality for MSetIterators: the negation of operator==, i.e. true when
/// the two indices differ.  The MSet each iterator refers to is not compared.
00383 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00384 {
00385     return !(a == b);
00386 }
00387 
// Forward declaration so that ESet can name ESetIterator in its member
// declarations; the class itself is defined after ESet.
00388 class ESetIterator;
00389 
/** Handle onto a set of terms with an STL-container-like interface; this is
 *  the type returned by Enquire::get_eset().
 *
 *  `internal` is the only data member; every member function except
 *  max_size() is defined outside this header.
 */
00394 class XAPIAN_VISIBILITY_DEFAULT ESet {
00395     public:
00396         class Internal;
              /// Handle to the Internal implementation object.
00398         Xapian::Internal::RefCntPtr<Internal> internal;
00399
              /// Default constructor.
00401         ESet();
00402
              /// Destructor.
00404         ~ESet();
00405
              /// Copy constructor.
00407         ESet(const ESet & other);
00408
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00410         void operator=(const ESet &other);
00411
              /// Returns a Xapian::termcount.  NOTE(review): the meaning of
              /// "ebound" is not visible in this header.
00416         Xapian::termcount get_ebound() const;
00417
              /// STL-style size (presumably the number of items held).
00419         Xapian::termcount size() const;
00420
              /// Defined inline as exactly size().
00422         Xapian::termcount max_size() const { return size(); }
00423
              /// STL-style emptiness test.
00425         bool empty() const;
00426
              /// STL-style swap with `other`.
00428         void swap(ESet & other);
00429
              /// Iterator accessors.  All of them return an ESetIterator by value.
00431         ESetIterator begin() const;
00432
00434         ESetIterator end() const;
00435
              /// Note: returns an iterator, not a reference to an element as
              /// back() does on standard containers.
00437         ESetIterator back() const;
00438
              /// Indexing also yields an iterator rather than an element.
00440         ESetIterator operator[](Xapian::termcount i) const;
00441
              /// Returns a std::string describing this object (presumably for
              /// debugging output).
00446         std::string get_description() const;
00447 };
00448 
/** Bidirectional iterator over an ESet.
 *
 *  An iterator is a position (`index`) together with its own copy of the
 *  ESet it refers to.  Increment and decrement only adjust `index`; no range
 *  checking is done here.
 *
 *  NOTE(review): std::bidirectional_iterator_tag (used below) is declared in
 *  <iterator>, but the only standard header this file includes directly is
 *  <string>, so it relies on <iterator> being pulled in indirectly.
 */
00450 class XAPIAN_VISIBILITY_DEFAULT ESetIterator {
00451     private:
00452         friend class ESet;
00453         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00454         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00455
              /// Positioned constructor.  Private, so only the friends declared
              /// above can create an iterator at a chosen index.
00456         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00457             : index(index_), eset(eset_) { }
00458
              /// Position within `eset`.
00459         Xapian::termcount index;
              /// The set being iterated, held by value.
00460         ESet eset;
00461
00462     public:
              /// Default constructor: index 0 and a default-constructed ESet.
00466         ESetIterator() : index(0), eset() { }
00467
00468         ~ESetIterator() { }
00469
              /// Copy constructor.  The members are copy-initialised directly
              /// instead of being default-constructed and then assigned, which
              /// avoids a needless ESet() construction.
00471         ESetIterator(const ESetIterator &other)
00472             : index(other.index), eset(other.eset) { }
00475
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00477         void operator=(const ESetIterator &other) {
00478             index = other.index;
00479             eset = other.eset;
00480         }
00481
              /// Pre-increment: advance, then return this iterator.
00483         ESetIterator & operator++() {
00484             ++index;
00485             return *this;
00486         }
00487
              /// Post-increment: advance, but return a copy of the old position.
00489         ESetIterator operator++(int) {
00490             ESetIterator tmp = *this;
00491             ++index;
00492             return tmp;
00493         }
00494
              /// Pre-decrement: step back, then return this iterator.
00496         ESetIterator & operator--() {
00497             --index;
00498             return *this;
00499         }
00500
              /// Post-decrement: step back, but return a copy of the old position.
00502         ESetIterator operator--(int) {
00503             ESetIterator tmp = *this;
00504             --index;
00505             return tmp;
00506         }
00507
              /// Dereferencing yields a const reference to a std::string.
00509         const std::string & operator *() const;
00510
              /// Weight for the current position.
00512         Xapian::weight get_weight() const;
00513
              /// Returns a std::string describing this object (presumably for
              /// debugging output).
00517         std::string get_description() const;
00518
00520
              // Member typedefs in the style required of STL iterators.
00521         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00522         typedef std::string value_type;
00523         typedef Xapian::termcount_diff difference_type;
00524         typedef std::string * pointer;
00525         typedef std::string & reference;
00527 };
00528 
/// Equality for ESetIterators: true when both hold the same index.  The ESet
/// each iterator refers to is not compared.
00529 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00530 {
00531     return a.index == b.index;
00532 }
00533 
/// Inequality for ESetIterators: the negation of operator==, i.e. true when
/// the two indices differ.  The ESet each iterator refers to is not compared.
00534 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00535 {
00536     return !(a == b);
00537 }
00538 
/** A set of documents identified by Xapian::docid, which can be passed to
 *  Enquire::get_mset() (optionally) and Enquire::get_eset().
 *
 *  Each operation has two forms: one taking a docid, and an inline
 *  convenience overload taking an MSetIterator, which dereferences the
 *  iterator to obtain the docid and forwards to the first form.
 */
00543 class XAPIAN_VISIBILITY_DEFAULT RSet {
00544     public:
00546         class Internal;
00547
              /// Handle to the Internal implementation object (the only data
              /// member).
00549         Xapian::Internal::RefCntPtr<Internal> internal;
00550
              /// Copy constructor.
00552         RSet(const RSet &rset);
00553
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00555         void operator=(const RSet &rset);
00556
              /// Default constructor.
00558         RSet();
00559
              /// Destructor.
00561         ~RSet();
00562
              /// STL-style size (presumably the number of documents in the set).
00564         Xapian::doccount size() const;
00565
              /// STL-style emptiness test.
00567         bool empty() const;
00568
              /// Add the document with id `did`.
00570         void add_document(Xapian::docid did);
00571
              /// Add the document whose id is `*i`.
00573         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00574
              /// Remove the document with id `did`.
00576         void remove_document(Xapian::docid did);
00577
              /// Remove the document whose id is `*i`.
00579         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00580
              /// Membership test for the document with id `did`.
00582         bool contains(Xapian::docid did) const;
00583
              /// Membership test for the document whose id is `*i`.
00585         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00586
              /// Returns a std::string describing this object (presumably for
              /// debugging output).
00591         std::string get_description() const;
00592 };
00593 
/** Abstract function-object interface: subclasses implement operator() to
 *  give a yes/no decision for a document.  A pointer to a MatchDecider can
 *  be passed to Enquire::get_mset() and Enquire::register_match_decider().
 */
00596 class XAPIAN_VISIBILITY_DEFAULT MatchDecider {
00597     public:
              /// Decision function for `doc`.  Pure virtual and const, so an
              /// implementation cannot modify the decider's non-mutable state.
              /// NOTE(review): presumably `true` means "accept" - confirm.
00600         virtual bool operator()(const Xapian::Document &doc) const = 0;
00601
              /// Virtual destructor, so subclasses can be destroyed through a
              /// MatchDecider pointer.
00603         virtual ~MatchDecider();
00604 };
00605 
/** Query-running interface: constructed over a Database, configured through
 *  the set_*() methods, and queried with get_mset() / get_eset().
 *
 *  Enquire objects cannot be copied by client code: the copy constructor
 *  and assignment operator are declared private and are not defined in this
 *  header.  Remarks marked "presumably" are inferred from names and
 *  signatures only - NOTE(review): confirm them against the implementation.
 */
00616 class XAPIAN_VISIBILITY_DEFAULT Enquire {
00617     private:
              /// Private: copying is not available to clients.
00619         Enquire(const Enquire &);
00620
              /// Private: assignment is not available to clients.
00622         void operator=(const Enquire &);
00623
00624     public:
00625         class Internal;
              /// Handle to the Internal implementation object (the only data
              /// member).
00627         Xapian::Internal::RefCntPtr<Internal> internal;
00628
              /// Construct over `database`.  Explicit, so a Database is never
              /// converted to an Enquire implicitly.  `errorhandler_` is optional
              /// and defaults to a null pointer.
00653         explicit Enquire(const Database &database, ErrorHandler * errorhandler_ = 0);
00654
              /// Destructor.
00657         ~Enquire();
00658
              /// Set the query.  `qlen` defaults to 0.
00665         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00666
              /// Returns the query by const reference.
00673         const Xapian::Query & get_query() const;
00674
              /// Set the weighting scheme.  Weight declares Enquire a friend "so
              /// Enquire can clone us", so presumably a clone of `weight_` is
              /// kept rather than a reference to it - TODO confirm.
00681         void set_weighting_scheme(const Weight &weight_);
00682
              /// Select the value slot used as the collapse key.
00709         void set_collapse_key(Xapian::valueno collapse_key);
00710
              // Argument type for set_docid_order().  Note the numeric values:
              // DESCENDING is 0, ASCENDING is 1, DONT_CARE is 2.
00711         typedef enum {
00712             ASCENDING = 1,
00713             DESCENDING = 0,
00714             DONT_CARE = 2
00715         } docid_order;
00716
              /// Choose one of the docid_order values above.
00740         void set_docid_order(docid_order order);
00741
              /// Set a percentage cutoff and, optionally, a weight cutoff
              /// (`weight_cutoff` defaults to 0).
00760         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00761
              // The four set_sort_by_*() methods select the sort criterion.
              // Those taking a `sort_key` value slot also take `ascending`,
              // which defaults to true.
00766         void set_sort_by_relevance();
00767
00780         void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00781
00795         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00796                                               bool ascending = true);
00797
00817         void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00818                                               bool ascending = true);
00819
              /// Run the match and return an MSet.  `checkatleast` defaults to
              /// 0; `omrset` and `mdecider` are optional and default to null
              /// pointers.  NOTE(review): presumably `first` is the rank of the
              /// first item wanted and `maxitems` the most items to return.
00845         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00846                       Xapian::doccount checkatleast = 0,
00847                       const RSet * omrset = 0,
00848                       const MatchDecider * mdecider = 0) const;
              /// Convenience overload without `checkatleast`: forwards to the
              /// overload above, passing 0 for it.
00849         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00850                       const RSet * omrset,
00851                       const MatchDecider * mdecider = 0) const {
00852             return get_mset(first, maxitems, 0, omrset, mdecider);
00853         }
00854
              // Flag constants (presumably for the `flags` argument of
              // get_eset() - TODO confirm).  The lower-case names below are
              // deprecated aliases with the same values.
00855         static const int INCLUDE_QUERY_TERMS = 1;
00856         static const int USE_EXACT_TERMFREQ = 2;
00857 #ifndef _MSC_VER
00858
00859         XAPIAN_DEPRECATED(static const int include_query_terms) = 1;
00861         XAPIAN_DEPRECATED(static const int use_exact_termfreq) = 2;
00862 #else
00863         // Work around MSVC stupidity (you get a warning for deprecating a
00864         // declaration).
00865         static const int include_query_terms = 1;
00866         static const int use_exact_termfreq = 2;
00867 #pragma deprecated("Xapian::Enquire::include_query_terms", "Xapian::Enquire::use_exact_termfreq")
00868 #endif
00869
              /// Return an ESet of at most... NOTE(review): presumably at most
              /// `maxitems` terms, derived from the documents in `omrset`.
              /// `flags` defaults to 0, `k` to 1.0, and `edecider` to a null
              /// pointer.
00892         ESet get_eset(Xapian::termcount maxitems,
00893                         const RSet & omrset,
00894                         int flags = 0,
00895                         double k = 1.0,
00896                         const Xapian::ExpandDecider * edecider = 0) const;
00897
              /// Convenience overload taking only an ExpandDecider: forwards to
              /// the overload above with flags = 0 and k = 1.0.
00911         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00912                                const Xapian::ExpandDecider * edecider) const {
00913             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00914         }
00915
              /// Start of a TermIterator range for the document with id `did`.
00944         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00945
              /// End of that range.  The argument is ignored: the result is
              /// always TermIterator(NULL).
00947         TermIterator get_matching_terms_end(Xapian::docid /*did*/) const {
00948             return TermIterator(NULL);
00949         }
00950
              /// Overload of get_matching_terms_begin() taking an MSetIterator.
00973         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00974
              /// End of that range.  The argument is ignored: the result is
              /// always TermIterator(NULL).
00976         TermIterator get_matching_terms_end(const MSetIterator &/*it*/) const {
00977             return TermIterator(NULL);
00978         }
00979
              /// Associate `mdecider` with `name`.  `mdecider` defaults to NULL.
              /// NOTE(review): what registering a NULL decider means is not
              /// visible in this header.
00986         void register_match_decider(const std::string &name,
00987                                     const MatchDecider *mdecider = NULL);
00988
              /// Returns a std::string describing this object (presumably for
              /// debugging output).
00992         std::string get_description() const;
00993 };
00994 
00995 }
00996 
// Forward declaration at global scope (namespace Xapian is closed just above
// and reopened just below) so that Xapian::Weight can name ::RemoteServer in
// its friend declaration.
00997 class RemoteServer;
00998 
00999 namespace Xapian {
01000 
/** Abstract base class for weighting schemes (the type accepted by
 *  Enquire::set_weighting_scheme()).
 *
 *  Subclasses implement the pure virtual functions below.  clone() is
 *  private, so it can only be called through the friends Enquire and
 *  ::RemoteServer.  The protected data members are zero-initialised by the
 *  default constructor, so a freshly constructed scheme never holds an
 *  indeterminate `internal` pointer.
 */
01002 class XAPIAN_VISIBILITY_DEFAULT Weight {
01003     friend class Enquire; // So Enquire can clone us
01004     friend class ::RemoteServer; // So RemoteServer can clone us - FIXME
01005     public:
01006         class Internal;
01007     protected:
              /// Copy constructor.  Protected, so only derived classes can copy.
01008         Weight(const Weight &);
01009     private:
              /// Private: assignment is not available to clients.
01010         void operator=(Weight &);
01011
              /// Return a Weight pointer to a copy of this object's dynamic
              /// type.  NOTE(review): presumably newly allocated and owned by
              /// the caller - confirm against the implementations.
01021         virtual Weight * clone() const = 0;
01022
01023     protected:
01024         const Internal * internal; // Weight::Internal == StatsSource
              // NOTE(review): the three members below have the same names
              // (minus the trailing underscore) as create()'s parameters, so
              // presumably create() fills them in - confirm.
01025         Xapian::doclength querysize;
01026         Xapian::termcount wqf;
01027         std::string tname;
01028
01029     public:
              /// Default constructor: null `internal`, zero `querysize` and
              /// `wqf`, empty `tname`.
01030         Weight() : internal(0), querysize(0), wqf(0) { }
              /// Virtual destructor, so schemes can be destroyed through a
              /// Weight pointer.
01031         virtual ~Weight();
01032
              /// Non-virtual factory returning a Weight pointer, given values
              /// matching the protected members above.
01045         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01046                         Xapian::termcount wqf_, const std::string & tname_) const;
01047
              /// Name of the scheme, as a std::string.
01052         virtual std::string name() const = 0;
01053
              /// Serialise this object to a std::string.
01055         virtual std::string serialise() const = 0;
01056
              /// Build a Weight from a std::string (presumably one produced by
              /// serialise() - TODO confirm).
01058         virtual Weight * unserialise(const std::string &s) const = 0;
01059
              /// Weight contribution computed from `wdf` and the document
              /// length `len`.
01067         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01068                                       Xapian::doclength len) const = 0;
01069
              /// Presumably an upper bound on get_sumpart() - TODO confirm.
01075         virtual Xapian::weight get_maxpart() const = 0;
01076
              /// Extra weight contribution computed from the document length.
01085         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01086
              /// Presumably an upper bound on get_sumextra() - TODO confirm.
01090         virtual Xapian::weight get_maxextra() const = 0;
01091
              /// Whether get_sumpart() needs its `len` argument.  Not pure
              /// virtual: a base implementation exists elsewhere, which the
              /// trailing comment suggests returns true.
01093         virtual bool get_sumpart_needs_doclength() const; /* { return true; } */
01094 };
01095 
/** Concrete weighting scheme derived from Weight.  It has no data members of
 *  its own and no constructor parameters; every Weight virtual function is
 *  redeclared here and defined outside this header.
 *  NOTE(review): the name suggests a scheme for pure boolean matching -
 *  confirm against the implementation.
 */
01097 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
01098     public:
              /// Overrides Weight::clone() with a covariant BoolWeight* return,
              /// and makes it public.
01099         BoolWeight * clone() const;
01100         BoolWeight() { }
01101         ~BoolWeight();
01102         std::string name() const;
01103         std::string serialise() const;
              /// Covariant return type, as for clone().
01104         BoolWeight * unserialise(const std::string & s) const;
01105         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01106         Xapian::weight get_maxpart() const;
01107
01108         Xapian::weight get_sumextra(Xapian::doclength len) const;
01109         Xapian::weight get_maxextra() const;
01110
01111         bool get_sumpart_needs_doclength() const;
01112 };
01113 
/** BM25 weighting scheme, parameterised by k1, k2, k3, b and min_normlen.
 *
 *  `termweight` and `lenpart` are mutable and guarded by
 *  `weight_calculated`, which both constructors set to false.
 *  NOTE(review): presumably calc_termweight() fills them in lazily on first
 *  use - confirm against the implementation.
 */
01126 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
01127     private:
01128         mutable Xapian::weight termweight;
01129         mutable Xapian::doclength lenpart;
01130
01131         double k1, k2, k3, b;
01132         Xapian::doclength min_normlen;
01133
01134         mutable bool weight_calculated;
01135
01136         void calc_termweight() const;
01137
01138     public:
              /// Construct with explicit parameters.  Out-of-range values are
              /// clamped while the members are initialised: negative k1, k2 and
              /// k3 become 0, and b is limited to the range [0, 1].
              /// min_normlen is stored unchanged.
01157         BM25Weight(double k1_, double k2_, double k3_, double b_,
01158                    double min_normlen_)
01159                 : k1(k1_ < 0 ? 0 : k1_),
01160                   k2(k2_ < 0 ? 0 : k2_),
01161                   k3(k3_ < 0 ? 0 : k3_),
01162                   b(b_ < 0 ? 0 : (b_ > 1 ? 1 : b_)),
01163                   min_normlen(min_normlen_),
01164                   weight_calculated(false)
01165         { }
              /// Construct with the defaults k1 = 1, k2 = 0, k3 = 1, b = 0.5 and
              /// min_normlen = 0.5.
01167         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01168                        weight_calculated(false) { }
01169
              /// Overrides Weight::clone() with a covariant BM25Weight* return,
              /// and makes it public.
01170         BM25Weight * clone() const;
01171         ~BM25Weight() { }
01172         std::string name() const;
01173         std::string serialise() const;
              /// Covariant return type, as for clone().
01174         BM25Weight * unserialise(const std::string & s) const;
01175         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01176         Xapian::weight get_maxpart() const;
01177
01178         Xapian::weight get_sumextra(Xapian::doclength len) const;
01179         Xapian::weight get_maxextra() const;
01180
01181         bool get_sumpart_needs_doclength() const;
01182 };
01183 
/** Weighting scheme derived from Weight with a single tunable parameter,
 *  `param_k`.
 *
 *  `termweight` and `lenpart` are mutable and guarded by
 *  `weight_calculated`, which both constructors set to false.
 *  NOTE(review): presumably calc_termweight() fills them in lazily on first
 *  use - confirm against the implementation.
 */
01201 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
01202     private:
01203         mutable Xapian::weight termweight;
01204         mutable Xapian::doclength lenpart;
01205
01206         double param_k;
01207
01208         mutable bool weight_calculated;
01209
01210         void calc_termweight() const;
01211
01212     public:
              /// Construct with parameter `k`; a negative `k` is stored as 0.
              /// Explicit, so a double is never converted to a TradWeight
              /// implicitly.
01220         explicit TradWeight(double k)
01221             : param_k(k < 0 ? 0 : k), weight_calculated(false) { }
01223
              /// Construct with the default parameter value 1.0.
01224         TradWeight() : param_k(1.0), weight_calculated(false) { }
01225
              /// Overrides Weight::clone() with a covariant TradWeight* return,
              /// and makes it public.
01226         TradWeight * clone() const;
01227         ~TradWeight() { }
01228         std::string name() const;
01229         std::string serialise() const;
              /// Covariant return type, as for clone().
01230         TradWeight * unserialise(const std::string & s) const;
01231
01232         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01233         Xapian::weight get_maxpart() const;
01234
01235         Xapian::weight get_sumextra(Xapian::doclength len) const;
01236         Xapian::weight get_maxextra() const;
01237
01238         bool get_sumpart_needs_doclength() const;
01239 };
01240 
01241 }
01242 
01243 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 1.0.1).
Generated on 11 Jun 2007 by Doxygen 1.4.6.