libtpc  0.1
Textpressocentral core library
DataStructures.h
1 
9 #ifndef LIBTPC_QUERY_H
10 #define LIBTPC_QUERY_H
11 
12 #include <vector>
13 #include <string>
14 #include <lucene++/LuceneHeaders.h>
15 #include <cfloat>
16 
17 namespace tpc {
18 
19  namespace index {
20 
/// Characters that this library treats as Lucene query special characters.
///
/// NOTE(review): Lucene's query syntax reserves more characters than the ones listed
/// here (e.g. + - * ?). They are presumably left out on purpose -- confirm against
/// the code that consumes this list.
static const std::vector<std::string> LUCENE_SPECIAL_CHARS = {
    "!", "(", ")", "{", "}", "[", "]", "^", "~", ":"
};
23 
/// Kind of query.
///
/// NOTE(review): presumably selects whether a search is evaluated on whole
/// documents or on single sentences -- confirm against the search code.
/// The numeric values are fixed explicitly (1 and 2).
enum class QueryType {
    document = 1,
    sentence = 2
};
31 
/// Kind of document.
///
/// NOTE(review): presumably distinguishes documents of the main index from
/// documents of an external index -- confirm against the index code.
/// The numeric values are fixed explicitly (1 and 2).
enum class DocumentType {
    main = 1,
    external = 2
};
35 
/// Data structure that contains summary information related to a sentence as the
/// result of a search.
struct SentenceSummary {
    /// Identifier of the sentence; -1 until set.
    int sentence_id{-1};
    /// Identifier used by Lucene for this entry (presumably the Lucene doc id -- confirm); -1 until set.
    int lucene_internal_id{-1};
    /// Score of the sentence; 0 by default.
    double score{0};
};
49 
61  struct SentenceDetails : public SentenceSummary {
62  int doc_position_begin{-1};
63  int doc_position_end{-1};
64  std::string sentence_text;
65  std::string categories_string;
66  };
67 
77  struct Document {
78  std::string identifier;
79  double score{0};
80  std::string year;
81  int lucene_internal_id{-1};
82  DocumentType documentType{DocumentType::main};
83  };
84 
91  struct DocumentSummary : public Document {
92  std::vector <SentenceSummary> matching_sentences;
93  };
94 
109  struct DocumentDetails : public Document {
110  std::string filepath;
111  std::string fulltext;
112  std::string categories_string;
113  std::string abstract;
114  std::vector <std::string> corpora;
115  std::string accession;
116  std::string title;
117  std::string author;
118  std::string journal;
119  std::string type;
120  std::vector <SentenceDetails> sentences_details;
121  };
122 
126  class Query {
127  public:
128  QueryType type{QueryType::document};
129  std::string keyword{""};
130  std::string exclude_keyword{""};
131  std::string year{""};
132  std::string author{""};
133  std::string accession{""};
134  std::string journal{""};
135  std::string paper_type{""};
136  bool case_sensitive{false};
137  bool sort_by_year{false};
138  bool exact_match_author{false};
139  bool exact_match_journal{false};
140  bool categories_and_ed{true};
141  std::vector<std::string> literatures{};
142  std::vector<std::string> categories{};
143 
148  std::string get_query_text() const;
149  private:
150  void add_field_to_text_if_not_empty(const std::string& field_value, const std::string& lucene_field_name,
151  bool exact_match_field, std::string& query_text) const;
152  void add_categories_to_text(std::string& query_text) const;
153  };
154 
167  struct SearchResults {
168  Query query;
169  std::vector <DocumentSummary> hit_documents{};
170  size_t total_num_sentences{0};
171  double max_score{0};
172  double min_score{DBL_MAX};
173  Lucene::Collection <Lucene::ScoreDocPtr> partialIndexMatches{};
174  Lucene::Collection <Lucene::ScoreDocPtr> partialExternalMatches{};
175 
176  void update(const SearchResults &other);
177  };
178  }
179 }
180 
181 
182 #endif //LIBTPC_QUERY_H
results generated by a search
Definition: DataStructures.h:167
data structure that contains summary information related to a document as the result of a search ...
Definition: DataStructures.h:91
data structure that contains summary information related to a sentence as the result of a search ...
Definition: DataStructures.h:44
data structure that contains detailed information related to a sentence
Definition: DataStructures.h:61
data structure that contains detailed information related to a document as the result of a search ...
Definition: DataStructures.h:109
Definition: CASManager.h:16
generic information of a document
Definition: DataStructures.h:77
Definition: DataStructures.h:126