00001
00002 #ifndef _PCRE_RML_H
00003 #define _PCRE_RML_H
00004
00005 #include <pcrecpp.h>
00006 #include "../common/utilit.h"
00007 #include <iostream>
00008
00009 extern void RmlPcreMakeTables(vector<BYTE>& character_table, MorphLanguageEnum Langua);
00010
00011
00012 typedef pcrecpp::Arg Arg;
00013 extern Arg no_arg;
00014
00015 struct RML_RE_OPTIONS {
00016 pcrecpp::RE_Options opts;
00017 const BYTE* table;
00018 };
00019
00020
00021
00022
00023 class RML_RE {
00024 public:
00025 typedef pcrecpp::StringPiece StringPiece;
00026 typedef pcrecpp::RE_Options RE_Options;
00027
00028 class Options {
00029 public:
00030 pcrecpp::RE_Options options_;
00031 const BYTE* tableptr_;
00032 Options(void) : tableptr_(NULL) {};
00033 Options(const RE_Options &opts, const BYTE *tableptr=NULL) : options_(opts), tableptr_(tableptr) {};
00034 };
00035
00036 protected:
00037 string pattern_;
00038 Options opts_;
00039 pcre* re_full_;
00040 pcre* re_partial_;
00041 string error_;
00042 int match_limit_;
00043
00044 public:
00045
00046
00047 RML_RE(void)
00048 : re_full_(NULL), re_partial_(NULL), match_limit_(0)
00049 {};
00050
00051 RML_RE(const string& pat)
00052 : re_full_(NULL), re_partial_(NULL), match_limit_(0)
00053 { Init(pat); };
00054
00055 RML_RE(const string& pat, const Options &opts)
00056 : re_full_(NULL), re_partial_(NULL), match_limit_(0), opts_(opts)
00057 { Init(pat); };
00058
00059
00060 RML_RE(const string& pat, const vector<BYTE>& RegExpTables)
00061 : re_full_(NULL), re_partial_(NULL), match_limit_(0)
00062 {
00063 opts_.tableptr_ = RegExpTables.data();
00064 Init(pat);
00065 };
00066
00067
00068
00069 RML_RE(const RML_RE& re)
00070 : opts_(re.opts_), re_full_(NULL), re_partial_(NULL), match_limit_(0)
00071 { Init(re.pattern_); };
00072
00073
00074 void operator=(const RML_RE& re)
00075 {
00076 clear();
00077 opts_ = re.opts_;
00078 Init(re.pattern_);
00079 };
00080
00081
00082 void clear();
00083
00084 ~RML_RE() { clear(); }
00085
00086
00087
00088
00089 const string& pattern() const { return pattern_; }
00090
00091
00092 void pattern(const string &pattern) { clear(); Init(pattern); }
00093
00094
00095
00096 const string& error() const { return error_; }
00097
00098
00099
00100
00101
00102
00103
00104 bool FullMatch(const StringPiece& text,
00105 const Arg& ptr1 = no_arg,
00106 const Arg& ptr2 = no_arg,
00107 const Arg& ptr3 = no_arg,
00108 const Arg& ptr4 = no_arg,
00109 const Arg& ptr5 = no_arg,
00110 const Arg& ptr6 = no_arg,
00111 const Arg& ptr7 = no_arg,
00112 const Arg& ptr8 = no_arg,
00113 const Arg& ptr9 = no_arg,
00114 const Arg& ptr10 = no_arg,
00115 const Arg& ptr11 = no_arg,
00116 const Arg& ptr12 = no_arg,
00117 const Arg& ptr13 = no_arg,
00118 const Arg& ptr14 = no_arg,
00119 const Arg& ptr15 = no_arg,
00120 const Arg& ptr16 = no_arg) const;
00121
00122 bool PartialMatch(const StringPiece& text,
00123 const Arg& ptr1 = no_arg,
00124 const Arg& ptr2 = no_arg,
00125 const Arg& ptr3 = no_arg,
00126 const Arg& ptr4 = no_arg,
00127 const Arg& ptr5 = no_arg,
00128 const Arg& ptr6 = no_arg,
00129 const Arg& ptr7 = no_arg,
00130 const Arg& ptr8 = no_arg,
00131 const Arg& ptr9 = no_arg,
00132 const Arg& ptr10 = no_arg,
00133 const Arg& ptr11 = no_arg,
00134 const Arg& ptr12 = no_arg,
00135 const Arg& ptr13 = no_arg,
00136 const Arg& ptr14 = no_arg,
00137 const Arg& ptr15 = no_arg,
00138 const Arg& ptr16 = no_arg) const;
00139
00140 bool Consume(StringPiece* input,
00141 const Arg& ptr1 = no_arg,
00142 const Arg& ptr2 = no_arg,
00143 const Arg& ptr3 = no_arg,
00144 const Arg& ptr4 = no_arg,
00145 const Arg& ptr5 = no_arg,
00146 const Arg& ptr6 = no_arg,
00147 const Arg& ptr7 = no_arg,
00148 const Arg& ptr8 = no_arg,
00149 const Arg& ptr9 = no_arg,
00150 const Arg& ptr10 = no_arg,
00151 const Arg& ptr11 = no_arg,
00152 const Arg& ptr12 = no_arg,
00153 const Arg& ptr13 = no_arg,
00154 const Arg& ptr14 = no_arg,
00155 const Arg& ptr15 = no_arg,
00156 const Arg& ptr16 = no_arg) const;
00157
00158 bool FindAndConsume(StringPiece* input,
00159 const Arg& ptr1 = no_arg,
00160 const Arg& ptr2 = no_arg,
00161 const Arg& ptr3 = no_arg,
00162 const Arg& ptr4 = no_arg,
00163 const Arg& ptr5 = no_arg,
00164 const Arg& ptr6 = no_arg,
00165 const Arg& ptr7 = no_arg,
00166 const Arg& ptr8 = no_arg,
00167 const Arg& ptr9 = no_arg,
00168 const Arg& ptr10 = no_arg,
00169 const Arg& ptr11 = no_arg,
00170 const Arg& ptr12 = no_arg,
00171 const Arg& ptr13 = no_arg,
00172 const Arg& ptr14 = no_arg,
00173 const Arg& ptr15 = no_arg,
00174 const Arg& ptr16 = no_arg) const;
00175
00176 bool Replace(const StringPiece& rewrite,
00177 string *str) const;
00178
00179 int GlobalReplace(const StringPiece& rewrite,
00180 string *str) const;
00181
00182 bool Extract(const StringPiece &rewrite,
00183 const StringPiece &text,
00184 string *out) const;
00185
00186
00187
00188
00189 enum Anchor {
00190 UNANCHORED,
00191 ANCHOR_START,
00192 ANCHOR_BOTH
00193 };
00194
00195
00196
00197
00198 bool DoMatch(const StringPiece& text,
00199 Anchor anchor,
00200 int* consumed,
00201 const Arg* const* args, int n) const;
00202
00203
00204
00205 int NumberOfCapturingGroups();
00206
00207
00208
00209
00210 static bool isComplexPattern(const std::string &pat);
00211
00212 protected:
00213 void Init(const string& pattern);
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227 int TryMatch(const StringPiece& text,
00228 int startpos,
00229 Anchor anchor,
00230 int *vec,
00231 int vecsize) const;
00232
00233
00234
00235 bool Rewrite(string *out,
00236 const StringPiece& rewrite,
00237 const StringPiece& text,
00238 int *vec,
00239 int veclen) const;
00240
00241
00242 bool DoMatchImpl(const StringPiece& text,
00243 Anchor anchor,
00244 int* consumed,
00245 const Arg* const args[],
00246 int n,
00247 int* vec,
00248 int vecsize) const;
00249
00250
00251
00252 pcre* Compile(Anchor anchor);
00253
00254
00255
00256
00257
00258
00259
00260
00261 };
00262
00263
00264 #endif