c/libstdcpp-api/a01383_source.html

00001 // class template regex -*- C++ -*-
00002
00003 // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024
00025 /**
00026  *  @file bits/regex_compiler.h
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 namespace __regex
00034 {
00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00036
00037   struct _Scanner_base
00038   {
00039     typedef unsigned int _StateT;
00040
00041     static constexpr _StateT _S_state_at_start    = 1 << 0;
00042     static constexpr _StateT _S_state_in_brace    = 1 << 2;
00043     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
00044
00045     virtual ~_Scanner_base() { };
00046   };
00047
00048   //
00049   // @brief Scans an input range for regex tokens.
00050   //
00051   // The %_Scanner class interprets the regular expression pattern in the input
00052   // range passed to its constructor as a sequence of parse tokens passed to
00053   // the regular expression compiler.  The sequence of tokens provided depends
00054   // on the flag settings passed to the constructor:  different regular
00055   // expression grammars will interpret the same input pattern in
00056   // syntactically different ways.
00057   //
00058   template<typename _InputIterator>
00059     class _Scanner: public _Scanner_base
00060     {
00061     public:
00062       typedef _InputIterator                                        _IteratorT;
00063       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00064       typedef std::basic_string<_CharT>                             _StringT;
00065       typedef regex_constants::syntax_option_type                   _FlagT;
00066       typedef const std::ctype<_CharT>                              _CtypeT;
00067
00068       // Token types returned from the scanner.
00069       enum _TokenT
00070       {
00071     _S_token_anychar,
00072     _S_token_backref,
00073     _S_token_bracket_begin,
00074     _S_token_bracket_end,
00075     _S_token_inverse_class,
00076     _S_token_char_class_name,
00077     _S_token_closure0,
00078     _S_token_closure1,
00079     _S_token_collelem_multi,
00080     _S_token_collelem_single,
00081     _S_token_collsymbol,
00082     _S_token_comma,
00083     _S_token_dash,
00084     _S_token_dup_count,
00085     _S_token_eof,
00086     _S_token_equiv_class_name,
00087     _S_token_interval_begin,
00088     _S_token_interval_end,
00089     _S_token_line_begin,
00090     _S_token_line_end,
00091     _S_token_opt,
00092     _S_token_or,
00093     _S_token_ord_char,
00094     _S_token_quoted_char,
00095     _S_token_subexpr_begin,
00096     _S_token_subexpr_end,
00097     _S_token_word_begin,
00098     _S_token_word_end,
00099     _S_token_unknown
00100       };
00101
00102     public:
00103       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00104            std::locale __loc)
00105       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00106         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00107       { _M_advance(); }
00108
00109       void
00110       _M_advance();
00111
00112       _TokenT
00113       _M_token() const
00114       { return _M_curToken; }
00115
00116       const _StringT&
00117       _M_value() const
00118       { return _M_curValue; }
00119
00120 #ifdef _GLIBCXX_DEBUG
00121       std::ostream&
00122       _M_print(std::ostream&);
00123 #endif
00124
00125     private:
00126       void
00127       _M_eat_escape();
00128
00129       void
00130       _M_scan_in_brace();
00131
00132       void
00133       _M_scan_in_bracket();
00134
00135       void
00136       _M_eat_charclass();
00137
00138       void
00139       _M_eat_equivclass();
00140
00141       void
00142       _M_eat_collsymbol();
00143
00144     private:
00145       _IteratorT  _M_current;
00146       _IteratorT  _M_end;
00147       _FlagT      _M_flags;
00148       _CtypeT&    _M_ctype;
00149       _TokenT     _M_curToken;
00150       _StringT    _M_curValue;
00151       _StateT     _M_state;
00152     };
00153
00154   template<typename _InputIterator>
00155     void
00156     _Scanner<_InputIterator>::
00157     _M_advance()
00158     {
00159       if (_M_current == _M_end)
00160     {
00161       _M_curToken = _S_token_eof;
00162       return;
00163     }
00164
00165       _CharT __c = *_M_current;
00166       if (_M_state & _S_state_in_bracket)
00167     {
00168       _M_scan_in_bracket();
00169       return;
00170     }
00171       if (_M_state & _S_state_in_brace)
00172     {
00173       _M_scan_in_brace();
00174       return;
00175     }
00176 #if 0
00177       // TODO: re-enable line anchors when _M_assertion is implemented.
00178       // See PR libstdc++/47724
00179       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00180     {
00181       _M_curToken = _S_token_line_begin;
00182       ++_M_current;
00183       return;
00184     }
00185       else if (__c == _M_ctype.widen('$'))
00186     {
00187       _M_curToken = _S_token_line_end;
00188       ++_M_current;
00189       return;
00190     }
00191 #endif
00192       else if (__c == _M_ctype.widen('.'))
00193     {
00194       _M_curToken = _S_token_anychar;
00195       ++_M_current;
00196       return;
00197     }
00198       else if (__c == _M_ctype.widen('*'))
00199     {
00200       _M_curToken = _S_token_closure0;
00201       ++_M_current;
00202       return;
00203     }
00204       else if (__c == _M_ctype.widen('+'))
00205     {
00206       _M_curToken = _S_token_closure1;
00207       ++_M_current;
00208       return;
00209     }
00210       else if (__c == _M_ctype.widen('|'))
00211     {
00212       _M_curToken = _S_token_or;
00213       ++_M_current;
00214       return;
00215     }
00216       else if (__c == _M_ctype.widen('['))
00217     {
00218       _M_curToken = _S_token_bracket_begin;
00219       _M_state |= (_S_state_in_bracket | _S_state_at_start);
00220       ++_M_current;
00221       return;
00222     }
00223       else if (__c == _M_ctype.widen('\\'))
00224     {
00225       _M_eat_escape();
00226       return;
00227     }
00228       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00229     {
00230       if (__c == _M_ctype.widen('('))
00231         {
00232           _M_curToken = _S_token_subexpr_begin;
00233           ++_M_current;
00234           return;
00235         }
00236       else if (__c == _M_ctype.widen(')'))
00237         {
00238           _M_curToken = _S_token_subexpr_end;
00239           ++_M_current;
00240           return;
00241         }
00242       else if (__c == _M_ctype.widen('{'))
00243         {
00244           _M_curToken = _S_token_interval_begin;
00245           _M_state |= _S_state_in_brace;
00246           ++_M_current;
00247           return;
00248         }
00249     }
00250
00251       _M_curToken = _S_token_ord_char;
00252       _M_curValue.assign(1, __c);
00253       ++_M_current;
00254     }
00255
00256
00257   template<typename _InputIterator>
00258     void
00259     _Scanner<_InputIterator>::
00260     _M_scan_in_brace()
00261     {
00262       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00263     {
00264       _M_curToken = _S_token_dup_count;
00265       _M_curValue.assign(1, *_M_current);
00266       ++_M_current;
00267       while (_M_current != _M_end
00268          && _M_ctype.is(_CtypeT::digit, *_M_current))
00269         {
00270           _M_curValue += *_M_current;
00271           ++_M_current;
00272         }
00273       return;
00274     }
00275       else if (*_M_current == _M_ctype.widen(','))
00276     {
00277       _M_curToken = _S_token_comma;
00278       ++_M_current;
00279       return;
00280     }
00281       if (_M_flags & (regex_constants::basic | regex_constants::grep))
00282     {
00283       if (*_M_current == _M_ctype.widen('\\'))
00284         _M_eat_escape();
00285     }
00286       else
00287     {
00288       if (*_M_current == _M_ctype.widen('}'))
00289         {
00290           _M_curToken = _S_token_interval_end;
00291           _M_state &= ~_S_state_in_brace;
00292           ++_M_current;
00293           return;
00294         }
00295     }
00296     }
00297
00298   template<typename _InputIterator>
00299     void
00300     _Scanner<_InputIterator>::
00301     _M_scan_in_bracket()
00302     {
00303       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00304     {
00305       _M_curToken = _S_token_inverse_class;
00306       _M_state &= ~_S_state_at_start;
00307       ++_M_current;
00308       return;
00309     }
00310       else if (*_M_current == _M_ctype.widen('['))
00311     {
00312       ++_M_current;
00313       if (_M_current == _M_end)
00314         {
00315           _M_curToken = _S_token_eof;
00316           return;
00317         }
00318
00319       if (*_M_current == _M_ctype.widen('.'))
00320         {
00321           _M_curToken = _S_token_collsymbol;
00322           _M_eat_collsymbol();
00323           return;
00324         }
00325       else if (*_M_current == _M_ctype.widen(':'))
00326         {
00327           _M_curToken = _S_token_char_class_name;
00328           _M_eat_charclass();
00329           return;
00330         }
00331       else if (*_M_current == _M_ctype.widen('='))
00332         {
00333           _M_curToken = _S_token_equiv_class_name;
00334           _M_eat_equivclass();
00335           return;
00336         }
00337     }
00338       else if (*_M_current == _M_ctype.widen('-'))
00339     {
00340       _M_curToken = _S_token_dash;
00341       ++_M_current;
00342       return;
00343     }
00344       else if (*_M_current == _M_ctype.widen(']'))
00345     {
00346       if (!(_M_flags & regex_constants::ECMAScript)
00347           || !(_M_state & _S_state_at_start))
00348         {
00349           // special case: only if  _not_ chr first after
00350           // '[' or '[^' and if not ECMAscript
00351           _M_curToken = _S_token_bracket_end;
00352           ++_M_current;
00353           return;
00354         }
00355     }
00356       _M_curToken = _S_token_collelem_single;
00357       _M_curValue.assign(1, *_M_current);
00358       ++_M_current;
00359     }
00360
00361   template<typename _InputIterator>
00362     void
00363     _Scanner<_InputIterator>::
00364     _M_eat_escape()
00365     {
00366       ++_M_current;
00367       if (_M_current == _M_end)
00368     {
00369       _M_curToken = _S_token_eof;
00370       return;
00371     }
00372       _CharT __c = *_M_current;
00373       ++_M_current;
00374
00375       if (__c == _M_ctype.widen('('))
00376     {
00377       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00378         {
00379           _M_curToken = _S_token_ord_char;
00380           _M_curValue.assign(1, __c);
00381         }
00382       else
00383         _M_curToken = _S_token_subexpr_begin;
00384     }
00385       else if (__c == _M_ctype.widen(')'))
00386     {
00387       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00388         {
00389           _M_curToken = _S_token_ord_char;
00390           _M_curValue.assign(1, __c);
00391         }
00392       else
00393         _M_curToken = _S_token_subexpr_end;
00394     }
00395       else if (__c == _M_ctype.widen('{'))
00396     {
00397       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00398         {
00399           _M_curToken = _S_token_ord_char;
00400           _M_curValue.assign(1, __c);
00401         }
00402       else
00403         {
00404           _M_curToken = _S_token_interval_begin;
00405           _M_state |= _S_state_in_brace;
00406         }
00407     }
00408       else if (__c == _M_ctype.widen('}'))
00409     {
00410       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00411         {
00412           _M_curToken = _S_token_ord_char;
00413           _M_curValue.assign(1, __c);
00414         }
00415       else
00416         {
00417           if (!(_M_state && _S_state_in_brace))
00418         __throw_regex_error(regex_constants::error_badbrace);
00419           _M_state &= ~_S_state_in_brace;
00420           _M_curToken = _S_token_interval_end;
00421         }
00422     }
00423       else if (__c == _M_ctype.widen('x'))
00424     {
00425       ++_M_current;
00426       if (_M_current == _M_end)
00427         {
00428           _M_curToken = _S_token_eof;
00429           return;
00430         }
00431       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00432         {
00433           _M_curValue.assign(1, *_M_current);
00434           ++_M_current;
00435           if (_M_current == _M_end)
00436         {
00437           _M_curToken = _S_token_eof;
00438           return;
00439         }
00440           if (_M_ctype.is(_CtypeT::digit, *_M_current))
00441         {
00442           _M_curValue += *_M_current;
00443           ++_M_current;
00444           return;
00445         }
00446         }
00447     }
00448       else if (__c == _M_ctype.widen('^')
00449            || __c == _M_ctype.widen('.')
00450            || __c == _M_ctype.widen('*')
00451            || __c == _M_ctype.widen('$')
00452            || __c == _M_ctype.widen('\\'))
00453     {
00454       _M_curToken = _S_token_ord_char;
00455       _M_curValue.assign(1, __c);
00456     }
00457       else if (_M_ctype.is(_CtypeT::digit, __c))
00458     {
00459       _M_curToken = _S_token_backref;
00460       _M_curValue.assign(1, __c);
00461     }
00462       else
00463     __throw_regex_error(regex_constants::error_escape);
00464     }
00465
00466
00467   // Eats a character class or throwns an exception.
00468   // current point to ':' delimiter on entry, char after ']' on return
00469   template<typename _InputIterator>
00470     void
00471     _Scanner<_InputIterator>::
00472     _M_eat_charclass()
00473     {
00474       ++_M_current; // skip ':'
00475       if (_M_current == _M_end)
00476     __throw_regex_error(regex_constants::error_ctype);
00477       for (_M_curValue.clear();
00478        _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00479        ++_M_current)
00480     _M_curValue += *_M_current;
00481       if (_M_current == _M_end)
00482     __throw_regex_error(regex_constants::error_ctype);
00483       ++_M_current; // skip ':'
00484       if (*_M_current != _M_ctype.widen(']'))
00485     __throw_regex_error(regex_constants::error_ctype);
00486       ++_M_current; // skip ']'
00487     }
00488
00489
00490   template<typename _InputIterator>
00491     void
00492     _Scanner<_InputIterator>::
00493     _M_eat_equivclass()
00494     {
00495       ++_M_current; // skip '='
00496       if (_M_current == _M_end)
00497     __throw_regex_error(regex_constants::error_collate);
00498       for (_M_curValue.clear();
00499        _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00500        ++_M_current)
00501     _M_curValue += *_M_current;
00502       if (_M_current == _M_end)
00503     __throw_regex_error(regex_constants::error_collate);
00504       ++_M_current; // skip '='
00505       if (*_M_current != _M_ctype.widen(']'))
00506     __throw_regex_error(regex_constants::error_collate);
00507       ++_M_current; // skip ']'
00508     }
00509
00510
00511   template<typename _InputIterator>
00512     void
00513     _Scanner<_InputIterator>::
00514     _M_eat_collsymbol()
00515     {
00516       ++_M_current; // skip '.'
00517       if (_M_current == _M_end)
00518     __throw_regex_error(regex_constants::error_collate);
00519       for (_M_curValue.clear();
00520        _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00521        ++_M_current)
00522     _M_curValue += *_M_current;
00523       if (_M_current == _M_end)
00524     __throw_regex_error(regex_constants::error_collate);
00525       ++_M_current; // skip '.'
00526       if (*_M_current != _M_ctype.widen(']'))
00527     __throw_regex_error(regex_constants::error_collate);
00528       ++_M_current; // skip ']'
00529     }
00530
00531 #ifdef _GLIBCXX_DEBUG
00532   template<typename _InputIterator>
00533     std::ostream&
00534     _Scanner<_InputIterator>::
00535     _M_print(std::ostream& ostr)
00536     {
00537       switch (_M_curToken)
00538       {
00539     case _S_token_anychar:
00540       ostr << "any-character\n";
00541       break;
00542     case _S_token_backref:
00543       ostr << "backref\n";
00544       break;
00545     case _S_token_bracket_begin:
00546       ostr << "bracket-begin\n";
00547       break;
00548     case _S_token_bracket_end:
00549       ostr << "bracket-end\n";
00550       break;
00551     case _S_token_char_class_name:
00552       ostr << "char-class-name \"" << _M_curValue << "\"\n";
00553       break;
00554     case _S_token_closure0:
00555       ostr << "closure0\n";
00556       break;
00557     case _S_token_closure1:
00558       ostr << "closure1\n";
00559       break;
00560     case _S_token_collelem_multi:
00561       ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00562       break;
00563     case _S_token_collelem_single:
00564       ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00565       break;
00566     case _S_token_collsymbol:
00567       ostr << "collsymbol \"" << _M_curValue << "\"\n";
00568       break;
00569     case _S_token_comma:
00570       ostr << "comma\n";
00571       break;
00572     case _S_token_dash:
00573       ostr << "dash\n";
00574       break;
00575     case _S_token_dup_count:
00576       ostr << "dup count: " << _M_curValue << "\n";
00577       break;
00578     case _S_token_eof:
00579       ostr << "EOF\n";
00580       break;
00581     case _S_token_equiv_class_name:
00582       ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00583       break;
00584     case _S_token_interval_begin:
00585       ostr << "interval begin\n";
00586       break;
00587     case _S_token_interval_end:
00588       ostr << "interval end\n";
00589       break;
00590     case _S_token_line_begin:
00591       ostr << "line begin\n";
00592       break;
00593     case _S_token_line_end:
00594       ostr << "line end\n";
00595       break;
00596     case _S_token_opt:
00597       ostr << "opt\n";
00598       break;
00599     case _S_token_or:
00600       ostr << "or\n";
00601       break;
00602     case _S_token_ord_char:
00603       ostr << "ordinary character: \"" << _M_value() << "\"\n";
00604       break;
00605     case _S_token_quoted_char:
00606       ostr << "quoted char\n";
00607       break;
00608     case _S_token_subexpr_begin:
00609       ostr << "subexpr begin\n";
00610       break;
00611     case _S_token_subexpr_end:
00612       ostr << "subexpr end\n";
00613       break;
00614     case _S_token_word_begin:
00615       ostr << "word begin\n";
00616       break;
00617     case _S_token_word_end:
00618       ostr << "word end\n";
00619       break;
00620     case _S_token_unknown:
00621       ostr << "-- unknown token --\n";
00622       break;
00623       }
00624       return ostr;
00625     }
00626 #endif
00627
00628   // Builds an NFA from an input iterator interval.
00629   template<typename _InIter, typename _TraitsT>
00630     class _Compiler
00631     {
00632     public:
00633       typedef _InIter                                            _IterT;
00634       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00635       typedef std::basic_string<_CharT>                          _StringT;
00636       typedef regex_constants::syntax_option_type                _FlagT;
00637
00638     public:
00639       _Compiler(const _InIter& __b, const _InIter& __e,
00640         _TraitsT& __traits, _FlagT __flags);
00641
00642       const _Nfa&
00643       _M_nfa() const
00644       { return _M_state_store; }
00645
00646     private:
00647       typedef _Scanner<_InIter>                              _ScannerT;
00648       typedef typename _ScannerT::_TokenT                    _TokenT;
00649       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00650       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
00651
00652       // accepts a specific token or returns false.
00653       bool
00654       _M_match_token(_TokenT __token);
00655
00656       void
00657       _M_disjunction();
00658
00659       bool
00660       _M_alternative();
00661
00662       bool
00663       _M_term();
00664
00665       bool
00666       _M_assertion();
00667
00668       bool
00669       _M_quantifier();
00670
00671       bool
00672       _M_atom();
00673
00674       bool
00675       _M_bracket_expression();
00676
00677       bool
00678       _M_bracket_list(_RMatcherT& __matcher);
00679
00680       bool
00681       _M_follow_list(_RMatcherT& __matcher);
00682
00683       bool
00684       _M_follow_list2(_RMatcherT& __matcher);
00685
00686       bool
00687       _M_expression_term(_RMatcherT& __matcher);
00688
00689       bool
00690       _M_range_expression(_RMatcherT& __matcher);
00691
00692       bool
00693       _M_start_range(_RMatcherT& __matcher);
00694
00695       bool
00696       _M_collating_symbol(_RMatcherT& __matcher);
00697
00698       bool
00699       _M_equivalence_class(_RMatcherT& __matcher);
00700
00701       bool
00702       _M_character_class(_RMatcherT& __matcher);
00703
00704       int
00705       _M_cur_int_value(int __radix);
00706
00707     private:
00708       _TraitsT&      _M_traits;
00709       _ScannerT      _M_scanner;
00710       _StringT       _M_cur_value;
00711       _Nfa           _M_state_store;
00712       _StackT        _M_stack;
00713     };
00714
00715   template<typename _InIter, typename _TraitsT>
00716     _Compiler<_InIter, _TraitsT>::
00717     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00718           _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00719     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00720       _M_state_store(__flags)
00721     {
00722       typedef _StartTagger<_InIter, _TraitsT> _Start;
00723       typedef _EndTagger<_InIter, _TraitsT> _End;
00724
00725       _StateSeq __r(_M_state_store,
00726                 _M_state_store._M_insert_subexpr_begin(_Start(0)));
00727       _M_disjunction();
00728       if (!_M_stack.empty())
00729     {
00730       __r._M_append(_M_stack.top());
00731       _M_stack.pop();
00732     }
00733       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
00734       __r._M_append(_M_state_store._M_insert_accept());
00735     }
00736
00737   template<typename _InIter, typename _TraitsT>
00738     bool
00739     _Compiler<_InIter, _TraitsT>::
00740     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00741     {
00742       if (token == _M_scanner._M_token())
00743     {
00744       _M_cur_value = _M_scanner._M_value();
00745       _M_scanner._M_advance();
00746       return true;
00747     }
00748       return false;
00749     }
00750
00751   template<typename _InIter, typename _TraitsT>
00752     void
00753     _Compiler<_InIter, _TraitsT>::
00754     _M_disjunction()
00755     {
00756       this->_M_alternative();
00757       if (_M_match_token(_ScannerT::_S_token_or))
00758     {
00759       _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00760       this->_M_disjunction();
00761       _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00762       _M_stack.push(_StateSeq(__alt1, __alt2));
00763     }
00764     }
00765
00766   template<typename _InIter, typename _TraitsT>
00767     bool
00768     _Compiler<_InIter, _TraitsT>::
00769     _M_alternative()
00770     {
00771       if (this->_M_term())
00772     {
00773       _StateSeq __re = _M_stack.top(); _M_stack.pop();
00774       this->_M_alternative();
00775       if (!_M_stack.empty())
00776         {
00777           __re._M_append(_M_stack.top());
00778           _M_stack.pop();
00779         }
00780       _M_stack.push(__re);
00781       return true;
00782     }
00783       return false;
00784     }
00785
00786   template<typename _InIter, typename _TraitsT>
00787     bool
00788     _Compiler<_InIter, _TraitsT>::
00789     _M_term()
00790     {
00791       if (this->_M_assertion())
00792     return true;
00793       if (this->_M_atom())
00794     {
00795       this->_M_quantifier();
00796       return true;
00797     }
00798       return false;
00799     }
00800
00801   template<typename _InIter, typename _TraitsT>
00802     bool
00803     _Compiler<_InIter, _TraitsT>::
00804     _M_assertion()
00805     {
00806       if (_M_match_token(_ScannerT::_S_token_line_begin))
00807     {
00808       // __m.push(_Matcher::_S_opcode_line_begin);
00809       return true;
00810     }
00811       if (_M_match_token(_ScannerT::_S_token_line_end))
00812     {
00813       // __m.push(_Matcher::_S_opcode_line_end);
00814       return true;
00815     }
00816       if (_M_match_token(_ScannerT::_S_token_word_begin))
00817     {
00818       // __m.push(_Matcher::_S_opcode_word_begin);
00819       return true;
00820     }
00821       if (_M_match_token(_ScannerT::_S_token_word_end))
00822     {
00823       // __m.push(_Matcher::_S_opcode_word_end);
00824       return true;
00825     }
00826       return false;
00827     }
00828
00829   template<typename _InIter, typename _TraitsT>
00830     bool
00831     _Compiler<_InIter, _TraitsT>::
00832     _M_quantifier()
00833     {
00834       if (_M_match_token(_ScannerT::_S_token_closure0))
00835     {
00836       if (_M_stack.empty())
00837         __throw_regex_error(regex_constants::error_badrepeat);
00838       _StateSeq __r(_M_stack.top(), -1);
00839       __r._M_append(__r._M_front());
00840       _M_stack.pop();
00841       _M_stack.push(__r);
00842       return true;
00843     }
00844       if (_M_match_token(_ScannerT::_S_token_closure1))
00845     {
00846       if (_M_stack.empty())
00847         __throw_regex_error(regex_constants::error_badrepeat);
00848       _StateSeq __r(_M_state_store,
00849             _M_state_store.
00850             _M_insert_alt(_S_invalid_state_id,
00851                       _M_stack.top()._M_front()));
00852       _M_stack.top()._M_append(__r);
00853       return true;
00854     }
00855       if (_M_match_token(_ScannerT::_S_token_opt))
00856     {
00857       if (_M_stack.empty())
00858       __throw_regex_error(regex_constants::error_badrepeat);
00859       _StateSeq __r(_M_stack.top(), -1);
00860       _M_stack.pop();
00861       _M_stack.push(__r);
00862       return true;
00863     }
00864       if (_M_match_token(_ScannerT::_S_token_interval_begin))
00865     {
00866       if (_M_stack.empty())
00867         __throw_regex_error(regex_constants::error_badrepeat);
00868       if (!_M_match_token(_ScannerT::_S_token_dup_count))
00869         __throw_regex_error(regex_constants::error_badbrace);
00870       _StateSeq __r(_M_stack.top());
00871       int __min_rep = _M_cur_int_value(10);
00872       for (int __i = 1; __i < __min_rep; ++__i)
00873         _M_stack.top()._M_append(__r._M_clone());
00874       if (_M_match_token(_ScannerT::_S_token_comma))
00875         if (_M_match_token(_ScannerT::_S_token_dup_count))
00876           {
00877         int __n = _M_cur_int_value(10) - __min_rep;
00878         if (__n < 0)
00879           __throw_regex_error(regex_constants::error_badbrace);
00880         for (int __i = 0; __i < __n; ++__i)
00881           {
00882             _StateSeq __r(_M_state_store,
00883                   _M_state_store.
00884                   _M_insert_alt(_S_invalid_state_id,
00885                         _M_stack.top()._M_front()));
00886             _M_stack.top()._M_append(__r);
00887           }
00888           }
00889         else
00890           {
00891         _StateSeq __r(_M_stack.top(), -1);
00892         __r._M_push_back(__r._M_front());
00893         _M_stack.pop();
00894         _M_stack.push(__r);
00895           }
00896       if (!_M_match_token(_ScannerT::_S_token_interval_end))
00897         __throw_regex_error(regex_constants::error_brace);
00898       return true;
00899     }
00900       return false;
00901     }
00902
00903   template<typename _InIter, typename _TraitsT>
00904     bool
00905     _Compiler<_InIter, _TraitsT>::
00906     _M_atom()
00907     {
00908       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00909       typedef _StartTagger<_InIter, _TraitsT> _Start;
00910       typedef _EndTagger<_InIter, _TraitsT> _End;
00911
00912       if (_M_match_token(_ScannerT::_S_token_anychar))
00913     {
00914       _M_stack.push(_StateSeq(_M_state_store,
00915                                   _M_state_store._M_insert_matcher
00916                                   (_AnyMatcher)));
00917       return true;
00918     }
00919       if (_M_match_token(_ScannerT::_S_token_ord_char))
00920     {
00921       _M_stack.push(_StateSeq(_M_state_store,
00922                                   _M_state_store._M_insert_matcher
00923                                   (_CMatcher(_M_cur_value[0], _M_traits))));
00924       return true;
00925     }
00926       if (_M_match_token(_ScannerT::_S_token_quoted_char))
00927     {
00928       // note that in the ECMA grammar, this case covers backrefs.
00929       _M_stack.push(_StateSeq(_M_state_store,
00930                   _M_state_store._M_insert_matcher
00931                   (_CMatcher(_M_cur_value[0], _M_traits))));
00932       return true;
00933     }
00934       if (_M_match_token(_ScannerT::_S_token_backref))
00935     {
00936       // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
00937       return true;
00938     }
00939       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00940     {
00941       int __mark = _M_state_store._M_sub_count();
00942       _StateSeq __r(_M_state_store,
00943             _M_state_store.
00944             _M_insert_subexpr_begin(_Start(__mark)));
00945       this->_M_disjunction();
00946       if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00947         __throw_regex_error(regex_constants::error_paren);
00948       if (!_M_stack.empty())
00949         {
00950           __r._M_append(_M_stack.top());
00951           _M_stack.pop();
00952         }
00953       __r._M_append(_M_state_store._M_insert_subexpr_end
00954             (__mark, _End(__mark)));
00955       _M_stack.push(__r);
00956       return true;
00957     }
00958       return _M_bracket_expression();
00959     }
00960
00961   template<typename _InIter, typename _TraitsT>
00962     bool
00963     _Compiler<_InIter, _TraitsT>::
00964     _M_bracket_expression()
00965     {
00966       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00967     {
00968       _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00969                    _M_traits);
00970       if (!_M_bracket_list(__matcher)
00971           || !_M_match_token(_ScannerT::_S_token_bracket_end))
00972         __throw_regex_error(regex_constants::error_brack);
00973       _M_stack.push(_StateSeq(_M_state_store,
00974                   _M_state_store._M_insert_matcher(__matcher)));
00975       return true;
00976     }
00977       return false;
00978     }
00979
00980   // If the dash is the last character in the bracket expression, it is not
00981   // special.
00982   template<typename _InIter, typename _TraitsT>
00983     bool
00984     _Compiler<_InIter, _TraitsT>::
00985     _M_bracket_list(_RMatcherT& __matcher)
00986     {
00987       if (_M_follow_list(__matcher))
00988     {
00989       if (_M_match_token(_ScannerT::_S_token_dash))
00990         __matcher._M_add_char(_M_cur_value[0]);
00991       return true;
00992     }
00993       return false;
00994     }
00995
00996   template<typename _InIter, typename _TraitsT>
00997     bool
00998     _Compiler<_InIter, _TraitsT>::
00999     _M_follow_list(_RMatcherT& __matcher)
01000     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
01001
01002   template<typename _InIter, typename _TraitsT>
01003     bool
01004     _Compiler<_InIter, _TraitsT>::
01005     _M_follow_list2(_RMatcherT& __matcher)
01006     {
01007       if (_M_expression_term(__matcher))
01008     return _M_follow_list2(__matcher);
01009       return true;
01010     }
01011
01012   template<typename _InIter, typename _TraitsT>
01013     bool
01014     _Compiler<_InIter, _TraitsT>::
01015     _M_expression_term(_RMatcherT& __matcher)
01016     {
01017       return (_M_collating_symbol(__matcher)
01018           || _M_character_class(__matcher)
01019           || _M_equivalence_class(__matcher)
01020           || (_M_start_range(__matcher)
01021           && _M_range_expression(__matcher)));
01022     }
01023
01024   template<typename _InIter, typename _TraitsT>
01025     bool
01026     _Compiler<_InIter, _TraitsT>::
01027     _M_range_expression(_RMatcherT& __matcher)
01028     {
01029       if (!_M_collating_symbol(__matcher))
01030     if (!_M_match_token(_ScannerT::_S_token_dash))
01031       __throw_regex_error(regex_constants::error_range);
01032       __matcher._M_make_range();
01033       return true;
01034     }
01035
01036   template<typename _InIter, typename _TraitsT>
01037     bool
01038     _Compiler<_InIter, _TraitsT>::
01039     _M_start_range(_RMatcherT& __matcher)
01040     { return _M_match_token(_ScannerT::_S_token_dash); }
01041
01042   template<typename _InIter, typename _TraitsT>
01043     bool
01044     _Compiler<_InIter, _TraitsT>::
01045     _M_collating_symbol(_RMatcherT& __matcher)
01046     {
01047       if (_M_match_token(_ScannerT::_S_token_collelem_single))
01048     {
01049       __matcher._M_add_char(_M_cur_value[0]);
01050       return true;
01051     }
01052       if (_M_match_token(_ScannerT::_S_token_collsymbol))
01053     {
01054       __matcher._M_add_collating_element(_M_cur_value);
01055       return true;
01056     }
01057       return false;
01058     }
01059
01060   template<typename _InIter, typename _TraitsT>
01061     bool
01062     _Compiler<_InIter, _TraitsT>::
01063     _M_equivalence_class(_RMatcherT& __matcher)
01064     {
01065       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01066     {
01067       __matcher._M_add_equivalence_class(_M_cur_value);
01068       return true;
01069     }
01070       return false;
01071     }
01072
01073   template<typename _InIter, typename _TraitsT>
01074     bool
01075     _Compiler<_InIter, _TraitsT>::
01076     _M_character_class(_RMatcherT& __matcher)
01077     {
01078       if (_M_match_token(_ScannerT::_S_token_char_class_name))
01079     {
01080       __matcher._M_add_character_class(_M_cur_value);
01081       return true;
01082     }
01083       return false;
01084     }
01085
01086   template<typename _InIter, typename _TraitsT>
01087     int
01088     _Compiler<_InIter, _TraitsT>::
01089     _M_cur_int_value(int __radix)
01090     {
01091       int __v = 0;
01092       for (typename _StringT::size_type __i = 0;
01093        __i < _M_cur_value.length(); ++__i)
01094     __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01095       return __v;
01096     }
01097
01098   template<typename _InIter, typename _TraitsT>
01099     _AutomatonPtr
01100     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01101           regex_constants::syntax_option_type __f)
01102     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01103                                         __f)._M_nfa())); }
01104
01105 _GLIBCXX_END_NAMESPACE_VERSION
01106 } // namespace __regex
01107 } // namespace std
01108
01109 /* vim: set ts=8 sw=2 sts=2: */