libstdc++
codecvt.h
Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
00004 // 2009, 2010, 2011  Free Software Foundation, Inc.
00005 //
00006 // This file is part of the GNU ISO C++ Library.  This library is free
00007 // software; you can redistribute it and/or modify it under the
00008 // terms of the GNU General Public License as published by the
00009 // Free Software Foundation; either version 3, or (at your option)
00010 // any later version.
00011 
00012 // This library is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 
00017 // Under Section 7 of GPL version 3, you are granted additional
00018 // permissions described in the GCC Runtime Library Exception, version
00019 // 3.1, as published by the Free Software Foundation.
00020 
00021 // You should have received a copy of the GNU General Public License and
00022 // a copy of the GCC Runtime Library Exception along with this program;
00023 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00024 // <http://www.gnu.org/licenses/>.
00025 
00026 /** @file bits/codecvt.h
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{locale}
00029  */
00030 
00031 //
00032 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00033 //
00034 
00035 // Written by Benjamin Kosnik <bkoz@redhat.com>
00036 
00037 #ifndef _CODECVT_H
00038 #define _CODECVT_H 1
00039 
00040 #pragma GCC system_header
00041 
00042 namespace std _GLIBCXX_VISIBILITY(default)
00043 {
00044 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00045 
00046   /// Empty base class for codecvt facet [22.2.1.5]; it declares only the result enum.
00047   class codecvt_base
00048   {
00049   public:
00050     enum result  // Status codes returned by the conversion functions.
00051     {
00052       ok,       // Conversion (or state reset) completed.
00053       partial,  // Input ended early or the output buffer was too small.
00054       error,    // The conversion failed.
00055       noconv    // No conversion was necessary.
00056     };
00057   };
00058 
00059   /**
00060    *  @brief  Common base for codecvt functions.
00061    *
00062    *  This template class provides implementations of the public functions
00063    *  that forward to the protected virtual functions.
00064    *
00065    *  This template also declares the protected virtual functions as
00066    *  pure virtual (= 0), so derived classes must implement them.
00067   */
00068   template<typename _InternT, typename _ExternT, typename _StateT>
00069     class __codecvt_abstract_base
00070     : public locale::facet, public codecvt_base
00071     {
00072     public:
00073       // Types:
00074       typedef codecvt_base::result  result;
00075       typedef _InternT          intern_type;
00076       typedef _ExternT          extern_type;
00077       typedef _StateT           state_type;
00078 
00079       // 22.2.1.5.1 codecvt members
00080       /**
00081        *  @brief  Convert from internal to external character set.
00082        *
00083        *  Converts input string of intern_type to output string of
00084        *  extern_type.  This is analogous to wcsrtombs.  It does this by
00085        *  calling codecvt::do_out.
00086        *
00087        *  The source and destination character sets are determined by the
00088        *  facet's locale, internal and external types.
00089        *
00090        *  The characters in [from,from_end) are converted and written to
00091        *  [to,to_end).  from_next and to_next are set to point to the
00092        *  character following the last successfully converted character,
00093        *  respectively.  If the result needed no conversion, from_next and
00094        *  to_next are not affected.
00095        *
00096        *  The @a state argument should be initialized if the input is at the
00097        *  beginning and carried from a previous call if continuing
00098        *  conversion.  There are no guarantees about how @a state is used.
00099        *
00100        *  The result returned is a member of codecvt_base::result.  If
00101        *  all the input is converted, returns codecvt_base::ok.  If no
00102        *  conversion is necessary, returns codecvt_base::noconv.  If
00103        *  the input ends early or there is insufficient space in the
00104        *  output, returns codecvt_base::partial.  Otherwise the
00105        *  conversion failed and codecvt_base::error is returned.
00106        *
00107        *  @param  __state  Persistent conversion state data.
00108        *  @param  __from  Start of input.
00109        *  @param  __from_end  End of input.
00110        *  @param  __from_next  Returns start of unconverted data.
00111        *  @param  __to  Start of output buffer.
00112        *  @param  __to_end  End of output buffer.
00113        *  @param  __to_next  Returns start of unused output area.
00114        *  @return  codecvt_base::result.
00115       */
00116       result
00117       out(state_type& __state, const intern_type* __from,
00118       const intern_type* __from_end, const intern_type*& __from_next,
00119       extern_type* __to, extern_type* __to_end,
00120       extern_type*& __to_next) const
00121       {
00122     return this->do_out(__state, __from, __from_end, __from_next,
00123                 __to, __to_end, __to_next);
00124       }
00125 
00126       /**
00127        *  @brief  Reset conversion state.
00128        *
00129        *  Writes characters to output that would restore @a state to initial
00130        *  conditions.  The idea is that if a partial conversion occurs, then
00131        *  converting the characters written by this function would leave
00132        *  the state in initial conditions, rather than partial conversion
00133        *  state.  It does this by calling codecvt::do_unshift().
00134        *
00135        *  For example, if 4 external characters always converted to 1 internal
00136        *  character, and input to in() had 6 external characters with state
00137        *  saved, this function would write two characters to the output and
00138        *  set the state to initialized conditions.
00139        *
00140        *  The source and destination character sets are determined by the
00141        *  facet's locale, internal and external types.
00142        *
00143        *  The result returned is a member of codecvt_base::result.  If the
00144        *  state could be reset and data written, returns codecvt_base::ok.  If
00145        *  no conversion is necessary, returns codecvt_base::noconv.  If the
00146        *  output has insufficient space, returns codecvt_base::partial.
00147        *  Otherwise the reset failed and codecvt_base::error is returned.
00148        *
00149        *  @param  __state  Persistent conversion state data.
00150        *  @param  __to  Start of output buffer.
00151        *  @param  __to_end  End of output buffer.
00152        *  @param  __to_next  Returns start of unused output area.
00153        *  @return  codecvt_base::result.
00154       */
00155       result
00156       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
00157           extern_type*& __to_next) const
00158       { return this->do_unshift(__state, __to,__to_end,__to_next); }
00159 
00160       /**
00161        *  @brief  Convert from external to internal character set.
00162        *
00163        *  Converts input string of extern_type to output string of
00164        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
00165        *  calling codecvt::do_in.
00166        *
00167        *  The source and destination character sets are determined by the
00168        *  facet's locale, internal and external types.
00169        *
00170        *  The characters in [from,from_end) are converted and written to
00171        *  [to,to_end).  from_next and to_next are set to point to the
00172        *  character following the last successfully converted character,
00173        *  respectively.  If the result needed no conversion, from_next and
00174        *  to_next are not affected.
00175        *
00176        *  The @a state argument should be initialized if the input is at the
00177        *  beginning and carried from a previous call if continuing
00178        *  conversion.  There are no guarantees about how @a state is used.
00179        *
00180        *  The result returned is a member of codecvt_base::result.  If
00181        *  all the input is converted, returns codecvt_base::ok.  If no
00182        *  conversion is necessary, returns codecvt_base::noconv.  If
00183        *  the input ends early or there is insufficient space in the
00184        *  output, returns codecvt_base::partial.  Otherwise the
00185        *  conversion failed and codecvt_base::error is returned.
00186        *
00187        *  @param  __state  Persistent conversion state data.
00188        *  @param  __from  Start of input.
00189        *  @param  __from_end  End of input.
00190        *  @param  __from_next  Returns start of unconverted data.
00191        *  @param  __to  Start of output buffer.
00192        *  @param  __to_end  End of output buffer.
00193        *  @param  __to_next  Returns start of unused output area.
00194        *  @return  codecvt_base::result.
00195       */
00196       result
00197       in(state_type& __state, const extern_type* __from,
00198      const extern_type* __from_end, const extern_type*& __from_next,
00199      intern_type* __to, intern_type* __to_end,
00200      intern_type*& __to_next) const
00201       {
00202     return this->do_in(__state, __from, __from_end, __from_next,
00203                __to, __to_end, __to_next);
00204       }
00205       /// Forwards to the protected virtual do_encoding().
00206       int
00207       encoding() const throw()
00208       { return this->do_encoding(); }
00209       /// Forwards to the protected virtual do_always_noconv().
00210       bool
00211       always_noconv() const throw()
00212       { return this->do_always_noconv(); }
00213       /// Forwards to the protected virtual do_length().
00214       int
00215       length(state_type& __state, const extern_type* __from,
00216          const extern_type* __end, size_t __max) const
00217       { return this->do_length(__state, __from, __end, __max); }
00218       /// Forwards to the protected virtual do_max_length().
00219       int
00220       max_length() const throw()
00221       { return this->do_max_length(); }
00222       // The constructor and destructor below are protected.
00223     protected:
00224       explicit
00225       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
00226 
00227       virtual
00228       ~__codecvt_abstract_base() { }
00229 
00230       /**
00231        *  @brief  Convert from internal to external character set.
00232        *
00233        *  Converts input string of intern_type to output string of
00234        *  extern_type.  This function is a hook for derived classes to change
00235        *  the value returned.  Pure virtual here.  @see out for more information.
00236       */
00237       virtual result
00238       do_out(state_type& __state, const intern_type* __from,
00239          const intern_type* __from_end, const intern_type*& __from_next,
00240          extern_type* __to, extern_type* __to_end,
00241          extern_type*& __to_next) const = 0;
00242       /// Hook called by unshift(); pure virtual.
00243       virtual result
00244       do_unshift(state_type& __state, extern_type* __to,
00245          extern_type* __to_end, extern_type*& __to_next) const = 0;
00246       /// Hook called by in(); pure virtual.
00247       virtual result
00248       do_in(state_type& __state, const extern_type* __from,
00249         const extern_type* __from_end, const extern_type*& __from_next,
00250         intern_type* __to, intern_type* __to_end,
00251         intern_type*& __to_next) const = 0;
00252       /// Hook called by encoding(); pure virtual.
00253       virtual int
00254       do_encoding() const throw() = 0;
00255       /// Hook called by always_noconv(); pure virtual.
00256       virtual bool
00257       do_always_noconv() const throw() = 0;
00258       /// Hook called by length(); pure virtual.
00259       virtual int
00260       do_length(state_type&, const extern_type* __from,
00261         const extern_type* __end, size_t __max) const = 0;
00262       /// Hook called by max_length(); pure virtual.
00263       virtual int
00264       do_max_length() const throw() = 0;
00265     };
00266 
00267 
00268 
00269   /**
00270    *  @brief  Primary class template codecvt.
00271    *  @ingroup locales
00272    *
00273    *  NB: Generic, mostly useless implementation.
00274    *  Only declares the do_* overrides; their bodies are not in this header.
00275   */
00276    template<typename _InternT, typename _ExternT, typename _StateT>
00277     class codecvt
00278     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
00279     {
00280     public:
00281       // Types:
00282       typedef codecvt_base::result  result;
00283       typedef _InternT          intern_type;
00284       typedef _ExternT          extern_type;
00285       typedef _StateT           state_type;
00286 
00287     protected:
00288       __c_locale            _M_c_locale_codecvt;  // Set to 0 by the default constructor.
00289 
00290     public:
00291       static locale::id         id;  // Defined after the class body.
00292       /// Passes __refs to the abstract base and zero-initialises _M_c_locale_codecvt.
00293       explicit
00294       codecvt(size_t __refs = 0)
00295       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
00296     _M_c_locale_codecvt(0)
00297       { }
00298       /// Declared only; no definition appears in this header.
00299       explicit
00300       codecvt(__c_locale __cloc, size_t __refs = 0);
00301 
00302     protected:
00303       virtual
00304       ~codecvt() { }
00305       // Overrides of the __codecvt_abstract_base hooks (declarations only).
00306       virtual result
00307       do_out(state_type& __state, const intern_type* __from,
00308          const intern_type* __from_end, const intern_type*& __from_next,
00309          extern_type* __to, extern_type* __to_end,
00310          extern_type*& __to_next) const;
00311 
00312       virtual result
00313       do_unshift(state_type& __state, extern_type* __to,
00314          extern_type* __to_end, extern_type*& __to_next) const;
00315 
00316       virtual result
00317       do_in(state_type& __state, const extern_type* __from,
00318         const extern_type* __from_end, const extern_type*& __from_next,
00319         intern_type* __to, intern_type* __to_end,
00320         intern_type*& __to_next) const;
00321 
00322       virtual int
00323       do_encoding() const throw();
00324 
00325       virtual bool
00326       do_always_noconv() const throw();
00327 
00328       virtual int
00329       do_length(state_type&, const extern_type* __from,
00330         const extern_type* __end, size_t __max) const;
00331 
00332       virtual int
00333       do_max_length() const throw();
00334     };
00335   /// Out-of-class definition of the primary template's static id member.
00336   template<typename _InternT, typename _ExternT, typename _StateT>
00337     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
00338 
00339   /// class codecvt<char, char, mbstate_t> specialization.
00340   template<>
00341     class codecvt<char, char, mbstate_t>
00342     : public __codecvt_abstract_base<char, char, mbstate_t>
00343     {
00344     public:
00345       // Types (result is inherited from __codecvt_abstract_base):
00346       typedef char          intern_type;
00347       typedef char          extern_type;
00348       typedef mbstate_t         state_type;
00349 
00350     protected:
00351       __c_locale            _M_c_locale_codecvt;
00352 
00353     public:
00354       static locale::id id;
00355       // Both constructors are declared only; this header gives them no body.
00356       explicit
00357       codecvt(size_t __refs = 0);
00358 
00359       explicit
00360       codecvt(__c_locale __cloc, size_t __refs = 0);
00361 
00362     protected:
00363       virtual
00364       ~codecvt();
00365       // Overrides of the __codecvt_abstract_base hooks (declarations only).
00366       virtual result
00367       do_out(state_type& __state, const intern_type* __from,
00368          const intern_type* __from_end, const intern_type*& __from_next,
00369          extern_type* __to, extern_type* __to_end,
00370          extern_type*& __to_next) const;
00371 
00372       virtual result
00373       do_unshift(state_type& __state, extern_type* __to,
00374          extern_type* __to_end, extern_type*& __to_next) const;
00375 
00376       virtual result
00377       do_in(state_type& __state, const extern_type* __from,
00378         const extern_type* __from_end, const extern_type*& __from_next,
00379         intern_type* __to, intern_type* __to_end,
00380         intern_type*& __to_next) const;
00381 
00382       virtual int
00383       do_encoding() const throw();
00384 
00385       virtual bool
00386       do_always_noconv() const throw();
00387 
00388       virtual int
00389       do_length(state_type&, const extern_type* __from,
00390         const extern_type* __end, size_t __max) const;
00391 
00392       virtual int
00393       do_max_length() const throw();
00394   };
00395 
00396 #ifdef _GLIBCXX_USE_WCHAR_T
00397   /// class codecvt<wchar_t, char, mbstate_t> specialization.
00398   template<>
00399     class codecvt<wchar_t, char, mbstate_t>
00400     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
00401     {
00402     public:
00403       // Types (result is inherited from __codecvt_abstract_base):
00404       typedef wchar_t           intern_type;
00405       typedef char          extern_type;
00406       typedef mbstate_t         state_type;
00407 
00408     protected:
00409       __c_locale            _M_c_locale_codecvt;
00410 
00411     public:
00412       static locale::id         id;
00413       // Both constructors are declared only; this header gives them no body.
00414       explicit
00415       codecvt(size_t __refs = 0);
00416 
00417       explicit
00418       codecvt(__c_locale __cloc, size_t __refs = 0);
00419 
00420     protected:
00421       virtual
00422       ~codecvt();
00423       // Overrides of the __codecvt_abstract_base hooks (declarations only).
00424       virtual result
00425       do_out(state_type& __state, const intern_type* __from,
00426          const intern_type* __from_end, const intern_type*& __from_next,
00427          extern_type* __to, extern_type* __to_end,
00428          extern_type*& __to_next) const;
00429 
00430       virtual result
00431       do_unshift(state_type& __state,
00432          extern_type* __to, extern_type* __to_end,
00433          extern_type*& __to_next) const;
00434 
00435       virtual result
00436       do_in(state_type& __state,
00437          const extern_type* __from, const extern_type* __from_end,
00438          const extern_type*& __from_next,
00439          intern_type* __to, intern_type* __to_end,
00440          intern_type*& __to_next) const;
00441 
00442       virtual
00443       int do_encoding() const throw();
00444 
00445       virtual
00446       bool do_always_noconv() const throw();
00447 
00448       virtual
00449       int do_length(state_type&, const extern_type* __from,
00450             const extern_type* __end, size_t __max) const;
00451 
00452       virtual int
00453       do_max_length() const throw();
00454     };
00455 #endif //_GLIBCXX_USE_WCHAR_T
00456 
00457   /// class codecvt_byname [22.2.1.6]: a codecvt constructed from a locale name.
00458   template<typename _InternT, typename _ExternT, typename _StateT>
00459     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
00460     {
00461     public:
00462       explicit
00463       codecvt_byname(const char* __s, size_t __refs = 0)
00464       : codecvt<_InternT, _ExternT, _StateT>(__refs)
00465       {  // The names "C" and "POSIX" need no work beyond the base constructor.
00466     if (__builtin_strcmp(__s, "C") != 0
00467         && __builtin_strcmp(__s, "POSIX") != 0)
00468       {  // Any other name: destroy _M_c_locale_codecvt, then re-create it from __s.
00469         this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
00470         this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
00471       }
00472       }
00473 
00474     protected:
00475       virtual
00476       ~codecvt_byname() { }
00477     };
00478 
00479   // Suppress implicit instantiation of the required specializations below;
00480   // they are provided by explicit instantiations elsewhere.
00481 #if _GLIBCXX_EXTERN_TEMPLATE
00482   extern template class codecvt_byname<char, char, mbstate_t>;
00483 
00484   extern template
00485     const codecvt<char, char, mbstate_t>&
00486     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
00487 
00488   extern template
00489     bool
00490     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
00491 
00492 #ifdef _GLIBCXX_USE_WCHAR_T
00493   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
00494 
00495   extern template
00496     const codecvt<wchar_t, char, mbstate_t>&
00497     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00498 
00499   extern template
00500     bool
00501     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00502 #endif // _GLIBCXX_USE_WCHAR_T
00503 #endif // _GLIBCXX_EXTERN_TEMPLATE
00504 
00505 _GLIBCXX_END_NAMESPACE_VERSION
00506 } // namespace std
00507 
00508 #endif // _CODECVT_H