libstdc++
|
00001 // Locale support (codecvt) -*- C++ -*- 00002 00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 00004 // 2008, 2009, 2010 00005 // Free Software Foundation, Inc. 00006 // 00007 // This file is part of the GNU ISO C++ Library. This library is free 00008 // software; you can redistribute it and/or modify it under the 00009 // terms of the GNU General Public License as published by the 00010 // Free Software Foundation; either version 3, or (at your option) 00011 // any later version. 00012 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 // GNU General Public License for more details. 00017 00018 // Under Section 7 of GPL version 3, you are granted additional 00019 // permissions described in the GCC Runtime Library Exception, version 00020 // 3.1, as published by the Free Software Foundation. 00021 00022 // You should have received a copy of the GNU General Public License and 00023 // a copy of the GCC Runtime Library Exception along with this program; 00024 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00025 // <http://www.gnu.org/licenses/>. 00026 00027 // 00028 // ISO C++ 14882: 22.2.1.5 Template class codecvt 00029 // 00030 00031 // Written by Benjamin Kosnik <bkoz@redhat.com> 00032 00033 /** @file ext/codecvt_specializations.h 00034 * This file is a GNU extension to the Standard C++ Library. 00035 */ 00036 00037 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H 00038 #define _EXT_CODECVT_SPECIALIZATIONS_H 1 00039 00040 #include <bits/c++config.h> 00041 #include <locale> 00042 #include <iconv.h> 00043 00044 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default) 00045 { 00046 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00047 00048 /// Extension to use iconv for dealing with character encodings. 
// This includes conversions and comparisons between various character
// sets. This object encapsulates data that may need to be shared between
// char_traits, codecvt and ctype.
//
// An encoding_state stores the names of an internal and an external
// encoding and owns up to two iconv conversion descriptors opened from
// those names (external -> internal and internal -> external).  The
// descriptors are closed by the destructor.
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string _M_int_enc;

  // Name of external character set encoding.
  std::string _M_ext_enc;

  // Conversion descriptor between external encoding to internal encoding.
  // Zero means "not open".
  descriptor_type _M_in_desc;

  // Conversion descriptor between internal encoding to external encoding.
  // Zero means "not open".
  descriptor_type _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int _M_bytes;

public:
  // Creates an empty state: no encoding names, no open descriptors,
  // so good() returns false.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state for the internal encoding __int and the external
  // encoding __ext and opens both descriptors via init().
  //   __ibom   byte-order marker for the internal encoding (0: none)
  //   __ebom   byte-order marker for the external encoding (0: none)
  //   __bytes  value later reported by character_ratio()
  // A descriptor that cannot be opened is reported through
  // std::__throw_runtime_error.
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.  The copy opens fresh descriptors of its own.
  encoding_state(const encoding_state& __obj)
  : _M_int_enc(__obj._M_int_enc), _M_ext_enc(__obj._M_ext_enc),
    _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__obj._M_ext_bom), _M_int_bom(__obj._M_int_bom),
    _M_bytes(__obj._M_bytes)
  { init(); }

  // Need assignment operator as well.
  // Closes the current descriptors, copies the encoding information
  // and opens new descriptors.  If opening fails the error is reported
  // through std::__throw_runtime_error and *this holds the new names
  // with no open descriptor.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True only when both descriptors are open and valid.
  bool
  good() const throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __in_ok = _M_in_desc && _M_in_desc != __err;
    const bool __out_ok = _M_out_desc && _M_out_desc != __err;
    return __in_ok && __out_ok;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  // Descriptor converting external -> internal.
  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  // Descriptor converting internal -> external.
  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is not open yet, provided both encoding
  // names are non-empty.  On failure every descriptor held by this
  // object is released before the error is reported: init() also runs
  // from constructors, where the destructor would not get a chance to
  // close a descriptor that had already been opened.
  void
  init()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_int_enc.empty() || _M_ext_enc.empty())
      return;

    if (!_M_in_desc)
      {
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv input descriptor failed"));
          }
      }

    if (!_M_out_desc)
      {
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv output descriptor failed"));
          }
      }
  }

  // Replaces the contents of *this with the encoding information of
  // __obj and opens new descriptors for it.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes any open descriptor and marks both as "not open".  The
  // members are reset unconditionally so that a descriptor left at the
  // iconv error value by an earlier failure does not stop a later
  // init() from opening it again.
  void
  destroy() throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_in_desc && _M_in_desc != __err)
      iconv_close(_M_in_desc);
    _M_in_desc = 0;

    if (_M_out_desc && _M_out_desc != __err)
      iconv_close(_M_out_desc);
    _M_out_desc = 0;
  }
};

/// encoding_char_traits
// Custom traits type with encoding_state for the state type, and the
// associated fpos<encoding_state> for the position type, all other
// bits equivalent to the required char_traits instantiations.
00211 template<typename _CharT> 00212 struct encoding_char_traits : public std::char_traits<_CharT> 00213 { 00214 typedef encoding_state state_type; 00215 typedef typename std::fpos<state_type> pos_type; 00216 }; 00217 00218 _GLIBCXX_END_NAMESPACE_VERSION 00219 } // namespace 00220 00221 00222 namespace std _GLIBCXX_VISIBILITY(default) 00223 { 00224 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00225 00226 using __gnu_cxx::encoding_state; 00227 00228 /// codecvt<InternT, _ExternT, encoding_state> specialization. 00229 // This partial specialization takes advantage of iconv to provide 00230 // code conversions between a large number of character encodings. 00231 template<typename _InternT, typename _ExternT> 00232 class codecvt<_InternT, _ExternT, encoding_state> 00233 : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state> 00234 { 00235 public: 00236 // Types: 00237 typedef codecvt_base::result result; 00238 typedef _InternT intern_type; 00239 typedef _ExternT extern_type; 00240 typedef __gnu_cxx::encoding_state state_type; 00241 typedef state_type::descriptor_type descriptor_type; 00242 00243 // Data Members: 00244 static locale::id id; 00245 00246 explicit 00247 codecvt(size_t __refs = 0) 00248 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00249 { } 00250 00251 explicit 00252 codecvt(state_type& __enc, size_t __refs = 0) 00253 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00254 { } 00255 00256 protected: 00257 virtual 00258 ~codecvt() { } 00259 00260 virtual result 00261 do_out(state_type& __state, const intern_type* __from, 00262 const intern_type* __from_end, const intern_type*& __from_next, 00263 extern_type* __to, extern_type* __to_end, 00264 extern_type*& __to_next) const; 00265 00266 virtual result 00267 do_unshift(state_type& __state, extern_type* __to, 00268 extern_type* __to_end, extern_type*& __to_next) const; 00269 00270 virtual result 00271 do_in(state_type& __state, const extern_type* __from, 00272 
const extern_type* __from_end, const extern_type*& __from_next, 00273 intern_type* __to, intern_type* __to_end, 00274 intern_type*& __to_next) const; 00275 00276 virtual int 00277 do_encoding() const throw(); 00278 00279 virtual bool 00280 do_always_noconv() const throw(); 00281 00282 virtual int 00283 do_length(state_type&, const extern_type* __from, 00284 const extern_type* __end, size_t __max) const; 00285 00286 virtual int 00287 do_max_length() const throw(); 00288 }; 00289 00290 template<typename _InternT, typename _ExternT> 00291 locale::id 00292 codecvt<_InternT, _ExternT, encoding_state>::id; 00293 00294 // This adaptor works around the signature problems of the second 00295 // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 00296 // uses 'char**', which matches the POSIX 1003.1-2001 standard. 00297 // Using this adaptor, g++ will do the work for us. 00298 template<typename _Tp> 00299 inline size_t 00300 __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*), 00301 iconv_t __cd, char** __inbuf, size_t* __inbytes, 00302 char** __outbuf, size_t* __outbytes) 00303 { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); } 00304 00305 template<typename _InternT, typename _ExternT> 00306 codecvt_base::result 00307 codecvt<_InternT, _ExternT, encoding_state>:: 00308 do_out(state_type& __state, const intern_type* __from, 00309 const intern_type* __from_end, const intern_type*& __from_next, 00310 extern_type* __to, extern_type* __to_end, 00311 extern_type*& __to_next) const 00312 { 00313 result __ret = codecvt_base::error; 00314 if (__state.good()) 00315 { 00316 const descriptor_type& __desc = __state.out_descriptor(); 00317 const size_t __fmultiple = sizeof(intern_type); 00318 size_t __fbytes = __fmultiple * (__from_end - __from); 00319 const size_t __tmultiple = sizeof(extern_type); 00320 size_t __tbytes = __tmultiple * (__to_end - __to); 00321 00322 // Argument list for iconv specifies a byte sequence. 
Thus, 00323 // all to/from arrays must be brutally casted to char*. 00324 char* __cto = reinterpret_cast<char*>(__to); 00325 char* __cfrom; 00326 size_t __conv; 00327 00328 // Some encodings need a byte order marker as the first item 00329 // in the byte stream, to designate endian-ness. The default 00330 // value for the byte order marker is NULL, so if this is 00331 // the case, it's not necessary and we can just go on our 00332 // merry way. 00333 int __int_bom = __state.internal_bom(); 00334 if (__int_bom) 00335 { 00336 size_t __size = __from_end - __from; 00337 intern_type* __cfixed = static_cast<intern_type*> 00338 (__builtin_alloca(sizeof(intern_type) * (__size + 1))); 00339 __cfixed[0] = static_cast<intern_type>(__int_bom); 00340 char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 00341 __cfrom = reinterpret_cast<char*>(__cfixed); 00342 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00343 &__fbytes, &__cto, &__tbytes); 00344 } 00345 else 00346 { 00347 intern_type* __cfixed = const_cast<intern_type*>(__from); 00348 __cfrom = reinterpret_cast<char*>(__cfixed); 00349 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 00350 &__cto, &__tbytes); 00351 } 00352 00353 if (__conv != size_t(-1)) 00354 { 00355 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00356 __to_next = reinterpret_cast<extern_type*>(__cto); 00357 __ret = codecvt_base::ok; 00358 } 00359 else 00360 { 00361 if (__fbytes < __fmultiple * (__from_end - __from)) 00362 { 00363 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00364 __to_next = reinterpret_cast<extern_type*>(__cto); 00365 __ret = codecvt_base::partial; 00366 } 00367 else 00368 __ret = codecvt_base::error; 00369 } 00370 } 00371 return __ret; 00372 } 00373 00374 template<typename _InternT, typename _ExternT> 00375 codecvt_base::result 00376 codecvt<_InternT, _ExternT, encoding_state>:: 00377 do_unshift(state_type& __state, extern_type* __to, 00378 extern_type* __to_end, extern_type*& __to_next) 
const 00379 { 00380 result __ret = codecvt_base::error; 00381 if (__state.good()) 00382 { 00383 const descriptor_type& __desc = __state.in_descriptor(); 00384 const size_t __tmultiple = sizeof(intern_type); 00385 size_t __tlen = __tmultiple * (__to_end - __to); 00386 00387 // Argument list for iconv specifies a byte sequence. Thus, 00388 // all to/from arrays must be brutally casted to char*. 00389 char* __cto = reinterpret_cast<char*>(__to); 00390 size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0, 00391 &__cto, &__tlen); 00392 00393 if (__conv != size_t(-1)) 00394 { 00395 __to_next = reinterpret_cast<extern_type*>(__cto); 00396 if (__tlen == __tmultiple * (__to_end - __to)) 00397 __ret = codecvt_base::noconv; 00398 else if (__tlen == 0) 00399 __ret = codecvt_base::ok; 00400 else 00401 __ret = codecvt_base::partial; 00402 } 00403 else 00404 __ret = codecvt_base::error; 00405 } 00406 return __ret; 00407 } 00408 00409 template<typename _InternT, typename _ExternT> 00410 codecvt_base::result 00411 codecvt<_InternT, _ExternT, encoding_state>:: 00412 do_in(state_type& __state, const extern_type* __from, 00413 const extern_type* __from_end, const extern_type*& __from_next, 00414 intern_type* __to, intern_type* __to_end, 00415 intern_type*& __to_next) const 00416 { 00417 result __ret = codecvt_base::error; 00418 if (__state.good()) 00419 { 00420 const descriptor_type& __desc = __state.in_descriptor(); 00421 const size_t __fmultiple = sizeof(extern_type); 00422 size_t __flen = __fmultiple * (__from_end - __from); 00423 const size_t __tmultiple = sizeof(intern_type); 00424 size_t __tlen = __tmultiple * (__to_end - __to); 00425 00426 // Argument list for iconv specifies a byte sequence. Thus, 00427 // all to/from arrays must be brutally casted to char*. 00428 char* __cto = reinterpret_cast<char*>(__to); 00429 char* __cfrom; 00430 size_t __conv; 00431 00432 // Some encodings need a byte order marker as the first item 00433 // in the byte stream, to designate endian-ness. 
The default 00434 // value for the byte order marker is NULL, so if this is 00435 // the case, it's not necessary and we can just go on our 00436 // merry way. 00437 int __ext_bom = __state.external_bom(); 00438 if (__ext_bom) 00439 { 00440 size_t __size = __from_end - __from; 00441 extern_type* __cfixed = static_cast<extern_type*> 00442 (__builtin_alloca(sizeof(extern_type) * (__size + 1))); 00443 __cfixed[0] = static_cast<extern_type>(__ext_bom); 00444 char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 00445 __cfrom = reinterpret_cast<char*>(__cfixed); 00446 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00447 &__flen, &__cto, &__tlen); 00448 } 00449 else 00450 { 00451 extern_type* __cfixed = const_cast<extern_type*>(__from); 00452 __cfrom = reinterpret_cast<char*>(__cfixed); 00453 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00454 &__flen, &__cto, &__tlen); 00455 } 00456 00457 00458 if (__conv != size_t(-1)) 00459 { 00460 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00461 __to_next = reinterpret_cast<intern_type*>(__cto); 00462 __ret = codecvt_base::ok; 00463 } 00464 else 00465 { 00466 if (__flen < static_cast<size_t>(__from_end - __from)) 00467 { 00468 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00469 __to_next = reinterpret_cast<intern_type*>(__cto); 00470 __ret = codecvt_base::partial; 00471 } 00472 else 00473 __ret = codecvt_base::error; 00474 } 00475 } 00476 return __ret; 00477 } 00478 00479 template<typename _InternT, typename _ExternT> 00480 int 00481 codecvt<_InternT, _ExternT, encoding_state>:: 00482 do_encoding() const throw() 00483 { 00484 int __ret = 0; 00485 if (sizeof(_ExternT) <= sizeof(_InternT)) 00486 __ret = sizeof(_InternT) / sizeof(_ExternT); 00487 return __ret; 00488 } 00489 00490 template<typename _InternT, typename _ExternT> 00491 bool 00492 codecvt<_InternT, _ExternT, encoding_state>:: 00493 do_always_noconv() const throw() 00494 { return false; } 00495 00496 template<typename _InternT, 
typename _ExternT> 00497 int 00498 codecvt<_InternT, _ExternT, encoding_state>:: 00499 do_length(state_type&, const extern_type* __from, 00500 const extern_type* __end, size_t __max) const 00501 { return std::min(__max, static_cast<size_t>(__end - __from)); } 00502 00503 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00504 // 74. Garbled text for codecvt::do_max_length 00505 template<typename _InternT, typename _ExternT> 00506 int 00507 codecvt<_InternT, _ExternT, encoding_state>:: 00508 do_max_length() const throw() 00509 { return 1; } 00510 00511 _GLIBCXX_END_NAMESPACE_VERSION 00512 } // namespace 00513 00514 #endif