00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00136 class t3lib_cs {
/**
 * Byte value written in place of a character that has no equivalent
 * in the target charset (63 is the ASCII code of "?").
 */
var $noCharByteVal = 63;

/**
 * Conversion tables filled by initCharset(), keyed by charset name.
 * Each entry holds a 'local' map (local char number => UTF-8 string)
 * and a 'utf8' map (UTF-8 string => local char number).
 */
var $parsedCharsets = array();

/**
 * Case folding tables, keyed by charset name and then by
 * 'toUpper', 'toLower' or 'toTitle'.
 */
var $caseFolding = array();

/**
 * Tables translating characters to ASCII replacement strings, keyed by charset name.
 */
var $toASCII = array();

/**
 * Charsets handled as exactly two bytes per character.
 */
var $twoByteSets = array(
	'ucs-2' => 1,
);

/**
 * Charsets handled as exactly four bytes per character.
 */
var $fourByteSets = array(
	'ucs-4'  => 1,
	'utf-32' => 1,
);

/**
 * Charsets handled EUC-style: a byte above 127 starts a two-byte character.
 */
var $eucBasedSets = array(
	'gb2312'    => 1,
	'big5'      => 1,
	'euc-kr'    => 1,
	'shift_jis' => 1,
);
00166
00167
00168
/**
 * Alias table used by parse_charset(): lowercased charset alias => charset name
 * as used for the conversion tables.
 * (The former second 'utf8' entry was an exact duplicate and has been dropped.)
 */
var $synonyms = array(
	// ASCII
	'us' => 'ascii',
	'us-ascii'=> 'ascii',
	// ISO-8859 family
	'cp819' => 'iso-8859-1',
	'ibm819' => 'iso-8859-1',
	'iso-ir-100' => 'iso-8859-1',
	'iso-ir-109' => 'iso-8859-2',
	'iso-ir-148' => 'iso-8859-9',
	'iso-ir-199' => 'iso-8859-14',
	'iso-ir-203' => 'iso-8859-15',
	'csisolatin1' => 'iso-8859-1',
	'csisolatin2' => 'iso-8859-2',
	'csisolatin3' => 'iso-8859-3',
	'csisolatin5' => 'iso-8859-9',
	'csisolatin8' => 'iso-8859-14',
	'csisolatin9' => 'iso-8859-15',
	'csisolatingreek' => 'iso-8859-7',
	'iso-celtic' => 'iso-8859-14',
	'latin1' => 'iso-8859-1',
	'latin2' => 'iso-8859-2',
	'latin3' => 'iso-8859-3',
	'latin5' => 'iso-8859-9',
	'latin6' => 'iso-8859-10',
	'latin8' => 'iso-8859-14',
	'latin9' => 'iso-8859-15',
	'l1' => 'iso-8859-1',
	'l2' => 'iso-8859-2',
	'l3' => 'iso-8859-3',
	'l5' => 'iso-8859-9',
	'l6' => 'iso-8859-10',
	'l8' => 'iso-8859-14',
	'l9' => 'iso-8859-15',
	'cyrillic' => 'iso-8859-5',
	'arabic' => 'iso-8859-6',
	'tis-620' => 'iso-8859-11',
	// Windows code pages
	'win874' => 'windows-874',
	'win1250' => 'windows-1250',
	'win1251' => 'windows-1251',
	'win1252' => 'windows-1252',
	'win1253' => 'windows-1253',
	'win1254' => 'windows-1254',
	'win1255' => 'windows-1255',
	'win1256' => 'windows-1256',
	'win1257' => 'windows-1257',
	'win1258' => 'windows-1258',
	'cp1250' => 'windows-1250',
	'cp1251' => 'windows-1251',
	'cp1252' => 'windows-1252',
	'ms-ee' => 'windows-1250',
	'ms-ansi' => 'windows-1252',
	'ms-greek' => 'windows-1253',
	'ms-turk' => 'windows-1254',
	'winbaltrim' => 'windows-1257',
	// KOI8 and Macintosh
	'koi-8ru' => 'koi-8r',
	'koi8r' => 'koi-8r',
	'cp878' => 'koi-8r',
	'mac' => 'macroman',
	'macintosh' => 'macroman',
	// Chinese, Japanese, Korean
	'euc-cn' => 'gb2312',
	'x-euc-cn' => 'gb2312',
	'euccn' => 'gb2312',
	'cp936' => 'gb2312',
	'big-5' => 'big5',
	'cp950' => 'big5',
	'eucjp' => 'euc-jp',
	'sjis' => 'shift_jis',
	'shift-jis' => 'shift_jis',
	'cp932' => 'shift_jis',
	'cp949' => 'euc-kr',
	// Unicode encodings
	'utf7' => 'utf-7',
	'utf8' => 'utf-8',
	'utf16' => 'utf-16',
	'utf32' => 'utf-32',
	'ucs2' => 'ucs-2',
	'ucs4' => 'ucs-4',
);
00246
00247
/**
 * Maps a language identifier (as found in the language part of a locale string)
 * to a language family. The family names are the keys of
 * $lang_to_charset_unix / $lang_to_charset_windows.
 *
 * Fixes relative to the previous table:
 *  - 'isl' pointed to the misspelled family 'west_euorpean', which exists in neither charset table
 *  - 'rus', 'bulgarian' and 'turkish' now agree with 'ru'/'russian', 'bg' and 'trk'
 *  - 'swedish' added; the misspelled key 'svedish' is kept for backward compatibility
 */
var $lang_to_langfamily = array(
	// Two-letter language codes
	'ar' => 'arabic',
	'bg' => 'cyrillic',
	'cs' => 'east_european',
	'da' => 'west_european',
	'de' => 'west_european',
	'es' => 'west_european',
	'et' => 'estonian',
	'eu' => 'west_european',
	'fi' => 'west_european',
	'fr' => 'west_european',
	'gr' => 'greek',
	'hr' => 'east_european',
	'hu' => 'east_european',
	'iw' => 'hebrew',
	'is' => 'west_european',
	'it' => 'west_european',
	'ja' => 'japanese',
	'kl' => 'west_european',
	'ko' => 'korean',
	'lt' => 'lithuanian',
	'lv' => 'west_european',
	'nl' => 'west_european',
	'no' => 'west_european',
	'pl' => 'east_european',
	'pt' => 'west_european',
	'ro' => 'east_european',
	'ru' => 'cyrillic',
	'sk' => 'east_european',
	'sl' => 'east_european',
	'sv' => 'west_european',
	'th' => 'thai',
	'uk' => 'cyrillic',
	'vi' => 'vietnamese',
	'zh' => 'chinese',

	// Three-letter codes (these look like Windows locale language abbreviations - TODO confirm)
	'chs' => 'simpl_chinese',
	'cht' => 'trad_chinese',
	'csy' => 'east_european',
	'dan' => 'west_european',
	'deu' => 'west_european',
	'dea' => 'west_european',
	'des' => 'west_european',
	'ena' => 'west_european',
	'enc' => 'west_european',
	'eng' => 'west_european',
	'enz' => 'west_european',
	'enu' => 'west_european',
	'nld' => 'west_european',
	'nlb' => 'west_european',
	'fin' => 'west_european',
	'fra' => 'west_european',
	'frb' => 'west_european',
	'frc' => 'west_european',
	'frs' => 'west_european',
	'ell' => 'greek',
	'hun' => 'east_european',
	'isl' => 'west_european',
	'ita' => 'west_european',
	'its' => 'west_european',
	'jpn' => 'japanese',
	'kor' => 'korean',
	'nor' => 'west_european',
	'non' => 'west_european',
	'plk' => 'east_european',
	'ptg' => 'west_european',
	'ptb' => 'west_european',
	'rus' => 'cyrillic',
	'sky' => 'east_european',
	'esp' => 'west_european',
	'esm' => 'west_european',
	'esn' => 'west_european',
	'sve' => 'west_european',
	'trk' => 'turkish',

	// English language names
	'bulgarian' => 'cyrillic',
	'catalan' => 'west_european',
	'croatian' => 'east_european',
	'czech' => 'east_european',
	'danish' => 'west_european',
	'dutch' => 'west_european',
	'english' => 'west_european',
	'finnish' => 'west_european',
	'french' => 'west_european',
	'galician' => 'west_european',
	'german' => 'west_european',
	'hungarian' => 'east_european',
	'icelandic' => 'west_european',
	'italian' => 'west_european',
	'latvian' => 'west_european',
	'lettish' => 'west_european',
	'norwegian' => 'west_european',
	'polish' => 'east_european',
	'portuguese' => 'west_european',
	'russian' => 'cyrillic',
	'romanian' => 'east_european',
	'slovak' => 'east_european',
	'slovenian' => 'east_european',
	'spanish' => 'west_european',
	'svedish' => 'west_european',
	'swedish' => 'west_european',
	'turkish' => 'turkish',
	'ukrainian' => 'cyrillic',
);
00354
00355
/**
 * Language family => charset, used by get_locale_charset() when TYPO3_OS is not 'WIN'.
 * An empty value makes get_locale_charset() fall back to 'iso-8859-1'.
 */
var $lang_to_charset_unix = array(
	'west_european' => 'iso-8859-1',
	'estonian'      => 'iso-8859-1',
	'east_european' => 'iso-8859-2',
	'baltic'        => 'iso-8859-4',
	'cyrillic'      => 'iso-8859-5',
	'arabic'        => 'iso-8859-6',
	'greek'         => 'iso-8859-7',
	'hebrew'        => 'iso-8859-8',
	'turkish'       => 'iso-8859-9',
	'thai'          => 'iso-8859-11',
	'lithuanian'    => 'iso-8859-13',
	'chinese'       => 'gb2312',
	'japanese'      => 'euc-jp',
	'korean'        => 'euc-kr',
	'simpl_chinese' => 'gb2312',
	'trad_chinese'  => 'big5',
	'vietnamese'    => '',
);

/**
 * Language family => charset, used by get_locale_charset() when TYPO3_OS is 'WIN'.
 */
var $lang_to_charset_windows = array(
	'east_european' => 'windows-1250',
	'cyrillic'      => 'windows-1251',
	'west_european' => 'windows-1252',
	'greek'         => 'windows-1253',
	'turkish'       => 'windows-1254',
	'hebrew'        => 'windows-1255',
	'arabic'        => 'windows-1256',
	'baltic'        => 'windows-1257',
	'estonian'      => 'windows-1257',
	'lithuanian'    => 'windows-1257',
	'vietnamese'    => 'windows-1258',
	'thai'          => 'cp874',
	'korean'        => 'cp949',
	'chinese'       => 'gb2312',
	'japanese'      => 'shift_jis',
	'simpl_chinese' => 'gb2312',
	'trad_chinese'  => 'big5',
);

/**
 * Complete (lowercased) locale strings that map directly to a charset;
 * get_locale_charset() checks this table before analysing the locale.
 */
var $locale_to_charset = array(
	'japanese.euc' => 'euc-jp',
	'ja_jp.ujis'   => 'euc-jp',
	'korean.euc'   => 'euc-kr',
	'zh_cn'        => 'gb2312',
	'zh_hk'        => 'big5',
	'zh_tw'        => 'big5',
);
00406
00407
00408
/**
 * Charset per two-letter key.
 * NOTE(review): the keys look like TYPO3 system language keys and an empty value
 * presumably means "no specific charset / use the default" - confirm against the callers.
 */
var $charSetArray = array(
	'dk' => '',
	'de' => '',
	'no' => '',
	'it' => '',
	'fr' => '',
	'es' => '',
	'nl' => '',
	'cz' => 'windows-1250',
	'pl' => 'iso-8859-2',
	'si' => 'windows-1250',
	'fi' => '',
	'tr' => 'iso-8859-9',
	'se' => '',
	'pt' => '',
	'ru' => 'windows-1251',
	'ro' => 'iso-8859-2',
	'ch' => 'gb2312',
	'sk' => 'windows-1250',
	'lt' => 'windows-1257',
	'is' => 'utf-8',
	'hr' => 'windows-1250',
	'hu' => 'iso-8859-2',
	'gl' => '',
	'th' => 'iso-8859-11',
	'gr' => 'iso-8859-7',
	'hk' => 'big5',
	'eu' => '',
	'bg' => 'windows-1251',
	'br' => '',
	'et' => 'iso-8859-4',
	'ar' => 'iso-8859-6',
	'he' => 'utf-8',
	'ua' => 'windows-1251',
	'jp' => 'shift_jis',
	'lv' => 'utf-8',
	'vn' => 'utf-8',
	'ca' => 'iso-8859-15',
	'ba' => 'iso-8859-2',
	'kr' => 'euc-kr',
);

/**
 * ISO-style language/locale code per two-letter key (same key set as $charSetArray).
 * NOTE(review): an empty value presumably means "the key itself already is the ISO code" - confirm.
 */
var $isoArray = array(
	'dk' => 'da',
	'de' => '',
	'no' => '',
	'it' => '',
	'fr' => '',
	'es' => '',
	'nl' => '',
	'cz' => 'cs',
	'pl' => '',
	'si' => 'sl',
	'fi' => '',
	'tr' => '',
	'se' => 'sv',
	'pt' => '',
	'ru' => '',
	'ro' => '',
	'ch' => 'zh_CN',
	'sk' => '',
	'lt' => '',
	'is' => '',
	'hr' => '',
	'hu' => '',
	'gl' => '',
	'th' => '',
	'gr' => 'el',
	'hk' => 'zh_HK',
	'eu' => '',
	'bg' => '',
	'br' => 'pt_BR',
	'et' => '',
	'ar' => '',
	'he' => 'iw',
	'ua' => 'uk',
	'jp' => 'ja',
	'lv' => '',
	'vn' => 'vi',
	'ca' => '',
	'ba' => '',
	'kr' => '',
);
00494
/**
 * Normalizes a charset name: lowercases it and resolves a known alias.
 *
 * @param string $charset Charset name as supplied by the caller
 * @return string Lowercased name, replaced by its $this->synonyms target when one is registered
 */
function parse_charset($charset) {
	$normalized = strtolower($charset);

	return isset($this->synonyms[$normalized]) ? $this->synonyms[$normalized] : $normalized;
}
00508
/**
 * Determines the charset belonging to a locale string such as "de_DE.ISO-8859-1@euro".
 *
 * Resolution order:
 *  1. exact match of the lowercased locale in $this->locale_to_charset
 *  2. an explicit charset part (after ".") - returned through parse_charset()
 *  3. the "@euro" modifier - 'iso-8859-15'
 *  4. the language part (before "_"), translated to a language family and looked up in the
 *     Windows or Unix charset table depending on TYPO3_OS
 *  5. 'iso-8859-1' as the last resort
 *
 * @param string $locale Locale string
 * @return string Charset name
 */
function get_locale_charset($locale) {
	$locale = strtolower($locale);

	// Exact match on the complete locale string
	if (isset($this->locale_to_charset[$locale])) return $this->locale_to_charset[$locale];

	// Split off the modifier; array_pad() supplies '' for a missing part so list() never hits an undefined offset
	list($locale,$modifier) = array_pad(explode('@',$locale),2,'');

	// Split off an explicitly given charset
	list($locale,$charset) = array_pad(explode('.',$locale),2,'');
	if ($charset) return $this->parse_charset($charset);

	if ($modifier == 'euro') return 'iso-8859-15';

	// Only the language part is needed; the country part is ignored
	$localeParts = explode('_',$locale);
	$language = $localeParts[0];
	if (isset($this->lang_to_langfamily[$language])) $language = $this->lang_to_langfamily[$language];

	if (TYPO3_OS == 'WIN') {
		$cs = isset($this->lang_to_charset_windows[$language]) ? $this->lang_to_charset_windows[$language] : '';
	} else {
		$cs = isset($this->lang_to_charset_unix[$language]) ? $this->lang_to_charset_unix[$language] : '';
	}

	// Unknown family or empty table entry: fall back to Latin-1
	return $cs ? $cs : 'iso-8859-1';
}
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
/**
 * Converts $str from one charset to another.
 *
 * When the target is utf-8, or no entity substitution is requested, the conversion is first
 * attempted with the PHP extension selected in
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] ('mbstring', 'iconv' or 'recode').
 * If none is configured, or the extension reports failure, the built-in table conversion is
 * used with UTF-8 as the intermediate charset.
 *
 * @param string $str Input string
 * @param string $fromCS Source charset (expected lowercase / already normalized)
 * @param string $toCS Target charset (expected lowercase / already normalized)
 * @param boolean $useEntityForNoChar If set, characters missing in the target charset become numeric entities instead of the "no char" byte
 * @return string Converted string
 */
function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) {
	if ($fromCS==$toCS) return $str;

	// The native functions cannot emit entities for unmappable characters, so they are only
	// used when the target is UTF-8 or entities are not requested
	if ($toCS=='utf-8' || !$useEntityForNoChar) {
		$method = isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']) ? $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] : '';
		switch($method) {
			case 'mbstring':
				$conv_str = mb_convert_encoding($str,$toCS,$fromCS);
				if (false !== $conv_str) return $conv_str;
			break;

			case 'iconv':
				// "//TRANSLIT" asks iconv to approximate characters missing in the target charset
				$conv_str = iconv($fromCS,$toCS.'//TRANSLIT',$str);
				if (false !== $conv_str) return $conv_str;
			break;

			case 'recode':
				$conv_str = recode_string($fromCS.'..'.$toCS,$str);
				if (false !== $conv_str) return $conv_str;
			break;
		}
		// Fall through to the table based conversion
	}

	if ($fromCS!='utf-8') $str=$this->utf8_encode($str,$fromCS);
	if ($toCS!='utf-8') $str=$this->utf8_decode($str,$toCS,$useEntityForNoChar);
	return $str;
}
00602
/**
 * Converts every string in a (possibly nested) array in place by calling conv() on it.
 * Array keys are left untouched.
 *
 * @param array $array Array to convert; passed by reference and modified
 * @param string $fromCS Source charset
 * @param string $toCS Target charset
 * @param boolean $useEntityForNoChar Passed on to conv()
 * @return void
 */
function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) {
	foreach(array_keys($array) as $idx) {
		if (!is_array($array[$idx])) {
			$array[$idx] = $this->conv($array[$idx],$fromCS,$toCS,$useEntityForNoChar);
			continue;
		}
		// Nested array: descend
		$this->convArray($array[$idx],$fromCS,$toCS,$useEntityForNoChar);
	}
}
00623
/**
 * Converts a string from $charset to UTF-8 using the parsed conversion table.
 *
 * Bytes up to 127 are copied unchanged. Characters missing in the table become the
 * "no char" byte ($this->noCharByteVal).
 *
 * Fixes relative to the previous implementation:
 *  - two-byte charsets combined the bytes with "&", which always yields 0; they are now
 *    combined with "|" (first byte is the high byte)
 *  - for shift_jis the lead bytes 160-223 are single-byte characters; that check used to sit
 *    in a branch that was never reached because shift_jis is listed in $this->eucBasedSets
 *
 * @param string $str String in the local charset
 * @param string $charset Charset name (lowercase, as used for the conversion table file)
 * @return string|null UTF-8 string, or NULL when no conversion table could be initialized
 */
function utf8_encode($str,$charset) {
	if (!$this->initCharset($charset)) return NULL;

	$isTwoByte = isset($this->twoByteSets[$charset]);
	$isEucBased = isset($this->eucBasedSets[$charset]);
	$noChar = chr($this->noCharByteVal);

	$strLen = strlen($str);
	$outStr = '';

	for ($a=0;$a<$strLen;$a++) {
		$chr = substr($str,$a,1);
		$ord = ord($chr);

		if ($isTwoByte) {
			// Every character is two bytes: build the 16 bit character number
			$ord2 = ord(substr($str,$a+1,1));
			$ord = ($ord << 8) | $ord2;

			if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
				$outStr.= $this->parsedCharsets[$charset]['local'][$ord];
			} else $outStr.= $noChar;
			$a++;
		} elseif ($ord>127) {
			// In EUC based sets a byte above 127 is the first of two bytes,
			// except for the single-byte range 160-223 of shift_jis
			if ($isEucBased && ($charset != 'shift_jis' || $ord<160 || $ord>223)) {
				$a++;
				$ord2 = ord(substr($str,$a,1));
				$ord = $ord*256+$ord2;
			}

			if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
				$outStr.= $this->parsedCharsets[$charset]['local'][$ord];
			} else $outStr.= $noChar;
		} else $outStr.= $chr;	// 7 bit characters are identical in UTF-8
	}
	return $outStr;
}
00669
/**
 * Converts a UTF-8 string to $charset using the parsed conversion table.
 *
 * Bytes up to 127 are copied unchanged. A multibyte sequence is looked up in the table;
 * a local character number above 255 is written as two bytes (high byte first).
 * Sequences missing in the table become a hexadecimal numeric entity when
 * $useEntityForNoChar is set, otherwise the "no char" byte. A byte above 127 that is not
 * a valid lead byte (bit 6 clear) also becomes the "no char" byte.
 *
 * @param string $str UTF-8 string
 * @param string $charset Target charset (lowercase, as used for the conversion table file)
 * @param boolean $useEntityForNoChar Emit "&#x...;" for unmappable characters
 * @return string|null Converted string, or NULL when no conversion table could be initialized
 */
function utf8_decode($str,$charset,$useEntityForNoChar=0) {
	if (!$this->initCharset($charset)) return NULL;

	$result = '';
	$total = strlen($str);
	$pos = 0;
	while ($pos < $total) {
		$byte = substr($str,$pos,1);
		$code = ord($byte);

		if ($code <= 127) {
			$result.= $byte;
		} elseif (!($code & 64)) {
			// Continuation byte without a lead byte
			$result.= chr($this->noCharByteVal);
		} else {
			// Lead byte: each further leading 1-bit announces one continuation byte
			$seq = $byte;
			for ($n=0;$n<8;$n++) {
				$code = $code << 1;
				if (!($code & 128)) break;
				$pos++;
				$seq.= substr($str,$pos,1);
			}

			if (isset($this->parsedCharsets[$charset]['utf8'][$seq])) {
				$local = $this->parsedCharsets[$charset]['utf8'][$seq];
				$result.= $local>255 ? chr(($local >> 8) & 255).chr($local & 255) : chr($local);
			} elseif ($useEntityForNoChar) {
				$result.= '&#'.$this->utf8CharToUnumber($seq,1).';';
			} else {
				$result.= chr($this->noCharByteVal);
			}
		}
		$pos++;
	}
	return $result;
}
00714
/**
 * Replaces every multibyte UTF-8 character by a hexadecimal numeric entity ("&#x...;").
 * Bytes up to 127 are copied unchanged; a byte above 127 that cannot start a sequence
 * becomes the "no char" byte.
 *
 * @param string $str UTF-8 string
 * @return string String containing only 7 bit characters plus entities (and "no char" bytes)
 */
function utf8_to_entities($str) {
	$result = '';
	$total = strlen($str);

	for ($pos=0;$pos<$total;$pos++) {
		$byte = substr($str,$pos,1);
		$code = ord($byte);

		if ($code <= 127) {
			$result.= $byte;
			continue;
		}
		if (!($code & 64)) {
			// Continuation byte without a lead byte
			$result.= chr($this->noCharByteVal);
			continue;
		}

		// Lead byte: collect one continuation byte per further leading 1-bit
		$seq = $byte;
		for ($n=0;$n<8;$n++) {
			$code = $code << 1;
			if (!($code & 128)) break;
			$pos++;
			$seq.= substr($str,$pos,1);
		}
		$result.= '&#'.$this->utf8CharToUnumber($seq,1).';';
	}

	return $result;
}
00746
/**
 * Converts numeric entities (decimal "&#233;" and hexadecimal "&#xE9;" / "&#XE9;") to UTF-8
 * characters and, optionally, the named HTML entities known to PHP as well.
 *
 * Changes relative to the previous implementation:
 *  - uses preg_split() instead of ereg_replace(), which no longer exists in PHP 7+
 *  - a numeric entity with a malformed number (e.g. "&#;" or "&#12ab;") is left as it is
 *    instead of being turned into a NUL byte
 *  - the hexadecimal marker may be upper case
 *
 * @param string $str Input string (UTF-8)
 * @param boolean $alsoStdHtmlEnt If set, named entities from get_html_translation_table(HTML_ENTITIES) are converted too
 * @return string String with entities replaced; unknown entities are kept
 */
function entities_to_utf8($str,$alsoStdHtmlEnt=0) {
	$trans_tbl = array();
	if ($alsoStdHtmlEnt) {
		$trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES));
	}

	// Even indexes hold the text between entities, odd indexes the entity body (without "&" and ";")
	$parts = preg_split('/&([#[:alnum:]]*);/',$str,-1,PREG_SPLIT_DELIM_CAPTURE);
	if (!is_array($parts)) return $str;

	foreach($parts as $k => $v) {
		if (!($k%2)) continue;

		$match = array();
		if (substr($v,0,1)=='#') {
			$number = substr($v,1);
			if (preg_match('/^[xX]([0-9a-fA-F]+)$/',$number,$match)) {
				$parts[$k] = $this->UnumberToChar(hexdec($match[1]));
			} elseif (preg_match('/^[0-9]+$/',$number)) {
				$parts[$k] = $this->UnumberToChar(intval($number));
			} else {
				$parts[$k] = '&'.$v.';';
			}
		} elseif ($alsoStdHtmlEnt && isset($trans_tbl['&'.$v.';'])) {
			$char = $trans_tbl['&'.$v.';'];
			// Newer PHP versions return the translation table in UTF-8 by default; a multibyte value
			// is therefore already UTF-8, while a single byte comes from a single-byte table
			// (or is ASCII) and goes through the iso-8859-1 conversion as before.
			$parts[$k] = strlen($char) > 1 ? $char : $this->utf8_encode($char,'iso-8859-1');
		} else {
			$parts[$k] = '&'.$v.';';
		}
	}

	return implode('',$parts);
}
00779
/**
 * Splits a UTF-8 string into an array with one element per character.
 *
 * @param string $str UTF-8 string
 * @param boolean $convEntities If set, entities (numeric and named) are converted to UTF-8 first
 * @param boolean $retChar If set, the elements are the characters themselves, otherwise their character numbers
 * @return array One element per character; a byte above 127 that cannot start a sequence yields the "no char" value
 */
function utf8_to_numberarray($str,$convEntities=0,$retChar=0) {
	if ($convEntities) {
		$str = $this->entities_to_utf8($str,1);
	}

	$chars = array();
	$total = strlen($str);

	for ($pos=0;$pos<$total;$pos++) {
		$byte = substr($str,$pos,1);
		$code = ord($byte);

		if ($code <= 127) {
			$chars[] = $retChar ? chr($code) : $code;
			continue;
		}
		if (!($code & 64)) {
			// Continuation byte without a lead byte
			$chars[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal;
			continue;
		}

		// Lead byte: collect one continuation byte per further leading 1-bit
		$seq = $byte;
		for ($n=0;$n<8;$n++) {
			$code = $code << 1;
			if (!($code & 128)) break;
			$pos++;
			$seq.= substr($str,$pos,1);
		}
		$chars[] = $retChar ? $seq : $this->utf8CharToUnumber($seq);
	}

	return $chars;
}
00818
/**
 * Encodes a character number as a UTF-8 byte sequence of one to six bytes.
 * Numbers of 0x80000000 and above yield the "no char" byte.
 *
 * @param integer $cbyte Character number
 * @return string UTF-8 byte sequence
 */
function UnumberToChar($cbyte) {
	if ($cbyte < 0x80) {
		// 7 bit: single byte
		return chr($cbyte);
	}

	// Pick the number of continuation bytes and the marker bits of the lead byte
	if ($cbyte < 0x800) {
		$trail = 1;
		$leadMark = 0xC0;
	} elseif ($cbyte < 0x10000) {
		$trail = 2;
		$leadMark = 0xE0;
	} elseif ($cbyte < 0x200000) {
		$trail = 3;
		$leadMark = 0xF0;
	} elseif ($cbyte < 0x4000000) {
		$trail = 4;
		$leadMark = 0xF8;
	} elseif ($cbyte < 0x80000000) {
		$trail = 5;
		$leadMark = 0xFC;
	} else {
		return chr($this->noCharByteVal);
	}

	// Lead byte carries the topmost bits, every continuation byte the next six bits
	$bytes = chr($leadMark | ($cbyte >> (6*$trail)));
	for ($shift=6*($trail-1);$shift>=0;$shift-=6) {
		$bytes.= chr(0x80 | (($cbyte >> $shift) & 0x3F));
	}
	return $bytes;
}
00873
/**
 * Decodes the first UTF-8 character of $str into its character number.
 * If the first byte is not a lead byte, its byte value is returned.
 *
 * @param string $str UTF-8 character (further bytes after the first character are ignored)
 * @param boolean $hex If set, the number is returned as "x" followed by lowercase hex digits
 * @return mixed Character number, or the "x..." string when $hex is set
 */
function utf8CharToUnumber($str,$hex=0) {
	$lead = ord(substr($str,0,1));

	if (($lead & 192) != 192) {
		$int = $lead;
	} else {
		// Collect the six payload bits of each continuation byte as a string of binary digits
		$shifted = $lead;
		$bits = '';
		$b = 0;
		while ($b < 8) {
			$shifted = $shifted << 1;
			if (!($shifted & 128)) break;
			$bits.= substr('00000000'.decbin(ord(substr($str,$b+1,1))),-6);
			$b++;
		}
		// With $b continuation bytes the lead byte contributes its lowest (6-$b) bits
		$bits = substr('00000000'.decbin($lead),-(6-$b)).$bits;

		$int = bindec($bits);
	}

	return $hex ? 'x'.dechex($int) : $int;
}
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
/**
 * Loads the conversion table of a charset into $this->parsedCharsets[$charset].
 *
 * The table is read from the serialized cache file in typo3temp/cs/ if available, otherwise
 * parsed from PATH_t3lib.'csconvtbl/<charset>.tbl' and then written to the cache. Only
 * character numbers above 127 are stored. Two line formats are recognized, detected from the
 * first data line: "0x<local> 0x<unicode> ..." (whitespace separated) and
 * "<local>=U+<unicode>:..." (token separated).
 *
 * Changes relative to the previous implementation:
 *  - ereg()/split(), which no longer exist in PHP 7+, are replaced by preg_match()/preg_split()
 *  - a cache file that does not unserialize to an array is ignored and the table is re-parsed
 *
 * @param string $charset Charset name (lowercase); used as part of the table file name
 * @return mixed 1 if the table was already loaded, 2 if it was loaded now, FALSE if no table file exists
 */
function initCharset($charset) {
	// Already loaded in this request
	if (isset($this->parsedCharsets[$charset]) && is_array($this->parsedCharsets[$charset])) return 1;

	$charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl';
	if (!$charset || !t3lib_div::validPathStr($charsetConvTableFile) || !@is_file($charsetConvTableFile)) return false;

	// Try the cached, already parsed table first
	$table = false;
	$cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/charset_'.$charset.'.tbl');
	if ($cacheFile && @is_file($cacheFile)) {
		$table = unserialize(t3lib_div::getUrl($cacheFile));
	}

	if (!is_array($table)) {
		$whitespacedPattern = '/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/';
		$lines = t3lib_div::trimExplode(chr(10),t3lib_div::getUrl($charsetConvTableFile),1);

		$table = array('local'=>array(),'utf8'=>array());

		$detectedType = '';
		foreach($lines as $value) {
			// Skip empty lines and comments
			if (!trim($value) || substr($value,0,1)=='#') continue;

			// The first data line decides the format of the whole file
			if (!$detectedType) $detectedType = preg_match($whitespacedPattern,$value) ? 'whitespaced' : 'ms-token';

			$hexbyte = '';
			$utf8 = '';
			if ($detectedType=='ms-token') {
				$tokens = preg_split('/=|:/',$value,3);
				$hexbyte = $tokens[0];
				$utf8 = isset($tokens[1]) ? $tokens[1] : '';
			} elseif ($detectedType=='whitespaced') {
				$regA = array();
				if (preg_match($whitespacedPattern,$value,$regA)) {
					$hexbyte = $regA[1];
					$utf8 = 'U+'.$regA[2];
				}
			}

			$decval = hexdec(trim($hexbyte));
			if ($decval>127) {
				// Strip the leading "U+" before reading the hexadecimal Unicode number
				$utf8decval = hexdec(substr(trim($utf8),2));
				$utf8Char = $this->UnumberToChar($utf8decval);
				$table['local'][$decval] = $utf8Char;
				$table['utf8'][$utf8Char] = $decval;
			}
		}

		if ($cacheFile) {
			t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($table));
		}
	}

	$this->parsedCharsets[$charset] = $table;
	return 2;
}
00978
/**
 * Builds the UTF-8 case folding table ($this->caseFolding['utf-8']) and the UTF-8 to ASCII
 * table ($this->toASCII['utf-8']) from the Unicode data files in PATH_t3lib.'unidata/'.
 *
 * With $mode 'case' or 'ascii' the corresponding table is taken from memory or from its cache
 * file in typo3temp/cs/ when possible. In every other situation BOTH tables are rebuilt from
 * UnicodeData.txt (mandatory), SpecialCasing.txt and Translit.txt (both optional) and both
 * cache files are rewritten. Only characters up to U+FFFF are processed.
 *
 * Changes relative to the previous implementation: the ereg()/split() calls and the "{0}"
 * string offsets (all unavailable in current PHP) are replaced by preg_match()/explode()/substr();
 * empty or failed reads are skipped instead of being parsed as a data line; the "U+" prefix
 * is stripped before hexdec() is called.
 *
 * @param string $mode 'case', 'ascii' or NULL
 * @return mixed 1 if the requested table was already loaded, 2 if it came from the cache, 3 if the tables were built, FALSE if UnicodeData.txt is unavailable
 */
function initUnicodeData($mode=null) {
	$cacheFileCase = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_utf-8.tbl');
	$cacheFileASCII = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_utf-8.tbl');

	// Shortcuts: table already in memory or available from the cache
	switch($mode) {
		case 'case':
			if (isset($this->caseFolding['utf-8']) && is_array($this->caseFolding['utf-8'])) return 1;

			if ($cacheFileCase && @is_file($cacheFileCase)) {
				$this->caseFolding['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileCase));
				return 2;
			}
		break;

		case 'ascii':
			if (isset($this->toASCII['utf-8']) && is_array($this->toASCII['utf-8'])) return 1;

			if ($cacheFileASCII && @is_file($cacheFileASCII)) {
				$this->toASCII['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileASCII));
				return 2;
			}
		break;
	}

	// Main Unicode database
	$unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt';
	if (!(t3lib_div::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) return false;

	$fh = fopen($unicodeDataFile,'rb');
	if (!$fh) return false;

	// Case folding table, written through a reference
	$this->caseFolding['utf-8'] = array();
	$utf8CaseFolding =& $this->caseFolding['utf-8'];
	$utf8CaseFolding['toUpper'] = array();
	$utf8CaseFolding['toLower'] = array();
	$utf8CaseFolding['toTitle'] = array();

	$decomposition = array();	// "U+XXXX" => array of hex code values the character decomposes to
	$mark = array();			// "U+XXXX" => 1 for characters of a general category starting with "M"
	$number = array();			// "U+XXXX" => numeric value for characters of a category starting with "N"
	$omit = array();			// "U+XXXX" => 1 for characters Translit.txt maps to nothing

	while (!feof($fh)) {
		$line = fgets($fh,4096);
		if ($line === false || trim($line) == '') continue;

		// Semicolon separated fields; only the ones read below are used
		$fields = array_pad(explode(';',rtrim($line)),15,'');
		$char = $fields[0];
		$name = $fields[1];
		$cat = $fields[2];
		$decomp = $fields[5];
		$num = $fields[8];
		$upper = $fields[12];
		$lower = $fields[13];
		$title = $fields[14];

		$ord = hexdec($char);
		if ($ord > 0xFFFF) break;	// Only characters up to U+FFFF are handled

		$utf8_char = $this->UnumberToChar($ord);

		if ($upper) $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper));
		if ($lower) $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower));
		// The title case mapping is only stored when it differs from the upper case mapping
		if ($title && $title !== $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title));

		switch (substr($cat,0,1)) {
			case 'M':
				$mark["U+$char"] = 1;
			break;

			case 'N':
				if ($ord > 0x80 && $num != '') $number["U+$char"] = $num;
			break;
		}

		// Latin letters with a modifier but without a decomposition: use the base letter from the name
		$match = array();
		if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/',$name,$match) && !$decomp) {
			$c = ord($match[2]);
			if ($match[1] == 'SMALL') $c += 32;

			$decomposition["U+$char"] = array(dechex($c));
			continue;
		}

		// Decomposition field: optional "<tag>" followed by a list of code values
		$match = array();
		if (preg_match('/(<.*>)? *(.+)/',$decomp,$match)) {
			switch($match[1]) {
				case '<circle>':
					// Wrap in parentheses
					$match[2] = '0028 '.$match[2].' 0029';
				break;

				case '<square>':
					// Wrap in square brackets
					$match[2] = '005B '.$match[2].' 005D';
				break;

				case '<compat>':
					// Ignore compatibility decompositions that start with a space
					if (substr($match[2],0,5) == '0020 ') continue 2;
				break;

				// Presentation form variants are ignored
				case '<initial>':
				case '<medial>':
				case '<final>':
				case '<isolated>':
				case '<vertical>':
					continue 2;
			}
			$decomposition["U+$char"] = explode(' ',$match[2]);
		}
	}
	fclose($fh);

	// Mappings to more than one character (optional file); conditional entries are skipped
	$specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt';
	if (t3lib_div::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) {
		$fh = fopen($specialCasingFile,'rb');
		if ($fh) {
			while (!feof($fh)) {
				$line = fgets($fh,4096);
				if ($line === false || substr($line,0,1) == '#' || trim($line) == '') continue;

				list($char,$lower,$title,$upper,$cond) = array_pad(t3lib_div::trimExplode(';',$line),5,'');
				// A fifth field that is not a comment is a condition
				if ($cond != '' && substr($cond,0,1) != '#') continue;

				$utf8_char = $this->UnumberToChar(hexdec($char));
				$targets = array('toLower' => $lower, 'toTitle' => $title, 'toUpper' => $upper);
				foreach($targets as $direction => $codes) {
					if ($char === $codes) continue;
					// As above, title case is only stored when it differs from upper case
					if ($direction == 'toTitle' && $title === $upper) continue;

					$arr = explode(' ',$codes);
					foreach($arr as $idx => $code) {
						$arr[$idx] = $this->UnumberToChar(hexdec($code));
					}
					$utf8CaseFolding[$direction][$utf8_char] = implode('',$arr);
				}
			}
			fclose($fh);
		}
	}

	// Custom transliterations (optional file); they overrule the decompositions collected so far
	$customTranslitFile = PATH_t3lib.'unidata/Translit.txt';
	if (t3lib_div::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) {
		$fh = fopen($customTranslitFile,'rb');
		if ($fh) {
			while (!feof($fh)) {
				$line = fgets($fh,4096);
				if ($line === false || substr($line,0,1) == '#' || trim($line) == '') continue;

				list($char,$translit) = array_pad(t3lib_div::trimExplode(';',$line),2,'');
				if (!$translit) $omit["U+$char"] = 1;
				$decomposition["U+$char"] = explode(' ',$translit);
			}
			fclose($fh);
		}
	}

	// Resolve decompositions recursively and drop the marks
	foreach($decomposition as $from => $to) {
		$code_decomp = array();

		while ($code_value = array_shift($to)) {
			if (isset($decomposition["U+$code_value"])) {
				// Put the nested decomposition in front of the remaining values, keeping its order
				foreach(array_reverse($decomposition["U+$code_value"]) as $cv) {
					array_unshift($to,$cv);
				}
			} elseif (!isset($mark["U+$code_value"])) {
				array_push($code_decomp,$code_value);
			}
		}
		if (count($code_decomp) || isset($omit[$from])) {
			$decomposition[$from] = $code_decomp;
		} else {
			unset($decomposition[$from]);
		}
	}

	// ASCII table: only decompositions consisting purely of 7 bit characters are used
	$this->toASCII['utf-8'] = array();
	$ascii =& $this->toASCII['utf-8'];

	foreach($decomposition as $from => $to) {
		$code_decomp = array();
		while ($code_value = array_shift($to)) {
			$ord = hexdec($code_value);
			if ($ord > 127) continue 2;	// Not representable in ASCII: skip this character
			array_push($code_decomp,chr($ord));
		}
		// Keys look like "U+XXXX"; strip the prefix before converting
		$ascii[$this->UnumberToChar(hexdec(substr($from,2)))] = join('',$code_decomp);
	}

	// Numeric characters without a decomposition are replaced by their numeric value
	foreach($number as $from => $to) {
		$utf8_char = $this->UnumberToChar(hexdec(substr($from,2)));
		if (!isset($ascii[$utf8_char])) {
			$ascii[$utf8_char] = $to;
		}
	}

	if ($cacheFileCase) {
		t3lib_div::writeFileToTypo3tempDir($cacheFileCase,serialize($utf8CaseFolding));
	}

	if ($cacheFileASCII) {
		t3lib_div::writeFileToTypo3tempDir($cacheFileASCII,serialize($ascii));
	}

	return 3;
}
01204
/**
 * Builds the case folding table of a charset ($this->caseFolding[$charset]) by translating
 * the UTF-8 case folding table through the conversion table of that charset.
 * The result is cached in typo3temp/cs/.
 *
 * Changes relative to the previous implementation: the table is initialized as an array up
 * front, and characters without a case mapping are skipped with isset() instead of reading
 * an undefined index and decoding an empty value.
 *
 * @param string $charset Charset name (lowercase)
 * @return mixed 1 if already loaded, 2 if loaded from the cache, 3 if built, FALSE if the conversion table or the Unicode data is unavailable
 */
function initCaseFolding($charset) {
	// Already loaded in this request
	if (isset($this->caseFolding[$charset]) && is_array($this->caseFolding[$charset])) return 1;

	// Cached table
	$cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_'.$charset.'.tbl');
	if ($cacheFile && @is_file($cacheFile)) {
		$this->caseFolding[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
		return 2;
	}

	// Both the charset conversion table and the UTF-8 case folding table are required
	if (!$this->initCharset($charset)) {
		return false;
	}
	if (!$this->initUnicodeData('case')) {
		return false;
	}

	$this->caseFolding[$charset] = array(
		'toUpper' => array(),
		'toLower' => array(),
		'toTitle' => array(),
	);

	$nochar = chr($this->noCharByteVal);
	foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
		// The character itself, in the local charset
		$c = $this->utf8_decode($utf8,$charset);

		foreach (array('toUpper','toLower','toTitle') as $direction) {
			if (!isset($this->caseFolding['utf-8'][$direction][$utf8])) continue;

			// Keep the mapping only if the result exists in the local charset
			$cc = $this->utf8_decode($this->caseFolding['utf-8'][$direction][$utf8],$charset);
			if ($cc != '' && $cc != $nochar) $this->caseFolding[$charset][$direction][$c] = $cc;
		}
	}

	// The conversion table only covers characters above 127, so add the ASCII letters explicitly
	for ($i=ord('a'); $i<=ord('z'); $i++) {
		$this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i-32);
	}
	for ($i=ord('A'); $i<=ord('Z'); $i++) {
		$this->caseFolding[$charset]['toLower'][chr($i)] = chr($i+32);
	}

	if ($cacheFile) {
		t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->caseFolding[$charset]));
	}

	return 3;
}
01266
/**
 * Builds the "to ASCII" table of a charset ($this->toASCII[$charset]) by translating the
 * UTF-8 table through the conversion table of that charset. The result is cached in typo3temp/cs/.
 *
 * Change relative to the previous implementation: the table is initialized as an empty array,
 * so a charset without any mappable character no longer caches NULL and no longer fails the
 * "already loaded" check on every later call.
 *
 * @param string $charset Charset name (lowercase)
 * @return mixed 1 if already loaded, 2 if loaded from the cache, 3 if built, FALSE if the conversion table or the Unicode data is unavailable
 */
function initToASCII($charset) {
	// Already loaded in this request
	if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) return 1;

	// Cached table
	$cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_'.$charset.'.tbl');
	if ($cacheFile && @is_file($cacheFile)) {
		$this->toASCII[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
		return 2;
	}

	// Both the charset conversion table and the UTF-8 table are required
	if (!$this->initCharset($charset)) {
		return false;
	}
	if (!$this->initUnicodeData('ascii')) {
		return false;
	}

	$this->toASCII[$charset] = array();
	foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
		if (!isset($this->toASCII['utf-8'][$utf8])) continue;

		// Key of the table is the character in the local charset
		$c = $this->utf8_decode($utf8,$charset);
		$this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8];
	}

	if ($cacheFile) {
		t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->toASCII[$charset]));
	}

	return 3;
}
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
/**
 * Charset aware substr(): $start and $len are counted in characters, not bytes.
 *
 * Fixes relative to the previous implementation:
 *  - the mbstring branch always used 'utf-8' instead of $charset
 *  - for two- and four-byte charsets a missing $len produced an empty string (NULL*2 is 0)
 *  - the charset tables are tested with !empty() instead of reading undefined indexes
 *
 * @param string $charset Charset of $string (lowercase)
 * @param string $string Input string
 * @param integer $start Start position in characters
 * @param integer $len Length in characters; NULL means "to the end of the string"
 * @return string The substring
 */
function substr($charset,$string,$start,$len=null) {
	if ($len===0) return '';

	if (isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']) && $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		// "To the end": the full character count is always a sufficient length
		if ($len==null) $len = mb_strlen($string,$charset);
		return mb_substr($string,$start,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_substr($string,$start,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		return $this->euc_substr($string,$start,$charset,$len);
	} elseif (!empty($this->twoByteSets[$charset])) {
		return $len === NULL ? substr($string,$start*2) : substr($string,$start*2,$len*2);
	} elseif (!empty($this->fourByteSets[$charset])) {
		return $len === NULL ? substr($string,$start*4) : substr($string,$start*4,$len*4);
	}

	// Single byte charset: characters and bytes are the same thing
	return $len === NULL ? substr($string,$start) : substr($string,$start,$len);
}
01373
/**
 * Charset aware strlen(): counts characters, not bytes.
 *
 * The charset tables are tested with !empty(), like conv_case() and specCharsToASCII() do
 * with isset(), instead of reading an undefined index for every charset not listed.
 *
 * @param string $charset Charset of $string (lowercase)
 * @param string $string Input string
 * @return integer Number of characters
 */
function strlen($charset,$string) {
	if (isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']) && $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return mb_strlen($string,$charset);
	}
	if ($charset == 'utf-8') {
		return $this->utf8_strlen($string);
	}
	if (!empty($this->eucBasedSets[$charset])) {
		return $this->euc_strlen($string,$charset);
	}
	if (!empty($this->twoByteSets[$charset])) {
		return strlen($string)/2;
	}
	if (!empty($this->fourByteSets[$charset])) {
		return strlen($string)/4;
	}

	// Single byte charset
	return strlen($string);
}
01399
/**
 * Crops a string to $len characters and adds $crop where characters were removed.
 * A positive $len keeps the start of the string and appends $crop; a negative $len keeps
 * the end of the string and prepends $crop. A string that is not longer than the limit is
 * returned unchanged.
 *
 * The former "$string{$i}" existence tests are replaced by explicit range checks: the curly
 * brace syntax no longer exists in current PHP, and a negative string offset reads from the
 * end of the string since PHP 7.1, which would change the result.
 *
 * @param string $charset Charset of $string (lowercase)
 * @param string $string Input string
 * @param integer $len Number of characters to keep; 0 (after intval()) returns the string unchanged
 * @param string $crop String added at the cropped side
 * @return string The cropped string
 */
function crop($charset,$string,$len,$crop='') {
	if (intval($len) == 0) return $string;

	// Translate the character position into a byte position
	// (presumably the *_char2byte_pos() helpers return FALSE when the position is outside the string - verify)
	if ($charset == 'utf-8') {
		$i = $this->utf8_char2byte_pos($string,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		$i = $this->euc_char2byte_pos($string,$len,$charset);
	} else {
		if ($len > 0) {
			$i = $len;
		} else {
			$i = strlen($string)+$len;
			if ($i<=0) $i = false;
		}
	}

	if ($i === false) return $string;

	$byteLen = strlen($string);
	if ($len > 0) {
		// Only crop if there is at least one byte at the cut position
		if ($i >= 0 && $i < $byteLen) {
			return substr($string,0,$i).$crop;
		}
	} else {
		// Only crop if there is at least one byte in front of the cut position
		if ($i >= 1 && $i <= $byteLen) {
			return $crop.substr($string,$i);
		}
	}

	return $string;
}
01454
/**
 * Truncates a string to at most $len BYTES without cutting a multibyte character in half.
 *
 * Fix: the EUC branch called euc_strtrunc() without $len. The call now passes
 * ($string,$len,$charset), the argument order euc_char2byte_pos() is called with in crop().
 * NOTE(review): euc_strtrunc() is defined outside the visible part of this file - confirm its signature.
 *
 * @param string $charset Charset of $string (lowercase)
 * @param string $string Input string
 * @param integer $len Maximum length in bytes; zero or less returns an empty string
 * @return string The truncated string
 */
function strtrunc($charset,$string,$len) {
	if ($len <= 0) return '';

	if (isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']) && $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return mb_strcut($string,0,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_strtrunc($string,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		return $this->euc_strtrunc($string,$len,$charset);
	} elseif (!empty($this->twoByteSets[$charset])) {
		// Round down to a whole number of two-byte characters
		if ($len % 2) $len--;
	} elseif (!empty($this->fourByteSets[$charset])) {
		// Round down to a whole number of four-byte characters
		$len -= $len % 4;
	}

	return substr($string,0,$len);
}
01483
/**
 * Converts a string to lower or upper case in the given charset.
 *
 * Uses mb_strtolower()/mb_strtoupper() when
 * $TYPO3_CONF_VARS['SYS']['t3lib_cs_utils'] is set to 'mbstring' and PHP is
 * at least 4.3.0; otherwise uses the mapping routines of this class.
 *
 * @param string $charset The charset $string is encoded in
 * @param string $string  The string to convert
 * @param string $case    'toLower' for lower case; in the mbstring branch any other value means upper case
 * @return string The converted string
 */
function conv_case($charset,$string,$case) {
    // version_compare() replaces the former call to the non-existent float().
    if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring' && version_compare(phpversion(),'4.3.0','>=')) {
        // Convert the string that was passed in, using its own charset
        // (formerly an undefined $str and a hard-coded 'utf-8').
        if ($case == 'toLower') {
            return mb_strtolower($string,$charset);
        }
        return mb_strtoupper($string,$charset);
    }

    if ($charset == 'utf-8') {
        return $this->utf8_char_mapping($string,'case',$case);
    }
    if (isset($this->eucBasedSets[$charset])) {
        return $this->euc_char_mapping($string,$charset,'case',$case);
    }

    // Everything else is handled as a single-byte charset.
    return $this->sb_char_mapping($string,$charset,'case',$case);
}
01517
/**
 * Runs a string through the 'ascii' character mapping for its charset.
 *
 * 'utf-8' is handled by utf8_char_mapping(), charsets listed in
 * $this->eucBasedSets by euc_char_mapping(), and everything else by
 * sb_char_mapping().
 *
 * @param string $charset The charset $string is encoded in
 * @param string $string  The string to convert
 * @return string The result of the selected mapping routine
 */
function specCharsToASCII($charset,$string) {
    if ($charset == 'utf-8') {
        $converted = $this->utf8_char_mapping($string,'ascii');
    } else {
        $converted = isset($this->eucBasedSets[$charset])
            ? $this->euc_char_mapping($string,$charset,'ascii')
            : $this->sb_char_mapping($string,$charset,'ascii');
    }

    return $converted;
}
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
01554
/**
 * Maps every byte of a single-byte encoded string through a lookup table.
 *
 * @param string $str     The string to convert
 * @param string $charset The charset of $str, used to select the table
 * @param string $mode    'case' (table from $this->caseFolding) or 'ascii' (table from $this->toASCII)
 * @param string $opt     Sub-key of the case folding table; only used when $mode is 'case'
 * @return string The mapped string; $str unchanged if the mode is unknown or the table could not be initialised
 */
function sb_char_mapping($str,$charset,$mode,$opt='') {
    switch($mode) {
        case 'case':
            if (!$this->initCaseFolding($charset)) return $str;
            $map =& $this->caseFolding[$charset][$opt];
            break;

        case 'ascii':
            if (!$this->initToASCII($charset)) return $str;
            $map =& $this->toASCII[$charset];
            break;

        default:
            return $str;
    }

    $out = '';
    // Iterate up to the known length instead of probing "$str{$i}" until it
    // comes back empty; bytes without a table entry are copied unchanged.
    $byteLength = strlen($str);
    for ($i=0; $i<$byteLength; $i++) {
        $c = $str[$i];
        $out .= isset($map[$c]) ? $map[$c] : $c;
    }

    return $out;
}
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
/**
 * Returns a part of a UTF-8 string, addressed in characters rather than bytes.
 *
 * Character positions are converted to byte positions with
 * utf8_char2byte_pos(). When that lookup fails for a $start that is zero or
 * negative, extraction starts at the beginning of the string.
 *
 * @param string  $str   UTF-8 string
 * @param integer $start Start position in characters (negative: counted from the end)
 * @param integer $len   Length in characters (negative: characters to leave off the end); null for "to the end"
 * @return string|boolean The substring, or false if a positive $start could not be resolved to a byte position
 */
function utf8_substr($str,$start,$len=null) {
    // A length of exactly 0 yields an empty string. The null check keeps
    // the "no length given" default away from strcmp().
    if ($len !== null && !strcmp($len,'0')) return '';

    $byte_start = $this->utf8_char2byte_pos($str,$start);
    if ($byte_start === false) {
        if ($start > 0) {
            return false;
        }
        // Start at the first byte. This used to assign $start and leave
        // $byte_start as false, relying on substr() coercing it to 0.
        $byte_start = 0;
    }

    $str = substr($str,$byte_start);

    if ($len != null) {
        $byte_end = $this->utf8_char2byte_pos($str,$len);
        if ($byte_end === false) {
            // Length exceeds what is left: nothing remains for a negative
            // length, everything remains for a positive one.
            return $len<0 ? '' : $str;
        }
        return substr($str,0,$byte_end);
    }

    return $str;
}
01643
/**
 * Counts the characters of a UTF-8 string.
 *
 * Every byte that is either plain ASCII (high bit clear) or the lead byte
 * of a multibyte sequence (two high bits set) counts as one character;
 * continuation bytes are skipped.
 *
 * @param string $str UTF-8 string
 * @return integer Number of characters
 */
function utf8_strlen($str) {
    $count = 0;
    // Bounded loop instead of probing "$str{$i}" until it comes back empty.
    $byteLength = strlen($str);
    for ($i=0; $i<$byteLength; $i++) {
        $c = ord($str[$i]);
        if (!($c & 0x80) || ($c & 0xC0) == 0xC0) {
            $count++;
        }
    }
    return $count;
}
01664
/**
 * Truncates a UTF-8 string to at most $len bytes without cutting a
 * multibyte character in half.
 *
 * @param string  $str UTF-8 string
 * @param integer $len Maximum length in bytes
 * @return string The leading part of $str that fits into $len bytes
 */
function utf8_strtrunc($str,$len) {
    if ($len <= 0) return '';
    // Nothing to cut; this also avoids reading an offset beyond the string.
    if ($len >= strlen($str)) return $str;

    $i = $len-1;
    if (ord($str[$i]) & 0x80) {
        // The last byte that fits belongs to a multibyte sequence:
        // walk back over continuation bytes (10xxxxxx) to its first byte.
        while ($i >= 0 && (ord($str[$i]) & 0xC0) == 0x80) {
            $i--;
        }
        // Only continuation bytes in front of the cut: no complete character fits.
        if ($i < 0) return '';

        // The number of leading 1-bits in the first byte is the sequence length.
        for ($bc=0, $mbs=ord($str[$i]); $mbs & 0x80; $mbs = $mbs << 1) {
            $bc++;
        }

        // The sequence would reach past $len: cut in front of it.
        if ($bc+$i > $len) return substr($str,0,$i);
        // Otherwise the whole sequence fits and the plain cut below is safe.
    }

    // Return the head of the string (this used to return the tail, from $len onwards).
    return substr($str,0,$len);
}
01685
/**
 * Finds the first occurrence of a string in a UTF-8 string, with positions
 * given in characters.
 *
 * Uses mb_strpos() when $TYPO3_CONF_VARS['SYS']['t3lib_cs_utils'] is set to
 * 'mbstring'; otherwise strpos() on bytes, with the offset and the result
 * converted by utf8_char2byte_pos() and utf8_byte2char_pos().
 *
 * @param string  $haystack UTF-8 string to search in
 * @param string  $needle   UTF-8 string to search for
 * @param integer $offset   Character position to start the search at
 * @return integer|boolean Character position of the match, or false if there is none
 */
function utf8_strpos($haystack,$needle,$offset=0) {
    if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
        // mb_strpos() expects (haystack, needle, offset, encoding); the
        // encoding used to be passed in the offset slot and $offset was lost.
        return mb_strpos($haystack,$needle,$offset,'utf-8');
    }

    $byte_offset = $this->utf8_char2byte_pos($haystack,$offset);
    if ($byte_offset === false) return false;

    $byte_pos = strpos($haystack,$needle,$byte_offset);
    if ($byte_pos === false) return false;

    return $this->utf8_byte2char_pos($haystack,$byte_pos);
}
01709
/**
 * Finds the last occurrence of a string in a UTF-8 string, with the
 * position given in characters.
 *
 * Uses mb_strrpos() when $TYPO3_CONF_VARS['SYS']['t3lib_cs_utils'] is set to
 * 'mbstring'; otherwise strrpos() on bytes, with the result converted by
 * utf8_byte2char_pos().
 *
 * @param string $haystack UTF-8 string to search in
 * @param string $needle   UTF-8 string to search for
 * @return integer|boolean Character position of the match, or false if there is none
 */
function utf8_strrpos($haystack,$needle) {
    if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
        // mb_strrpos() gained an offset parameter in front of the encoding in
        // PHP 5.2.0, and PHP 8.0 no longer accepts the encoding in third
        // position. Use whichever call form the running PHP understands.
        if (version_compare(PHP_VERSION,'5.2.0','>=')) {
            return mb_strrpos($haystack,$needle,0,'utf-8');
        }
        return mb_strrpos($haystack,$needle,'utf-8');
    }

    $byte_pos = strrpos($haystack,$needle);
    if ($byte_pos === false) return false;

    return $this->utf8_byte2char_pos($haystack,$byte_pos);
}
01729
/**
 * Translates a character position in a UTF-8 string into a byte position.
 *
 * A positive $pos is counted from the start of the string, a negative one
 * from its end. The scan counts ASCII bytes and multibyte lead bytes as
 * characters and skips continuation bytes.
 *
 * @param string  $str UTF-8 string
 * @param integer $pos Character position (negative: counted from the end)
 * @return integer|boolean Byte position, or false if the scan runs off either end of the string
 */
function utf8_char2byte_pos($str,$pos) {
    $byteLength = strlen($str);
    $n = 0;            // characters passed so far
    $p = abs($pos);    // characters to pass

    if ($pos >= 0) {
        $i = 0;
        $d = 1;
    } else {
        $i = $byteLength-1;
        $d = -1;
    }

    // Explicit bounds replace the former "$str{$i}" probes. Negative string
    // offsets address bytes from the end since PHP 7.1, so running off the
    // front of the string was no longer detected by probing.
    for ( ; $i >= 0 && $i < $byteLength && $n < $p; $i += $d) {
        $c = ord($str[$i]);
        if (!($c & 0x80) || ($c & 0xC0) == 0xC0) {
            $n++;
        }
    }
    if ($i < 0 || $i >= $byteLength) return false;

    if ($pos >= 0) {
        // Skip continuation bytes so the result points at a character start.
        while ($i < $byteLength && (ord($str[$i]) & 0xC0) == 0x80) {
            $i++;
        }
    } else {
        // The backward scan stops one byte in front of the wanted character.
        $i++;
    }

    return $i;
}
01770
/**
 * Translates a byte position in a UTF-8 string into a character position.
 *
 * Counts the character start bytes (ASCII bytes and multibyte lead bytes)
 * found at byte indexes $pos down to 1. The position directly behind the
 * last byte is accepted and counts like a character start.
 *
 * @param string  $str UTF-8 string
 * @param integer $pos Byte position
 * @return integer|boolean Character position, or false if $str is empty or $pos lies outside of it
 */
function utf8_byte2char_pos($str,$pos) {
    $byteLength = strlen($str);
    // Reject positions outside the string instead of reading out-of-range
    // offsets (negative offsets address bytes from the end since PHP 7.1).
    if ($byteLength == 0 || $pos < 0 || $pos > $byteLength) return false;

    $n = 0;
    for ($i=$pos; $i>0; $i--) {
        // The index equal to the byte length has no byte; treat it as 0.
        $c = ($i < $byteLength) ? ord($str[$i]) : 0;
        if (!($c & 0x80) || ($c & 0xC0) == 0xC0) {
            $n++;
        }
    }

    return $n;
}
01793
/**
 * Maps every character of a UTF-8 string through a lookup table.
 *
 * @param string $str  UTF-8 string
 * @param string $mode 'case' (table from $this->caseFolding['utf-8']) or 'ascii' (table from $this->toASCII['utf-8'])
 * @param string $opt  Sub-key of the case folding table; only used when $mode is 'case'
 * @return string The mapped string; $str unchanged if the mode is unknown or initUnicodeData() fails
 */
function utf8_char_mapping($str,$mode,$opt='') {
    if (!$this->initUnicodeData($mode)) return $str;

    $out = '';
    switch($mode) {
        case 'case':
            $map =& $this->caseFolding['utf-8'][$opt];
            break;

        case 'ascii':
            $map =& $this->toASCII['utf-8'];
            break;

        default:
            return $str;
    }

    // Bounded loop instead of probing "$str{$i}" until it comes back empty.
    $byteLength = strlen($str);
    for ($i=0; $i<$byteLength; $i++) {
        $c = ord($str[$i]);
        if (($c & 0xC0) == 0xC0) {
            // Lead byte: its number of leading 1-bits is the sequence length.
            for ($bc=0; $c & 0x80; $c = $c << 1) { $bc++; }
            $mbc = substr($str,$i,$bc);
            $i += $bc-1;
        } else {
            // ASCII byte, or a continuation byte without a lead byte. The
            // latter is passed on as-is; it used to leave $mbc untouched,
            // which repeated the previous character in the output.
            $mbc = $str[$i];
        }

        $out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
    }

    return $out;
}
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
/**
 * Truncates a string in an EUC-style charset to at most $len bytes without
 * cutting a double-byte character in half.
 *
 * For 'shift_jis' the bytes 0x80-0x9F and 0xE0-0xFF start a double-byte
 * character; for every other charset any byte from 0x80 upwards does.
 *
 * @param string  $str     The string to truncate
 * @param integer $len     Maximum length in bytes
 * @param string  $charset The charset of $str
 * @return string The leading part of $str that fits into $len bytes
 */
function euc_strtrunc($str,$len,$charset) {
    if ($len <= 0) return '';

    // Decide "already short enough" by length. The former end-of-string
    // probe also fired when the character straddling $len was the last
    // one, returning more than $len bytes.
    $byteLength = strlen($str);
    if ($byteLength <= $len) return $str;

    $sjis = ($charset == 'shift_jis');
    // Step character by character until reaching or passing byte $len.
    for ($i=0; $i<$len; $i++) {
        $c = ord($str[$i]);
        if ($sjis) {
            if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;
        } else {
            if ($c >= 0x80) $i++;
        }
    }

    // Stepping past $len means byte $len-1 starts a double-byte character
    // that does not fit any more.
    if ($i > $len) {
        return substr($str,0,$len-1);
    }
    return substr($str,0,$len);
}
01897
/**
 * Returns a part of a string in an EUC-style charset, addressed in
 * characters rather than bytes.
 *
 * Character positions are converted to byte positions with
 * euc_char2byte_pos(). A length of 0, a negative length that cannot be
 * resolved, and a zero or negative start that cannot be resolved are
 * handled the same way utf8_substr() handles them.
 *
 * @param string  $str     The string to cut from
 * @param integer $start   Start position in characters (negative: counted from the end)
 * @param string  $charset The charset of $str
 * @param integer $len     Length in characters (negative: characters to leave off the end); null for "to the end"
 * @return string|boolean The substring, or false if a positive $start could not be resolved to a byte position
 */
function euc_substr($str,$start,$charset,$len=null) {
    // A length of exactly 0 yields an empty string (it used to fall
    // through to "no length given" and return the rest of the string).
    if ($len !== null && !strcmp($len,'0')) return '';

    $byte_start = $this->euc_char2byte_pos($str,$start,$charset);
    if ($byte_start === false) {
        if ($start > 0) {
            return false;
        }
        // Zero or negative start that could not be resolved: start at the first byte.
        $byte_start = 0;
    }

    $str = substr($str,$byte_start);

    if ($len != null) {
        $byte_end = $this->euc_char2byte_pos($str,$len,$charset);
        if ($byte_end === false) {
            // Length exceeds what is left: nothing remains for a negative
            // length, everything remains for a positive one.
            return $len<0 ? '' : $str;
        }
        return substr($str,0,$byte_end);
    }

    return $str;
}
01923
/**
 * Counts the characters of a string in an EUC-style charset.
 *
 * For 'shift_jis' the bytes 0x80-0x9F and 0xE0-0xFF start a double-byte
 * character; for every other charset any byte from 0x80 upwards does.
 *
 * @param string $str     The string to measure
 * @param string $charset The charset of $str
 * @return integer Number of characters
 */
function euc_strlen($str,$charset) {
    $sjis = ($charset == 'shift_jis');
    $count = 0;
    // Bounded loop instead of probing "$str{$i}" until it comes back empty.
    $byteLength = strlen($str);
    for ($i=0; $i<$byteLength; $i++) {
        $c = ord($str[$i]);
        if ($sjis) {
            if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;
        } else {
            if ($c >= 0x80) $i++;
        }

        $count++;
    }

    return $count;
}
01950
/**
 * Translates a character position in an EUC-style string into a byte
 * position.
 *
 * A positive $pos is counted from the start of the string, a negative one
 * from its end. For 'shift_jis' the bytes 0x80-0x9F and 0xE0-0xFF are
 * treated as one half of a double-byte character; for every other charset
 * any byte from 0x80 upwards is.
 *
 * @param string  $str     The string to scan
 * @param integer $pos     Character position (negative: counted from the end)
 * @param string  $charset The charset of $str
 * @return integer|boolean Byte position, or false if the scan runs off either end of the string
 */
function euc_char2byte_pos($str,$pos,$charset) {
    $sjis = ($charset == 'shift_jis');
    $byteLength = strlen($str);
    $n = 0;            // characters passed so far
    $p = abs($pos);    // characters to pass

    if ($pos >= 0) {
        $i = 0;
        $d = 1;
    } else {
        $i = $byteLength-1;
        $d = -1;
    }

    // Explicit bounds replace the former "$str{$i}" probes. Negative string
    // offsets address bytes from the end since PHP 7.1, so running off the
    // front of the string was no longer detected by probing.
    for ( ; $i >= 0 && $i < $byteLength && $n < $p; $i += $d) {
        $c = ord($str[$i]);
        if ($sjis) {
            if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i += $d;
        } else {
            if ($c >= 0x80) $i += $d;
        }

        $n++;
    }
    if ($i < 0 || $i >= $byteLength) return false;

    // The backward scan stops one byte in front of the wanted character.
    if ($pos < 0) $i++;

    return $i;
}
01990
/**
 * Maps every character of a string in an EUC-style charset through a
 * lookup table.
 *
 * For 'shift_jis' the bytes 0x80-0x9F and 0xE0-0xFF start a double-byte
 * character; for every other charset any byte from 0x80 upwards does.
 *
 * @param string $str     The string to convert
 * @param string $charset The charset of $str, used to select the table
 * @param string $mode    'case' (table from $this->caseFolding) or 'ascii' (table from $this->toASCII)
 * @param string $opt     Sub-key of the case folding table; only used when $mode is 'case'
 * @return string The mapped string; $str unchanged if the mode is unknown or the table could not be initialised
 */
function euc_char_mapping($str,$charset,$mode,$opt='') {
    switch($mode) {
        case 'case':
            if (!$this->initCaseFolding($charset)) return $str;
            $map =& $this->caseFolding[$charset][$opt];
            break;

        case 'ascii':
            if (!$this->initToASCII($charset)) return $str;
            $map =& $this->toASCII[$charset];
            break;

        default:
            return $str;
    }

    $sjis = ($charset == 'shift_jis');
    $out = '';
    // Bounded loop instead of probing "$str{$i}" until it comes back empty.
    $byteLength = strlen($str);
    for ($i=0; $i<$byteLength; $i++) {
        $mbc = $str[$i];
        $c = ord($mbc);

        if ($sjis) {
            $isDoubleByte = (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0));
        } else {
            $isDoubleByte = ($c >= 0x80);
        }
        if ($isDoubleByte) {
            // Take both bytes as one lookup key and skip the second byte.
            $mbc = substr($str,$i,2);
            $i++;
        }

        // Characters without a table entry are copied unchanged.
        $out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
    }

    return $out;
}
02045
02046 }
02047
// When TYPO3_MODE is defined and a file is registered for this class under
// the 'XCLASS' key of $TYPO3_CONF_VARS, include that file once.
if (defined('TYPO3_MODE')) {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']) {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']);
    }
}
02051 ?>