Main Page | Directories | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages | Examples

class.t3lib_parsehtml.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 1999-2004 Kasper Skaarhoj (kasperYYYY@typo3.com)
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
00106 class t3lib_parsehtml {
00107    var $caseShift_cache=array();
00108 
00109 
00110    // *******************************************'
00111    // COPY FROM class.tslib_content.php: / BEGIN
00112    // substituteSubpart
00113    // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
00114    // *******************************************'
00115 
/**
 * Returns the first subpart of $content, i.e. the text found between the first
 * two occurrences of $marker.
 *
 * When the markers are wrapped in HTML comments (for example
 * "<!-- ###MARKER### begin -->"), the tail of the opening comment and the head
 * of the closing comment are excluded from the returned text.
 *
 * @param   string  $content  Content in which to look for the subpart
 * @param   string  $marker   Marker string delimiting the subpart, e.g. "###CONTENT###"
 * @return  string  The subpart content, or NULL when $marker is empty or does not occur twice
 */
function getSubpart($content, $marker) {
    if (!$marker) {
        return NULL;
    }

    $start = strpos($content, $marker);
    if ($start === FALSE) {
        return NULL;
    }
    $start += strlen($marker);

        // Search from directly after the first marker so that an empty subpart
        // (two adjacent markers) is found as well.
    $stop = strpos($content, $marker, $start);
    if ($stop === FALSE) {
            // No closing marker: there is no subpart to return.
        return NULL;
    }

    $sub = substr($content, $start, $stop - $start);

        // Skip the rest of an HTML comment that wraps the opening marker.
    $reg = array();
    if (preg_match('/^[^<]*-->/', $sub, $reg)) {
        $start += strlen($reg[0]);
    }

        // Cut off the beginning of an HTML comment that wraps the closing marker.
        // "\z" anchors at the very end of the string, like "$" did in ereg().
    $reg = array();
    if (preg_match('/<!--[^>]*\z/', $sub, $reg)) {
        $stop -= strlen($reg[0]);
    }

    return substr($content, $start, $stop - $start);
}
00140 
/**
 * Replaces the subpart delimited by two occurrences of $marker (including the
 * markers and any HTML comments wrapping them) with $subpartContent.
 *
 * @param   string   $content         Content containing the subpart
 * @param   string   $marker          Marker string delimiting the subpart
 * @param   mixed    $subpartContent  Replacement string; if an array, the existing subpart content
 *                                    is kept and wrapped between element 0 and element 1
 * @param   boolean  $recursive       If set, further subparts with the same marker after the first one are substituted too
 * @param   boolean  $keepMarker      If set, the markers (with their wrapping HTML comments) are kept around the new content
 * @return  string   The processed content; $content unchanged when no complete subpart was found
 */
function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
    $markerLen = strlen($marker);
    if (!$markerLen) {
        return $content;
    }

        // Explicit FALSE checks: a marker at offset 0 is valid, and a missing
        // closing marker must not be mistaken for a position.
    $start = strpos($content, $marker);
    if ($start === FALSE) {
        return $content;
    }
    $stop = strpos($content, $marker, $start + $markerLen);
    if ($stop === FALSE) {
        return $content;
    }
    $stop += $markerLen;

    $before_marker = '';
    $after_marker = '';

        // Code before: extend the start backwards over an HTML comment opening the first marker.
    $reg = array();
    if (preg_match('/<!--[^>]*\z/', substr($content, 0, $start), $reg)) {
        $start -= strlen($reg[0]);
    }
    if ($keepMarker) {
        $reg_k = array();
        if (!empty($reg[0])) {
            preg_match('/^[^>]*-->/', substr($content, $start), $reg_k);
        }
        $before_marker = substr($content, $start, strlen(!empty($reg_k[0]) ? $reg_k[0] : $marker));
    }
    $before = substr($content, 0, $start);

        // Code after: extend the stop forwards over the rest of an HTML comment closing the second marker.
    $reg = array();
    if (preg_match('/^[^<]*-->/', (string)substr($content, $stop), $reg)) {
        $stop += strlen($reg[0]);
    }
    if ($keepMarker) {
        $reg_k = array();
        if (!empty($reg[0])) {
            preg_match('/<!--[^<]*\z/', substr($content, 0, $stop), $reg_k);
        }
        $sLen = strlen(!empty($reg_k[0]) ? $reg_k[0] : $marker);
        $after_marker = substr($content, $stop - $sLen, $sLen);
    }
    $after = (string)substr($content, $stop);

        // Replace or wrap?
    if (is_array($subpartContent)) {
        $substContent = $subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
    } else {
        $substContent = $subpartContent;
    }

        // Handle following subparts with the same settings as this one.
    if ($recursive && strpos($after, $marker) !== FALSE) {
        $after = $this->substituteSubpart($after,$marker,$subpartContent,$recursive,$keepMarker);
    }

    return $before.$before_marker.$substContent.$after_marker.$after;
}
00196    // *******************************************'
00197    // COPY FROM class.tslib_content.php: / END
00198    // *******************************************'
00199 
00200 
00201 
00202 
00203 
00204 
00205 
00206    /************************************
00207     *
00208     * Parsing HTML code
00209     *
00210     ************************************/
00211 
/**
 * Splits $content into an array where every odd-indexed element is a complete
 * block (start tag, content, end tag) of one of the tags in $tag, and every
 * even-indexed element is the content outside such blocks. Nested blocks of the
 * listed tags stay inside their outermost block.
 *
 * @param   string   $tag                    Comma list of tag names to split by (matched case-insensitively)
 * @param   string   $content                HTML content to split
 * @param   boolean  $eliminateExtraEndTags  If set, end tags found on ground level (without a matching start tag) are dropped
 * @return  array    Even keys: outside content. Odd keys: tag blocks.
 */
function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
    $tags = array_unique(t3lib_div::trimExplode(',',$tag,1));
        // Only the regex delimiter is escaped so the tag list is interpreted as before.
    $tagAlternatives = str_replace('/', '\/', implode('|',$tags));
    $regexStr = '/<\/?('.$tagAlternatives.')(>|[[:space:]][^>]*>)/i';

    $parts = preg_split($regexStr,$content);
    if (!is_array($parts)) {
            // Regex failure: treat everything as content outside any block.
        $parts = array($content);
    }

    $newParts = array();
    $pointer = strlen($parts[0]);
    $buffer = $parts[0];
    $nested = 0;
    $partCount = count($parts);

        // $parts[0] is handled above; each further part is preceded in $content by one matched tag.
    for ($i = 1; $i < $partCount; $i++) {
        $v = $parts[$i];
        $isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
        $tagLen = strcspn(substr($content,$pointer),'>')+1;

        if (!$isEndTag) {   // We meet a start-tag:
            if (!$nested) { // Ground level:
                $newParts[] = $buffer;  // previous buffer stored
                $buffer = '';
            }
            $nested++;  // We are inside now!
            $mbuffer = substr($content,$pointer,strlen($v)+$tagLen);    // New buffer set and pointer increased
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        } else {    // If we meet an endtag:
            $nested--;  // decrease nested-level
            $eliminated = 0;
            if ($eliminateExtraEndTags && $nested<0) {
                $nested = 0;
                $eliminated = 1;
            } else {
                $buffer .= substr($content,$pointer,$tagLen);   // Add the endtag to current buffer
            }
            $pointer += $tagLen;
            if (!$nested && !$eliminated) { // If we're back on ground level (and not by eliminating tags):
                $newParts[] = $buffer;
                $buffer = '';
            }
            $mbuffer = substr($content,$pointer,strlen($v));    // New buffer set and pointer increased
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        }
    }
    $newParts[] = $buffer;
    return $newParts;
}
00271 
/**
 * Splits $content into blocks of the tags in $tag (via splitIntoBlock() with
 * elimination of extra end tags), processes the inner content of every block
 * recursively, and lets callback methods on $procObj modify the content between
 * blocks and the tag data of each block. The pieces are concatenated again.
 *
 * @param   string   $tag              Comma list of tag names to split by
 * @param   string   $content          HTML content to process
 * @param   object   $procObj          Object on which the callback methods are called
 * @param   string   $callBackContent  Method name called as $procObj->method($content, $level) for content outside blocks; skipped if empty
 * @param   string   $callBackTags     Method name called as $procObj->method($tagsArray, $level) for every block; skipped if empty
 * @param   integer  $level            Current nesting level (used internally in recursion)
 * @return  string   Processed content
 */
function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0) {
    $segments = $this->splitIntoBlock($tag,$content,TRUE);

    foreach (array_keys($segments) as $idx) {
            // Even index: content outside of any block.
        if (!($idx % 2)) {
            if ($callBackContent) {
                $segments[$idx] = $procObj->$callBackContent($segments[$idx],$level);
            }
            continue;
        }

            // Odd index: a complete tag block.
        $block = $segments[$idx];
        $tagNameAsWritten = $this->getFirstTagName($block, TRUE);
        $blockInfo = array(
            'tag_start' => $this->getFirstTag($block),
            'tag_end' => '</'.$tagNameAsWritten.'>',
            'tag_name' => strtolower($tagNameAsWritten),
            'add_level' => 1
        );
        $blockInfo['content'] = $this->splitIntoBlockRecursiveProc(
            $tag,
            $this->removeFirstAndLastTag($block),
            $procObj,
            $callBackContent,
            $callBackTags,
            $level + $blockInfo['add_level']
        );

        if ($callBackTags) {
            $blockInfo = $procObj->$callBackTags($blockInfo,$level);
        }

        $segments[$idx] = $blockInfo['tag_start'].$blockInfo['content'].$blockInfo['tag_end'];
    }

    return implode('',$segments);
}
00309 
/**
 * Splits $content by single tags (not blocks): every odd-indexed element of the
 * result is one tag from the list in $tag, every even-indexed element is the
 * content between such tags.
 *
 * @param   string  $tag      Comma list of tag names to split by (matched case-insensitively)
 * @param   string  $content  HTML content to split
 * @return  array   Even keys: content between tags. Odd keys: the tags themselves.
 */
function splitTags($tag,$content) {
    $tags = t3lib_div::trimExplode(',',$tag,1);
        // Only the regex delimiter is escaped so the tag list is interpreted as before.
    $tagAlternatives = str_replace('/', '\/', implode('|',$tags));
    $regexStr = '/<('.$tagAlternatives.')(>|\/>|[[:space:]][^>]*>)/i';

    $parts = preg_split($regexStr,$content);
    if (!is_array($parts)) {
            // Regex failure: return the content as one non-tag element.
        $parts = array($content);
    }

    $pointer = strlen($parts[0]);
    $newParts = array();
    $newParts[] = $parts[0];
    $partCount = count($parts);

        // Each part after the first is preceded in $content by exactly one matched tag.
    for ($i = 1; $i < $partCount; $i++) {
        $v = $parts[$i];
        $tagLen = strcspn(substr($content,$pointer),'>')+1;

            // Set tag (read back from $content at the current pointer):
        $tagString = substr($content,$pointer,$tagLen);
        $newParts[] = $tagString;
        $pointer += strlen($tagString);

            // Set content:
        $newParts[] = $v;
        $pointer += strlen($v);
    }
    return $newParts;
}
00344 
/**
 * Picks every second element of an even/odd array such as the one returned by
 * splitIntoBlock() or splitTags().
 *
 * @param   array    $parts        Array with alternating elements (even keys / odd keys)
 * @param   boolean  $tag_parts    If set, the odd-keyed elements are returned; otherwise the even-keyed ones
 * @param   boolean  $include_tag  If not set, removeFirstAndLastTag() is applied to each returned element
 * @return  array    The selected elements, re-indexed from 0
 */
function getAllParts($parts,$tag_parts=1,$include_tag=1) {
    $newParts = array();
    foreach ($parts as $k => $v) {
        if (($k + ($tag_parts ? 0 : 1)) % 2) {
            if (!$include_tag) {
                $v = $this->removeFirstAndLastTag($v);
            }
            $newParts[] = $v;
        }
    }
    return $newParts;
}
00365 
/**
 * Strips the leading tag and the trailing tag from a string: everything up to
 * and including the first ">" is removed, and everything from the last "<"
 * onwards is removed.
 *
 * @param   string  $str  String to strip, typically a tag block like "<b>text</b>"
 * @return  string  What remains between the two; an empty string if there is no ">" or no "<" after it
 */
function removeFirstAndLastTag($str) {
        // First tag: ends at the first ">".
    $firstClose = strpos($str,'>');
    if ($firstClose === FALSE) {
        return '';
    }
    $remainder = substr($str,$firstClose+1);

        // Last tag: begins at the last "<" of what is left.
    $lastOpen = strrpos($remainder,'<');
    if ($lastOpen === FALSE) {
        return '';
    }

    return substr($remainder,0,$lastOpen);
}
00384 
/**
 * Returns the beginning of $str up to and including the first ">" character.
 *
 * @param   string  $str  String expected to begin with a tag
 * @return  string  The leading part of $str ending with the first ">"; the whole string if it contains no ">"
 */
function getFirstTag($str) {
    $lengthInclBracket = strcspn($str,'>') + 1;
    return substr($str,0,$lengthInclBracket);
}
00398 
/**
 * Returns the name of the first tag in $str: the text between the opening "<"
 * and the first whitespace character (or the closing ">").
 *
 * @param   string   $str           String beginning with a tag
 * @param   boolean  $preserveCase  If set, the name is returned as written; otherwise it is converted to upper case
 * @return  string   The tag name
 */
function getFirstTagName($str,$preserveCase=FALSE) {
        // Drop the surrounding "<" and ">" from the first tag.
    $tagInner = (string)substr(trim($this->getFirstTag($str)),1,-1);

        // The name is everything before the first whitespace character.
    $pieces = preg_split('/[[:space:]]/',$tagInner,2);
    $tag = $pieces[0];

    if (!$preserveCase) {
        $tag = strtoupper($tag);
    }

    return trim($tag);
}
00413 
/**
 * Parses the attributes of a tag into an array keyed by lower-cased attribute name.
 *
 * The tag is tokenized with split_tag_attributes(); a token following "=" is the
 * value of the most recently seen attribute name, any other token starts a new
 * attribute (with an empty value until a value is assigned).
 *
 * @param   string   $tag    Tag string, or only the attribute part of a tag
 * @param   boolean  $deHSC  If set, attribute values are passed through t3lib_div::htmlspecialchars_decode()
 * @return  array    array(0 => attributes (name => value),
 *                         1 => meta data per attribute: 'origTag' (name stripped of characters outside
 *                              a-zA-Z0-9_:-) and 'dashType' (quote character that wrapped the value, or ''))
 */
function get_tag_attributes($tag,$deHSC=0) {
    list($components,$metaC) = $this->split_tag_attributes($tag);
    $name = '';         // Attribute name waiting for a value is stored here
    $valuemode = FALSE; // TRUE right after an "=" token
    $attributes = array();
    $attributesMeta = array();

    if (!is_array($components)) {
        return NULL;
    }

    foreach ($components as $key => $val) {
        if ($val != '=') {
                // Only if $name is set (an attribute is waiting for a value) is the
                // token assigned as value. This ensures the attribute gets its value.
            if ($valuemode) {
                if ($name) {
                    $attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
                    $attributesMeta[$name]['dashType'] = $metaC[$key];
                    $name = '';
                }
            } else {
                $namekey = preg_replace('/[^a-zA-Z0-9_:-]/','',$val);
                if ($namekey) {
                    $name = strtolower($namekey);
                    $attributesMeta[$name] = array();
                    $attributesMeta[$name]['origTag'] = $namekey;
                    $attributes[$name] = '';
                }
            }
            $valuemode = FALSE;
        } else {
            $valuemode = TRUE;
        }
    }

    return array($attributes,$attributesMeta);
}
00454 
/**
 * Tokenizes the attribute part of a tag into attribute names, "=" signs and values.
 *
 * The tag name ("<name") and a trailing ">" are removed first. Quoted values
 * (single or double quotes) become one token without the quotes; unquoted
 * tokens end at the next whitespace character or "=".
 *
 * @param   string  $tag  Tag string, or only the attribute part of a tag
 * @return  array   array(0 => list of tokens, 1 => parallel list holding the quote
 *                  character that wrapped each token, or '' if it was not quoted)
 */
function split_tag_attributes($tag) {
        // Every character matched by [[:space:]] plus NUL. Plain trim() leaves
        // a form feed in place, which made the loop below spin forever on it.
    $whitespace = " \t\n\r\0\x0B\x0C";

        // Removes the tag name.
    $tag_tmp = trim(preg_replace('/^<[^[:space:]]*/','',trim($tag,$whitespace)),$whitespace);
        // Removes any > in the end of the string.
    $tag_tmp = trim(preg_replace('/>\z/','',$tag_tmp),$whitespace);

    $metaValue = array();
    $value = array();
    while (strcmp($tag_tmp,'')) {
        $firstChar = substr($tag_tmp,0,1);
        if ($firstChar === '"' || $firstChar === "'") {
                // Quoted value: everything up to the matching quote character.
            $reg = explode($firstChar,$tag_tmp,3);
            $value[] = $reg[1];
            $metaValue[] = $firstChar;
                // An unterminated quote leaves nothing behind it.
            $tag_tmp = isset($reg[2]) ? trim($reg[2],$whitespace) : '';
        } elseif ($firstChar === '=') {
            $value[] = '=';
            $metaValue[] = '';
            $tag_tmp = trim((string)substr($tag_tmp,1),$whitespace);   // Removes the = char.
        } else {
                // There are no quotes around the token. We look for the next whitespace or "=".
            $reg = preg_split('/[[:space:]=]/',$tag_tmp,2);
            $value[] = trim($reg[0]);
            $metaValue[] = '';
                // Keep the separator character (a "=" must survive for the next round; whitespace is trimmed away).
            $rest = isset($reg[1]) ? $reg[1] : '';
            $tag_tmp = trim(substr($tag_tmp,strlen($reg[0]),1).$rest,$whitespace);
        }
    }

    return array($value,$metaValue);
}
00493 
/**
 * Counts start and end tags in $content and reports imbalances.
 *
 * The content is lower-cased before counting. A tag is counted when "<name" or
 * "</name" is followed by a non-alphanumeric character.
 *
 * @param   string  $content    HTML content to analyze
 * @param   string  $blockTags  Comma list of tags that must have an end tag
 * @param   string  $soloTags   Comma list of tags that should not have an end tag
 * @return  array   Keys: 'counts' (start-tag count per tag that occurs), 'errors' (block tags with more start
 *                  than end tags), 'warnings' (block tags with more end than start tags, solo tags with end tags),
 *                  'blocks' (tag => array(start count, end count, difference)), 'solo' (tag => array(start count, end count))
 */
function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') {
    $content = strtolower($content);
    $analyzedOutput = array();
    $analyzedOutput['counts'] = array();    // Counts appearances of start-tags
    $analyzedOutput['errors'] = array();    // Lists ERRORS
    $analyzedOutput['warnings'] = array();  // Lists warnings.
    $analyzedOutput['blocks'] = array();    // Lists stats for block-tags
    $analyzedOutput['solo'] = array();      // Lists stats for solo-tags

        // Block tags, must have endings...
    $blockTags = explode(',',$blockTags);
    foreach ($blockTags as $tagName) {
            // Only the regex delimiter is escaped so the tag name is interpreted as before.
        $tagRegex = str_replace('/', '\/', $tagName);
        $matches = array();
        $countBegin = (int)preg_match_all('/<'.$tagRegex.'[^[:alnum:]]/',$content,$matches);
        $countEnd = (int)preg_match_all('/<\/'.$tagRegex.'[^[:alnum:]]/',$content,$matches);

        $analyzedOutput['blocks'][$tagName] = array($countBegin,$countEnd,$countBegin-$countEnd);
        if ($countBegin) {
            $analyzedOutput['counts'][$tagName] = $countBegin;
        }
        if ($countBegin-$countEnd) {
            if ($countBegin-$countEnd > 0) {
                $analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
            } else {
                $analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
            }
        }
    }

        // Solo tags, must NOT have endings...
    $soloTags = explode(',',$soloTags);
    foreach ($soloTags as $tagName) {
        $tagRegex = str_replace('/', '\/', $tagName);
        $matches = array();
        $countBegin = (int)preg_match_all('/<'.$tagRegex.'[^[:alnum:]]/',$content,$matches);
        $countEnd = (int)preg_match_all('/<\/'.$tagRegex.'[^[:alnum:]]/',$content,$matches);

        $analyzedOutput['solo'][$tagName] = array($countBegin,$countEnd);
        if ($countBegin) {
            $analyzedOutput['counts'][$tagName] = $countBegin;
        }
        if ($countEnd) {
            $analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
        }
    }

    return $analyzedOutput;
}
00546 
00547 
00548 
00549 
00550 
00551 
00552 
00553 
00554 
00555 
00556 
00557 
00558    /*********************************
00559     *
00560     * Clean HTML code
00561     *
00562     *********************************/
00563 
/**
 * Cleans HTML content tag by tag according to the configuration in $tags.
 *
 * The content is split at "<"; a token counts as a tag when its first character
 * is alphanumeric or "/" and it contains a ">". Text outside tags is passed
 * through $this->processContent(), emitted tags through $this->processTag().
 *
 * Per tag name (lower case key in $tags) the value is either a non-array (the
 * tag is kept as it is) or an array with these keys read by this method:
 *   overrideAttribs   string replacing the whole attribute string
 *   allowedAttribs    comma list of attributes to keep; "0" removes all attributes
 *   fixAttrib         array attribute => rules. Rule keys: set, unset, default, always, trim, intval,
 *                     lower, upper, range, list, casesensitiveComp, removeIfFalse, removeIfEquals,
 *                     prefixLocalAnchors, prefixRelPathWith, userFunc
 *   protect           emit the tag with &lt; and &gt; instead of < and >
 *   remap             new tag name
 *   rmTagIfNoAttrib   drop a start tag that ends up without attributes
 *   nesting           track start/end tags; start tags left unclosed and end tags without a start tag
 *                     are removed. With the value "global" a shared stack across tag names is used.
 *
 * @param   string   $content    HTML content to clean
 * @param   array    $tags       Tag configuration, see above
 * @param   mixed    $keepAll    If set, tags not listed in $tags are kept; the string "protect" keeps them with &lt;/&gt;
 * @param   integer  $hSC        Passed on to $this->processContent()
 * @param   array    $addConfig  Passed on to $this->processContent() and $this->processTag()
 * @return  string   Cleaned content
 */
function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array()) {
    $newContent = array();
    $tokArr = explode('<',$content);
    $newContent[] = $this->processContent(current($tokArr),$hSC,$addConfig);

    $c = 1;
    $tagRegister = array();
    $tagStack = array();

        // The first token (content before the first "<") has been handled above.
    foreach (array_slice($tokArr,1) as $tok) {
        $firstChar = (string)substr($tok,0,1);

            // It is a tag only if the first char is a-z0-9 or "/". This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!preg_match('/[[:alnum:]\/]/',$firstChar)) {
            $newContent[$c++] = $this->processContent('<'.$tok,$hSC,$addConfig);    // It was not a tag anyways
            continue;
        }

        $tagEnd = strcspn($tok,'>');
        if (strlen($tok)==$tagEnd) {
            $newContent[$c++] = $this->processContent('<'.$tok,$hSC,$addConfig);    // There was no end-bracket, so no tag...
            continue;
        }

        $endTag = $firstChar=='/' ? 1 : 0;
        $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
        $tagParts = preg_split('/[[:space:]]/',$tagContent,2);
        if (!isset($tagParts[1])) {
                // Tag without attributes.
            $tagParts[1] = '';
        }
        $tagName = strtolower($tagParts[0]);

        if (isset($tags[$tagName])) {
            if (is_array($tags[$tagName])) {    // If there is processing to do for the tag:

                if (!$endTag) { // If NOT an endtag, do attribute processing
                        // Override attributes
                    if (strcmp($tags[$tagName]['overrideAttribs'],'')) {
                        $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                    }

                        // Allowed attributes
                    if (strcmp($tags[$tagName]['allowedAttribs'],'')) {
                        if (!strcmp($tags[$tagName]['allowedAttribs'],'0')) {   // No attribs allowed
                            $tagParts[1] = '';
                        } elseif (trim($tagParts[1])) {
                            $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                            $tagParts[1] = '';
                            $newTagAttrib = array();
                            $tList = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
                            foreach ($tList as $allowTag) {
                                if (isset($tagAttrib[0][$allowTag])) {
                                    $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                }
                            }
                            $tagParts[1] = $this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
                        }
                    }

                        // Fixed attrib values
                    if (is_array($tags[$tagName]['fixAttrib'])) {
                        $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                        $tagParts[1] = '';
                        foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                            if (strlen($params['set'])) {
                                $tagAttrib[0][$attr] = $params['set'];
                            }
                            if (strlen($params['unset'])) {
                                unset($tagAttrib[0][$attr]);
                            }
                            if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr])) {
                                $tagAttrib[0][$attr] = $params['default'];
                            }
                            if ($params['always'] || isset($tagAttrib[0][$attr])) {
                                if ($params['trim']) {
                                    $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);
                                }
                                if ($params['intval']) {
                                    $tagAttrib[0][$attr] = intval($tagAttrib[0][$attr]);
                                }
                                if ($params['lower']) {
                                    $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);
                                }
                                if ($params['upper']) {
                                    $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);
                                }
                                if ($params['range']) {
                                    if (isset($params['range'][1])) {
                                        $tagAttrib[0][$attr] = t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
                                    } else {
                                        $tagAttrib[0][$attr] = t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
                                    }
                                }
                                if (is_array($params['list'])) {
                                        // Value not in the list: fall back to the first list entry.
                                    if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName))) {
                                        $tagAttrib[0][$attr] = $params['list'][0];
                                    }
                                }
                                if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],''))) {
                                    unset($tagAttrib[0][$attr]);
                                }
                                if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp']))) {
                                    unset($tagAttrib[0][$attr]);
                                }
                                if ($params['prefixLocalAnchors']) {
                                    if (substr($tagAttrib[0][$attr],0,1)=='#') {
                                        $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
                                        $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
                                            // Mode 2: strip the site URL again so the value is relative to the site.
                                        if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) {
                                            $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
                                        }
                                    }
                                }
                                if ($params['prefixRelPathWith']) {
                                    $urlParts = parse_url($tagAttrib[0][$attr]);
                                    if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') {   // If it is NOT an absolute URL (by http: or starting "/")
                                        $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
                                    }
                                }
                                if ($params['userFunc']) {
                                    $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
                                }
                            }
                        }
                        $tagParts[1] = $this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
                    }
                } else {    // If endTag, remove any possible attributes:
                    $tagParts[1] = '';
                }

                    // Protecting the tag by converting < and > to &lt; and &gt; ?
                if ($tags[$tagName]['protect']) {
                    $lt = '&lt;';
                    $gt = '&gt;';
                } else {
                    $lt = '<';
                    $gt = '>';
                }

                    // Remapping tag name?
                if ($tags[$tagName]['remap']) {
                    $tagParts[0] = $tags[$tagName]['remap'];
                }

                    // rmTagIfNoAttrib
                if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) {
                    $setTag = 1;

                    if ($tags[$tagName]['nesting']) {
                        if (!isset($tagRegister[$tagName]) || !is_array($tagRegister[$tagName])) {
                            $tagRegister[$tagName] = array();
                        }

                        if ($endTag) {
                            $correctTag = 1;
                            if ($tags[$tagName]['nesting']=='global') {
                                $lastEl = end($tagStack);
                                if (strcmp($tagName,$lastEl)) {
                                    if (in_array($tagName,$tagStack)) {
                                            // Drop the start tags of all elements opened after the one closed now.
                                        while (count($tagStack) && strcmp($tagName,$lastEl)) {
                                            $elPos = end($tagRegister[$lastEl]);
                                            unset($newContent[$elPos]);

                                            array_pop($tagRegister[$lastEl]);
                                            array_pop($tagStack);
                                            $lastEl = end($tagStack);
                                        }
                                    } else {
                                        $correctTag = 0;    // The end tag has no start tag on the stack
                                    }
                                }
                            }
                            if (!count($tagRegister[$tagName]) || !$correctTag) {
                                $setTag = 0;
                            } else {
                                array_pop($tagRegister[$tagName]);
                                if ($tags[$tagName]['nesting']=='global') {
                                    array_pop($tagStack);
                                }
                            }
                        } else {
                                // Remember where the start tag is stored so it can be removed if never closed.
                            array_push($tagRegister[$tagName],$c);
                            if ($tags[$tagName]['nesting']=='global') {
                                array_push($tagStack,$tagName);
                            }
                        }
                    }

                    if ($setTag) {
                            // Setting the tag
                        $newContent[$c++] = $this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
                    }
                }
            } else {
                $newContent[$c++] = $this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
            }
        } elseif ($keepAll) {   // This is if the tag was not defined in the array for processing:
            if (!strcmp($keepAll,'protect')) {
                $lt = '&lt;';
                $gt = '&gt;';
            } else {
                $lt = '<';
                $gt = '>';
            }
            $newContent[$c++] = $this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
        }

            // Content following the tag:
        $newContent[$c++] = $this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
    }

        // Unsetting start tags that were registered for nesting but never closed:
    foreach ($tagRegister as $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }

    return implode('',$newContent);
}
00788 
/**
 * Converts special characters to HTML entities or back, depending on $dir.
 *
 * @param   string   $value  Input string
 * @param   integer  $dir    1: htmlspecialchars(). 2: htmlspecialchars() followed by t3lib_div::deHSCentities().
 *                           -1: converts &gt; &lt; &quot; and &amp; (in that order) back to characters.
 *                           Any other value leaves the string untouched.
 * @return  string   Converted string
 */
function bidir_htmlspecialchars($value,$dir) {
    switch ($dir) {
        case 1:
            return htmlspecialchars($value);
        case 2:
            return t3lib_div::deHSCentities(htmlspecialchars($value));
        case -1:
                // &amp; is decoded last so that e.g. "&amp;lt;" becomes "&lt;" and not "<".
            return str_replace(
                array('&gt;','&lt;','&quot;','&amp;'),
                array('>','<','"','&'),
                $value
            );
    }
    return $value;
}
00809 
/**
 * Prefixes relative resource references in HTML content with a path.
 *
 * Handled attributes: "background" of td, body and table; "src" of img, input
 * and script; "href" of link and a; "action" of form. In addition the prefix is
 * put in front of every url(...) value inside <style> blocks.
 *
 * @param   string  $main_prefix   Prefix to prepend to relative paths
 * @param   string  $content       HTML content to process
 * @param   array   $alternatives  Alternative prefixes: upper-case tag name => prefix for that tag;
 *                                 the key 'style' sets the prefix used inside <style> blocks
 * @return  string  Processed content
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array()) {
        // Which attribute carries the resource reference, per tag name:
    $resourceAttribute = array(
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action'
    );

    $parts = $this->splitTags('td,table,body,img,input,form,link,script,a',$content);
    foreach ($parts as $k => $v) {
        if (!($k%2)) {
            continue;   // Content between tags is left alone.
        }

        $params = $this->get_tag_attributes($v,1);
        $tagEnd = substr($v,-2)=='/>' ? ' />' : '>';   // Detect tag-ending so that it is re-applied correctly.
        $firstTagName = $this->getFirstTagName($v);    // The 'name' of the first tag (upper case)
        $tagKey = strtolower($firstTagName);
        $prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;

        if (!isset($resourceAttribute[$tagKey])) {
            continue;
        }
        $attr = $resourceAttribute[$tagKey];

            // The tag is rebuilt only if the attribute holds a non-empty value.
        if (!empty($params[0][$attr])) {
            $params[0][$attr] = $this->prefixRelPath($prefix,$params[0][$attr]);
            $parts[$k] = '<'.trim($tagKey.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
        }
    }
    $content = implode('',$parts);

        // Fix <style> section:
    $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
    if (strlen($prefix)) {
            // ${n} references cannot merge with a prefix that starts with a digit; "\" and "$"
            // in the prefix are escaped so that it is inserted literally.
        $replacement = '${1}'.addcslashes($prefix,'\\$').'${2}${3}';
        $parts = $this->splitIntoBlock('style',$content);
        foreach ($parts as $k => $v) {
            if ($k%2) {
                $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$parts[$k]);
            }
        }
        $content = implode('',$parts);
    }

    return $content;
}
00890 
00899    function prefixRelPath($prefix,$srcVal)   {
00900       $pU = parse_url($srcVal);
00901       if (!$pU['scheme'] && substr($srcVal, 0, 1)!='/')  { // If not an absolute URL.
00902          $srcVal = $prefix.$srcVal;
00903       }
00904       return $srcVal;
00905    }
00906 
00917    function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)  {
00918       $fontSplit = $this->splitIntoBlock('font',$value); // ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set!
00919       reset($fontSplit);
00920       while(list($k,$v)=each($fontSplit)) {
00921          if ($k%2)   {  // font:
00922             $attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v));
00923             $newAttribs=array();
00924             if ($keepFace && $attribArray['face']) $newAttribs[]='face="'.$attribArray['face'].'"';
00925             if ($keepSize && $attribArray['size']) $newAttribs[]='size="'.$attribArray['size'].'"';
00926             if ($keepColor && $attribArray['color'])  $newAttribs[]='color="'.$attribArray['color'].'"';
00927 
00928             $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor);
00929             if (count($newAttribs)) {
00930                $fontSplit[$k]='<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>';
00931             } else {
00932                $fontSplit[$k]=$innerContent;
00933             }
00934          }
00935       }
00936       return implode('',$fontSplit);
00937    }
00938 
00948    function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')   {
00949 
00950       foreach($tags as $from => $to)   {
00951          $value = eregi_replace($ltChar.$from.'>',$ltChar2.$to.'>',$value);
00952          $value = eregi_replace($ltChar.$from.'[[:space:]]([^>]*)>',$ltChar2.$to.' \\1>',$value);
00953          $value = eregi_replace($ltChar.'\/'.$from.'[^>]*>',$ltChar2.'/'.$to.'>',$value);
00954       }
00955       return $value;
00956    }
00957 
00965    function unprotectTags($content,$tagList='') {
00966       $tagsArray = t3lib_div::trimExplode(',',$tagList,1);
00967       $contentParts = explode('&lt;',$content);
00968       next($contentParts); // bypass the first
00969       while(list($k,$tok)=each($contentParts))  {
00970          $firstChar = substr($tok,0,1);
00971          if (strcmp(trim($firstChar),'')) {
00972             $subparts = explode('&gt;',$tok,2);
00973             $tagEnd = strlen($subparts[0]);
00974             if (strlen($tok)!=$tagEnd) {
00975                $endTag = $firstChar=='/' ? 1 : 0;
00976                $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
00977                $tagParts = split('[[:space:]]',$tagContent,2);
00978                $tagName = strtolower($tagParts[0]);
00979                if (!strcmp($tagList,'') || in_array($tagName,$tagsArray))  {
00980                   $contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
00981                } else $contentParts[$k] = '&lt;'.$tok;
00982             } else $contentParts[$k] = '&lt;'.$tok;
00983          } else $contentParts[$k] = '&lt;'.$tok;
00984       }
00985 
00986       return implode('',$contentParts);
00987    }
00988 
00998    function stripTagsExcept($value,$tagList) {
00999       $tags=t3lib_div::trimExplode(',',$tagList,1);
01000       $forthArr=array();
01001       $backArr=array();
01002       while(list(,$theTag)=each($tags))   {
01003          $forthArr[$theTag]=md5($theTag);
01004          $backArr[md5($theTag)]=$theTag;
01005       }
01006          $value = $this->mapTags($value,$forthArr,'<','_');
01007          $value=strip_tags($value);
01008          $value = $this->mapTags($value,$backArr,'_','<');
01009       return $value;
01010    }
01011 
01021    function caseShift($str,$flag,$cacheKey='')  {
01022       if (is_array($str))  {
01023          if (!$cacheKey || !isset($this->caseShift_cache[$cacheKey]))   {
01024             reset($str);
01025             while(list($k)=each($str)) {
01026                $str[$k] = strtoupper($str[$k]);
01027             }
01028             if ($cacheKey) $this->caseShift_cache[$cacheKey]=$str;
01029          } else {
01030             $str = $this->caseShift_cache[$cacheKey];
01031          }
01032       } elseif (!$flag) $str = strtoupper($str);
01033       return $str;
01034    }
01035 
01045    function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)  {
01046       $accu=array();
01047       reset($tagAttrib);
01048       while(list($k,$v)=each($tagAttrib)) {
01049          if ($xhtmlClean)  {
01050             $attr=strtolower($k);
01051             if (strcmp($v,'') || isset($meta[$k]['dashType'])) {
01052                $attr.='="'.htmlspecialchars($v).'"';
01053             }
01054          } else {
01055             $attr=$meta[$k]['origTag']?$meta[$k]['origTag']:$k;
01056             if (strcmp($v,'') || isset($meta[$k]['dashType'])) {
01057                $dash=$meta[$k]['dashType']?$meta[$k]['dashType']:(t3lib_div::testInt($v)?'':'"');
01058                $attr.='='.$dash.$v.$dash;
01059             }
01060          }
01061          $accu[]=$attr;
01062       }
01063       return implode(' ',$accu);
01064    }
01065 
01074    function get_tag_attributes_classic($tag,$deHSC=0) {
01075       $attr=$this->get_tag_attributes($tag,$deHSC);
01076       return is_array($attr[0])?$attr[0]:array();
01077    }
01078 
01087    function indentLines($content, $number=1, $indentChar="\t") {
01088       $preTab = str_pad('', $number*strlen($indentChar), $indentChar);
01089       $lines = explode(chr(10),str_replace(chr(13),'',$content));
01090       while(list($k,$v) = each($lines))   {
01091          $lines[$k] = $preTab.$v;
01092       }
01093       return implode(chr(10), $lines);
01094    }
01095 
01104    function HTMLparserConfig($TSconfig,$keepTags=array())   {
01105          // Allow tags (base list, merged with incoming array)
01106       $alTags = array_flip(t3lib_div::trimExplode(',',strtolower($TSconfig['allowTags']),1));
01107       $keepTags = array_merge($alTags,$keepTags);
01108 
01109          // Set config properties.
01110       if (is_array($TSconfig['tags.']))   {
01111          reset($TSconfig['tags.']);
01112          while(list($key,$tagC)=each($TSconfig['tags.']))   {
01113             if (!is_array($tagC) && $key==strtolower($key)) {
01114                if (!strcmp($tagC,'0')) unset($keepTags[$key]);
01115                if (!strcmp($tagC,'1') && !isset($keepTags[$key])) $keepTags[$key]=1;
01116             }
01117          }
01118 
01119          reset($TSconfig['tags.']);
01120          while(list($key,$tagC)=each($TSconfig['tags.']))   {
01121             if (is_array($tagC) && $key==strtolower($key))  {
01122                $key=substr($key,0,-1);
01123                if (!is_array($keepTags[$key]))  $keepTags[$key]=array();
01124                if (is_array($tagC['fixAttrib.']))  {
01125                   reset($tagC['fixAttrib.']);
01126                   while(list($atName,$atConfig)=each($tagC['fixAttrib.'])) {
01127                      if (is_array($atConfig))   {
01128                         $atName=substr($atName,0,-1);
01129                         if (!is_array($keepTags[$key]['fixAttrib'][$atName])) {
01130                            $keepTags[$key]['fixAttrib'][$atName]=array();
01131                         }
01132                         $keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);    // Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
01133                         if (strcmp($keepTags[$key]['fixAttrib'][$atName]['range'],'')) $keepTags[$key]['fixAttrib'][$atName]['range'] = t3lib_div::trimExplode(',',$keepTags[$key]['fixAttrib'][$atName]['range']);
01134                         if (strcmp($keepTags[$key]['fixAttrib'][$atName]['list'],''))  $keepTags[$key]['fixAttrib'][$atName]['list'] = t3lib_div::trimExplode(',',$keepTags[$key]['fixAttrib'][$atName]['list']);
01135                      }
01136                   }
01137                }
01138                unset($tagC['fixAttrib.']);
01139                unset($tagC['fixAttrib']);
01140                $keepTags[$key] = array_merge($keepTags[$key],$tagC);       // Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
01141             }
01142          }
01143       }
01144          // localNesting
01145       if ($TSconfig['localNesting'])   {
01146          $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['localNesting']),1);
01147          while(list(,$tn)=each($lN))   {
01148             if (isset($keepTags[$tn])) {
01149                $keepTags[$tn]['nesting']=1;
01150             }
01151          }
01152       }
01153       if ($TSconfig['globalNesting'])  {
01154          $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['globalNesting']),1);
01155          while(list(,$tn)=each($lN))   {
01156             if (isset($keepTags[$tn])) {
01157                if (!is_array($keepTags[$tn]))   $keepTags[$tn]=array();
01158                $keepTags[$tn]['nesting']='global';
01159             }
01160          }
01161       }
01162       if ($TSconfig['rmTagIfNoAttrib'])   {
01163          $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['rmTagIfNoAttrib']),1);
01164          while(list(,$tn)=each($lN))   {
01165             if (isset($keepTags[$tn])) {
01166                if (!is_array($keepTags[$tn]))   $keepTags[$tn]=array();
01167                $keepTags[$tn]['rmTagIfNoAttrib']=1;
01168             }
01169          }
01170       }
01171       if ($TSconfig['noAttrib']) {
01172          $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['noAttrib']),1);
01173          while(list(,$tn)=each($lN))   {
01174             if (isset($keepTags[$tn])) {
01175                if (!is_array($keepTags[$tn]))   $keepTags[$tn]=array();
01176                $keepTags[$tn]['allowedAttribs']=0;
01177             }
01178          }
01179       }
01180       if ($TSconfig['removeTags'])  {
01181          $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1);
01182          while(list(,$tn)=each($lN))   {
01183             $keepTags[$tn]=array();
01184             $keepTags[$tn]['allowedAttribs']=0;
01185             $keepTags[$tn]['rmTagIfNoAttrib']=1;
01186          }
01187       }
01188 
01189          // Create additional configuration:
01190       $addConfig=array();
01191       if ($TSconfig['xhtml_cleaning']) {
01192          $addConfig['xhtml']=1;
01193       }
01194 
01195       return array(
01196          $keepTags,
01197          ''.$TSconfig['keepNonMatchedTags'],
01198          intval($TSconfig['htmlSpecialChars']),
01199          $addConfig
01200       );
01201    }
01202 
01228    function XHTML_clean($content)   {
01229       $content = $this->HTMLcleaner(
01230          $content,
01231          array(), // No tags treated specially
01232          1,       // Keep ALL tags.
01233          0,       // All content is htmlspecialchar()'ed (or ??) - if we do, <script> content will break...
01234          array('xhtml' => 1)
01235       );
01236       return $content;
01237    }
01238 
01250    function processTag($value,$conf,$endTag,$protected=0)   {
01251          // Return immediately if protected or no parameters
01252       if ($protected || !count($conf)) return $value;
01253          // OK then, begin processing for XHTML output:
01254          // STILL VERY EXPERIMENTAL!!
01255       if ($conf['xhtml'])  {
01256          if ($endTag)   {  // Endtags are just set lowercase right away
01257             $value = strtolower($value);
01258          } elseif (substr($value,0,2)!='<!') {  // ... and comments are ignored.
01259             $inValue = substr($value,1,(substr($value,-2)=='/>'?-2:-1));   // Finding inner value with out < >
01260             list($tagName,$tagP)=split('[[:space:]]',$inValue,2); // Separate attributes and tagname
01261             $tagName = strtolower($tagName);
01262 
01263                // Process attributes
01264             $tagAttrib = $this->get_tag_attributes($tagP);
01265             if (!strcmp($tagName,'img') && !isset($tagAttrib[0]['alt']))      $tagAttrib[0]['alt']='';   // Set alt attribute for all images (not XHTML though...)
01266             if (!strcmp($tagName,'script') && !isset($tagAttrib[0]['type']))  $tagAttrib[0]['type']='text/javascript';  // Set type attribute for all script-tags
01267             $outA=array();
01268             reset($tagAttrib[0]);
01269             while(list($attrib_name,$attrib_value)=each($tagAttrib[0])) {
01270                   // Set attributes: lowercase, always in quotes, with htmlspecialchars converted.
01271                $outA[]=$attrib_name.'="'.htmlspecialchars($this->bidir_htmlspecialchars($attrib_value,-1)).'"';
01272             }
01273             $newTag='<'.trim($tagName.' '.implode(' ',$outA));
01274                // All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
01275             if (t3lib_div::inList('img,br,hr,meta,link,base,area,input',$tagName) || substr($value,-2)=='/>')  {
01276                $newTag.=' />';
01277             } else {
01278                $newTag.='>';
01279             }
01280             $value = $newTag;
01281          }
01282       }
01283 
01284       return $value;
01285    }
01286 
01296    function processContent($value,$dir,$conf)   {
01297       if ($dir!=0)   $value = $this->bidir_htmlspecialchars($value,$dir);
01298       return $value;
01299    }
01300 }
01301 
01302 
01303 
01304 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])  {
01305    include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
01306 }
01307 ?>

Generated on Sun Oct 3 01:05:48 2004 for TYPO3core 3.7.0 dev by doxygen 1.3.8-20040913