jenka
01-29-2007, 08:05 AM
hi
i recently went live with new software (php-based 'oscailt'), only to discover that when users contribute their articles, every single or double quote gets a slash put in front of it when published. and when someone edits the article, more slashes are added.
i really don't know php well, so i don't know what regular expression to look for in this code (this is 'contentcleansing.php' - the file that supposedly 'cleanses' newly-published content of bad html before publishing it to the site).
and the developers of the software haven't responded at all....
can anyone tell me where the adding of slashes (\) is taking place in this code??!
---------------------------------------------
function stripShortenedTags($content)
{
$tag = "a";
$content = preg_replace('@<'.$tag.'[^>]*href=\s*[\"\']([^\s]*)[\"\'][^>]*>.*?</'.$tag.'>@si', "\\1", $content);
return $content;
}
function listMessedUpBBCode($clean)
{
global $allowed_bb_code, $system_config;
$problems = array();
$try_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($try_bb_code as $bb_tag)
{
$clean = strtoupper($clean);
$stripped = strip_tags($clean);//in case its hidden in html attributes
$open_matches = array();
$close_matches = array();
$opening_tag_count = preg_match_all('@\['.$bb_tag.'([^\]]*?)\]@si', $stripped, $open_matches);
$closing_tag_count = preg_match_all('@\[/'.$bb_tag.'\s*?\]@si', $stripped, $close_matches);
if($opening_tag_count != $closing_tag_count)
{
if($closing_tag_count > $opening_tag_count)
{
$problems[] = "Closing Tag [/$bb_tag] with no matching opening tag (x".($closing_tag_count-$opening_tag_count).")";
}
else
{
$problems[] = "Open Tag [$bb_tag] with no matching closing tag (x".($opening_tag_count-$closing_tag_count).")";
}
return $problems;
}
elseif($opening_tag_count == 0) continue;
$start_pos = -1;
$end_pos = -1;
//now check that there is no crap in between
for($i = 0; $i < count($open_matches[0]); $i++)
{
$start_pos = strpos($clean, $open_matches[0][$i], $start_pos+1);
$end_pos = strpos($clean, $close_matches[0][$i], $end_pos+1);
if($start_pos === false or $end_pos === false)
{
$problems[] = "BBCode [$bb_tag] embedded in HTML - This is not allowed";
return $problems;
}
elseif($end_pos < ($start_pos+strlen($open_matches[0][$i])))
{
$problems[] = "BBCode [$bb_tag] incorrectly specified - the start tag needs to come before the end tag!";
return $problems;
}
$filling = trim(substr($clean, $start_pos+strlen($open_matches[0][$i]),$end_pos-($start_pos+strlen($open_matches[0][$i]))));
//look for embedded html
if(strlen($filling) == 0)
{
$problems[] = "BB Tag: [$bb_tag] has empty contents. Tags with empty contents don't do anything except take up space.";
return $problems;
}
$tag_matches = array();
if(preg_match('@<(/?\w+)@', $filling, $tag_matches))
{
$problems[] = "HTML Tag: <".$tag_matches[1][0]."> embedded in BB Tags - This is not allowed";
return $problems;
}
//look for incorrectly embedded bbcode
if($bb_tag === 'LIST')
{
$list_tags = array();
$embedded_count = preg_match_all('@\[(/?[\w\*][^\]]*?)\]@si', $filling, $list_tags);
if($embedded_count == 0)
{
$problems[] = "No List Entries found in BB List. Empty Lists are
not allowed. To place an entry in a list, use the tag";
return $problems;
}
foreach($list_tags[1] as $m)
{
if(trim($m) != '*')
{
$problems[] = "BB Tag: [".$m."] embedded in BB List - This is
not allowed";
}
}
}
else
{
if(strlen($filling) > $system_config->maximum_allowed_bbed_text)
{
$problems[] = "You have embedded too much text inside the BB Tag: [".$bb_tag."] you are only allowed to tag up to $system_config->maximum_allowed_bbed_text characters in one section. The feature is for highlighting small chunks of text, not huge screeds.";
}
if($bb_tag === 'QUOTE')//allowed embedded i's and b's but numbers must match
{
$m = array();
$open_tags = preg_match_all('@\[\s*?\]@si', $filling, $m);
$close_tags = preg_match_all('@\[/[ib]\s*?\]@si', $filling, $m);
if($open_tags != $close_tags)
{
$problems[] = "You have mismatched BB Tags inside BB quotes. You can use the [i] and tags inside quoted text, but you can't embed them and you must close your tags properly by using and after the text that you want highlighted.";
return $problems;
}
$embed_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($embed_bb_code as $embed_bb_tag)
{
$t = strtoupper($embed_bb_tag);
if($t == 'I' or $t == 'B') continue;
elseif(preg_match('@\[/?'.$t.'\b@si', $filling))
{
$problems[] = "You have embedded the BB Tag [$t] inside the tag . This is not allowed.";
return $problems;
}
}
}
else
{
$embed_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($embed_bb_code as $embed_bb_tag)
{
if((strpos($filling, '['.$embed_bb_tag) !== false) or (strpos($filling, '[/'.$embed_bb_tag) !== false))
{
$problems[] = "You have embedded the BB Tag [$embed_bb_tag] inside the tag [$bb_tag]. This is not allowed.";
return $problems;
}
}
}
}
}
}
return $problems;
}
function unprocessBBCode($clean)
{
//return $clean;
$clean = preg_replace('@<CITE\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</CITE>@si', "[QUOTE]\\1", $clean);
$clean = preg_replace('@<UL\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</UL>@si', "\\1\n", $clean);
$clean = preg_replace('@<STRONG\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</STRONG>@si', "\\1", $clean);
$clean = preg_replace('@<EM\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</EM>@si', "\\1", $clean);
$clean = preg_replace('@<LI\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</LI>@si', "\n \\1", $clean);
return $clean;
}
function processBBCode($clean)
{
$problems = listMessedUpBBCode($clean);
if(count($problems) > 0)
{
//echo "<P>".htmlspecialchars($clean)."</P>";
}
global $allowed_bb_code, $system_config;
//$problems = array();
$try_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($try_bb_code as $bb_tag)
{
if($bb_tag == 'LIST')
{
$pattern = '@(\[LIST\b[^\]]*?\])\s*(\[\*[^\]]*?\].*?)(\[/LIST\b[^\]]*?\])@sie';
$clean = preg_replace($pattern, "'<UL class=\"BBINPUT\">'.processListBB('\\2').'</UL>'", $clean);
}
else
{
$patterns = array('@\['.$bb_tag.'\b[^\]]*?\]@si', '@\[/'.$bb_tag.'\b[^\]]*?\]@si');
if($bb_tag == 'I') $r = 'em';
elseif($bb_tag == 'B') $r = 'strong';
elseif($bb_tag == 'QUOTE') $r = 'cite';
$replacements = array("<$r class='BBINPUT'>", "</$r>");
$clean = preg_replace($patterns, $replacements, $clean);
}
}
return $clean;
}
function processListBB($str)
{
$rtn = "";
//echo "<P>processing $str</P>";
$matches = preg_split('@[\*[^\]]*?\]\s*@', $str);
foreach($matches as $match)
{
$x = trim($match);
if($x != "")
{
$rtn .= "<LI class='BBINPUT'>$x</LI>";
}
}
return $rtn;
}
function compactWhiteSpace($clean)
{
//first get rid of multiple non-breaking spaces
$clean = str_replace("\r\n", "\n", $clean);
$clean = str_replace("\r", "\n", $clean);
$clean = preg_replace('/[^\S\n]{1,}/', " ", $clean);
//then get rid of multiple blank lines.
$clean = preg_replace('/\n\s+\n/', "\n\n", $clean);
return $clean;
}
function listUnclosedTags($content, $allowed_tags)
{
global $system_config;
$unc = array();
foreach($allowed_tags as $tag)
{
if($tag == 'br' or $tag == 'img' or $tag == 'hr') continue;
elseif(!$system_config->enforce_strict_tag_closing && ($tag == 'p' or $tag == 'li' or $tag=='td' or $tag == 'tr' or $tag == 'dd' or $tag == 'dt' or $tag == 'thead' or $tag == 'tbody' or $tag == 'tfoot')) continue;
$start_matches = array();
$end_matches = array();
$open_count = preg_match_all('@<'.$tag.'\b[^>]*?>@si', $content, $start_matches);
$close_count = preg_match_all('@</'.$tag.'\b[^>]*?>@si', $content, $end_matches);
if($open_count != $close_count)
{
if($close_count > $open_count)
{
$unc[] = "(".($close_count-$open_count).") Closing Tag </$tag>) with no matching opening tag";
}
else
{
$unc[] = "(".($open_count-$close_count).") Open Tag <$tag> with no matching closing tag";
}
}
}
return $unc;
}
//this is a bottle-neck
function stripTagList($unclean, $tags, $attrs = false)
{
global $performance_test;
$tag_str = '<'.implode("><",$tags).'>';
//while($unclean != strip_tags($unclean, $tag_str))
//{
$unclean = strip_tags($unclean, $tag_str);
$unclean = stripEvilAttributes($unclean);
//}
if($attrs !== false)
{
$replacements = array();
$contained_tags = getContainedTagList($unclean);
foreach($contained_tags as $tag => $occurrences)
{
//echo "<P>processing tag $tag ".htmlspecialchars(implode(":::", $occurrences))."</P>";
if(!isset($attrs[$tag]) or count($attrs[$tag]) == 0)
{
foreach($occurrences as $one_oc)
{
if(!isset($replacements[$one_oc]) && strcasecmp($one_oc, "<$tag>") != 0)
{
$replacements[$one_oc] = "<$tag>";
}
}
}
else
{
$full_strip = false;
$attr_list = $attrs[$tag];
foreach($occurrences as $one_oc)
{
if(!isset($replacements[$one_oc]) && strcasecmp($one_oc, "<$tag>") != 0)
{
//$replacements[$one_oc] = stripNonApprovedAttributes($one_oc, $tag, $attr_list);
$full_strip = true;
break;
}
}
if($full_strip)
{
//echo "<P>stripping attributes from $tag: ".implode("::", $attr_list)."</P>";
$unclean = stripNonApprovedAttributes($unclean, $tag, $attr_list);
}
}
}
if(count($replacements) > 0)
{
$unclean = str_replace(array_keys($replacements), array_values($replacements), $unclean);
}
//foreach($replacements as $orig => $noo)
//{
// echo "<P>replacing $orig with $noo</P>";
// $unclean = str_replace($orig, $noo, $unclean);
//}
/*foreach($tags as $tag)
{
if(!isset($attrs[$tag])) $attr_list = array();
else $attr_list = $attrs[$tag];
if(!is_array($attr_list)) $attr_list = array();
$unclean = stripNonApprovedAttributes($unclean, $tag, $attr_list);
}*/
}
if($performance_test > 7) markTime("tags stripped from content");
return $unclean;
}
function getContainedTagList($html_str)
{
$tags = array();
$reg_ex = '@<([\w]+)[^>]*>@s';
$matches = array();
preg_match_all($reg_ex, $html_str, $matches);
//set up array of tag -> tag string..
if($matches and is_array($matches) and count($matches[1]) > 0)
{
for($i = 0; $i<count($matches[0]); $i++)
{
if(!isset($tags[$matches[1][$i]]))
{
$tags[$matches[1][$i]] = array();
}
$tags[$matches[1][$i]][] = $matches[0][$i];
}
/* foreach($matches[0] as $m)
{
if(!isset($tags[$m[1]]))
{
$tags[$m[1]] = array();
}
$tags[$m[1]] = $m[0];
}*/
}
return $tags;
}
function filterApprovedAttributes($tag_txt, $tag_name, $attrs)
{
return $tag_txt;
}
function stripNonApprovedAttributes($msg,$tag,$attrs)
{
//$msg = stripEvilAttributes($msg);
$lengthfirst = 0;
while (strstr(substr($msg,$lengthfirst),"<$tag ")!="")
{
$rtn = "<$tag";
$imgstart = $lengthfirst + strpos(substr($msg,$lengthfirst), "<$tag ");
$partafterwith = substr($msg,$imgstart);
$img = substr($partafterwith,0,strpos($partafterwith,">")+1);
if(strlen($img) == 0) continue;
foreach($attrs as $attr)
{
//echo "<P>trying attribut $attr in ".htmlspecialchars($img)."</P>";
$matches = array();
if(preg_match('@\b'.$attr.'\b\s*=\s*([\w\"\'].*)@si', $img, $matches))
{
$trailing_bit = $matches[1];
if($trailing_bit{0} == '"')
{
$end = strpos($trailing_bit, '"', 1);
//echo "<b>end is $end</b>";
if($end !== false) $rtn .= " $attr=".substr($trailing_bit, 0, $end+1);
}
elseif($trailing_bit{0} == "'")
{
$end = strpos($trailing_bit, "'", 1);
if($end !== false) $rtn .= " $attr=".substr($trailing_bit, 0, $end+1);
}
else
{
if(preg_match('@\w+@', $trailing_bit, $matches))
{
$rtn .= " $attr".'="'.$matches[0].'"';
}
}
}
}
$rtn .= ">";
$partafter = substr($partafterwith,strpos($partafterwith,">")+1);
$msg = substr($msg,0,$imgstart).$rtn.$partafter;
$lengthfirst = $imgstart+3;
}
return $msg;
}
//black_list_scrubbing
function stripEvilTags($unclean)
{
global $evil_tags;
$count = 0;
//foreach ($evil_tags as $tag)
//{
$tag = "(".implode('|', $evil_tags).")";
$unclean = preg_replace('@<'.$tag.'[^>]*?>@si', "<!-- evil tag removed -->", $unclean);
$unclean = preg_replace('@</'.$tag.'[^>]*?>@si', "<!-- evil tag end removed -->", $unclean);
//}
return $unclean;
}
function stripEvilAttributes($unclean)
{
global $evil_attributes;
$att = implode('|', $evil_attributes);
//foreach($evil_attributes as $att)
//{
//echo "<P>stripping $att</p>";
$unclean = preg_replace('@(<[^>]*?)\b('.$att.')\b([^>]*?>)@si', '\1\3', $unclean);
//}
return $unclean;
}
function containsEvilTags($unclean)
{
global $system_config, $evil_tags, $code_delimiters, $evil_attributes;
$matches = 0;
foreach($evil_tags as $tag)
{
if((preg_match('@<'.$tag.'[^>]*?>@si', $unclean)))
{
if($system_config->security_recording_level > 0)
{
logSecurityMessage("Attempt to use evil tag: $tag");
}
return true;
}
}
foreach($code_delimiters as $delim)
{
if(strpos($unclean, $delim) !== false)
{
if($system_config->security_recording_level > 0)
{
logSecurityMessage("Attempt to use code delimiter: $delim");
}
return true;
}
}
foreach($evil_attributes as $att)
{
if((preg_match('@<[^>]*?'.$att.'[^>]*?>@si', $unclean)))
{
if($system_config->security_recording_level > 0)
{
logSecurityMessage("Attempt to use evil attribute: $att");
}
return true;
}
}
return false;
}
function stripUnclosedTags($unclean, $tag_list)
{}
?>
i recently went live with new software (php-based 'oscailt'), only to discover that when users contribute their articles, every single or double quote gets a slash put in front of it when published. and when someone edits the article, more slashes are added.
i really don't know php well, so i don't know what regular expression to look for in this code (this is 'contentcleansing.php' - the file that supposedly 'cleanses' newly-published content of bad html before publishing it to the site).
and the developers of the software haven't responded at all....
can anyone tell me where the adding of slashes (\) is taking place in this code??!
---------------------------------------------
function stripShortenedTags($content)
{
$tag = "a";
$content = preg_replace('@<'.$tag.'[^>]*href=\s*[\"\']([^\s]*)[\"\'][^>]*>.*?</'.$tag.'>@si', "\\1", $content);
return $content;
}
function listMessedUpBBCode($clean)
{
global $allowed_bb_code, $system_config;
$problems = array();
$try_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($try_bb_code as $bb_tag)
{
$clean = strtoupper($clean);
$stripped = strip_tags($clean);//in case its hidden in html attributes
$open_matches = array();
$close_matches = array();
$opening_tag_count = preg_match_all('@\['.$bb_tag.'([^\]]*?)\]@si', $stripped, $open_matches);
$closing_tag_count = preg_match_all('@\[/'.$bb_tag.'\s*?\]@si', $stripped, $close_matches);
if($opening_tag_count != $closing_tag_count)
{
if($closing_tag_count > $opening_tag_count)
{
$problems[] = "Closing Tag [/$bb_tag] with no matching opening tag (x".($closing_tag_count-$opening_tag_count).")";
}
else
{
$problems[] = "Open Tag [$bb_tag] with no matching closing tag (x".($opening_tag_count-$closing_tag_count).")";
}
return $problems;
}
elseif($opening_tag_count == 0) continue;
$start_pos = -1;
$end_pos = -1;
//now check that there is no crap in between
for($i = 0; $i < count($open_matches[0]); $i++)
{
$start_pos = strpos($clean, $open_matches[0][$i], $start_pos+1);
$end_pos = strpos($clean, $close_matches[0][$i], $end_pos+1);
if($start_pos === false or $end_pos === false)
{
$problems[] = "BBCode [$bb_tag] embedded in HTML - This is not allowed";
return $problems;
}
elseif($end_pos < ($start_pos+strlen($open_matches[0][$i])))
{
$problems[] = "BBCode [$bb_tag] incorrectly specified - the start tag needs to come before the end tag!";
return $problems;
}
$filling = trim(substr($clean, $start_pos+strlen($open_matches[0][$i]),$end_pos-($start_pos+strlen($open_matches[0][$i]))));
//look for embedded html
if(strlen($filling) == 0)
{
$problems[] = "BB Tag: [$bb_tag] has empty contents. Tags with empty contents don't do anything except take up space.";
return $problems;
}
$tag_matches = array();
if(preg_match('@<(/?\w+)@', $filling, $tag_matches))
{
$problems[] = "HTML Tag: <".$tag_matches[1][0]."> embedded in BB Tags - This is not allowed";
return $problems;
}
//look for incorrectly embedded bbcode
if($bb_tag === 'LIST')
{
$list_tags = array();
$embedded_count = preg_match_all('@\[(/?[\w\*][^\]]*?)\]@si', $filling, $list_tags);
if($embedded_count == 0)
{
$problems[] = "No List Entries found in BB List. Empty Lists are
not allowed. To place an entry in a list, use the tag";
return $problems;
}
foreach($list_tags[1] as $m)
{
if(trim($m) != '*')
{
$problems[] = "BB Tag: [".$m."] embedded in BB List - This is
not allowed";
}
}
}
else
{
if(strlen($filling) > $system_config->maximum_allowed_bbed_text)
{
$problems[] = "You have embedded too much text inside the BB Tag: [".$bb_tag."] you are only allowed to tag up to $system_config->maximum_allowed_bbed_text characters in one section. The feature is for highlighting small chunks of text, not huge screeds.";
}
if($bb_tag === 'QUOTE')//allowed embedded i's and b's but numbers must match
{
$m = array();
$open_tags = preg_match_all('@\[\s*?\]@si', $filling, $m);
$close_tags = preg_match_all('@\[/[ib]\s*?\]@si', $filling, $m);
if($open_tags != $close_tags)
{
$problems[] = "You have mismatched BB Tags inside BB quotes. You can use the [i] and tags inside quoted text, but you can't embed them and you must close your tags properly by using and after the text that you want highlighted.";
return $problems;
}
$embed_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($embed_bb_code as $embed_bb_tag)
{
$t = strtoupper($embed_bb_tag);
if($t == 'I' or $t == 'B') continue;
elseif(preg_match('@\[/?'.$t.'\b@si', $filling))
{
$problems[] = "You have embedded the BB Tag [$t] inside the tag . This is not allowed.";
return $problems;
}
}
}
else
{
$embed_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($embed_bb_code as $embed_bb_tag)
{
if((strpos($filling, '['.$embed_bb_tag) !== false) or (strpos($filling, '[/'.$embed_bb_tag) !== false))
{
$problems[] = "You have embedded the BB Tag [$embed_bb_tag] inside the tag [$bb_tag]. This is not allowed.";
return $problems;
}
}
}
}
}
}
return $problems;
}
function unprocessBBCode($clean)
{
//return $clean;
$clean = preg_replace('@<CITE\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</CITE>@si', "[QUOTE]\\1", $clean);
$clean = preg_replace('@<UL\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</UL>@si', "\\1\n", $clean);
$clean = preg_replace('@<STRONG\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</STRONG>@si', "\\1", $clean);
$clean = preg_replace('@<EM\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</EM>@si', "\\1", $clean);
$clean = preg_replace('@<LI\s*class=[\'"]?BBINPUT[\'"]?>(.*?)</LI>@si', "\n \\1", $clean);
return $clean;
}
function processBBCode($clean)
{
$problems = listMessedUpBBCode($clean);
if(count($problems) > 0)
{
//echo "<P>".htmlspecialchars($clean)."</P>";
}
global $allowed_bb_code, $system_config;
//$problems = array();
$try_bb_code = array_map('strtoupper', $allowed_bb_code);
foreach($try_bb_code as $bb_tag)
{
if($bb_tag == 'LIST')
{
$pattern = '@(\[LIST\b[^\]]*?\])\s*(\[\*[^\]]*?\].*?)(\[/LIST\b[^\]]*?\])@sie';
$clean = preg_replace($pattern, "'<UL class=\"BBINPUT\">'.processListBB('\\2').'</UL>'", $clean);
}
else
{
$patterns = array('@\['.$bb_tag.'\b[^\]]*?\]@si', '@\[/'.$bb_tag.'\b[^\]]*?\]@si');
if($bb_tag == 'I') $r = 'em';
elseif($bb_tag == 'B') $r = 'strong';
elseif($bb_tag == 'QUOTE') $r = 'cite';
$replacements = array("<$r class='BBINPUT'>", "</$r>");
$clean = preg_replace($patterns, $replacements, $clean);
}
}
return $clean;
}
function processListBB($str)
{
$rtn = "";
//echo "<P>processing $str</P>";
$matches = preg_split('@[\*[^\]]*?\]\s*@', $str);
foreach($matches as $match)
{
$x = trim($match);
if($x != "")
{
$rtn .= "<LI class='BBINPUT'>$x</LI>";
}
}
return $rtn;
}
function compactWhiteSpace($clean)
{
//first get rid of multiple non-breaking spaces
$clean = str_replace("\r\n", "\n", $clean);
$clean = str_replace("\r", "\n", $clean);
$clean = preg_replace('/[^\S\n]{1,}/', " ", $clean);
//then get rid of multiple blank lines.
$clean = preg_replace('/\n\s+\n/', "\n\n", $clean);
return $clean;
}
function listUnclosedTags($content, $allowed_tags)
{
global $system_config;
$unc = array();
foreach($allowed_tags as $tag)
{
if($tag == 'br' or $tag == 'img' or $tag == 'hr') continue;
elseif(!$system_config->enforce_strict_tag_closing && ($tag == 'p' or $tag == 'li' or $tag=='td' or $tag == 'tr' or $tag == 'dd' or $tag == 'dt' or $tag == 'thead' or $tag == 'tbody' or $tag == 'tfoot')) continue;
$start_matches = array();
$end_matches = array();
$open_count = preg_match_all('@<'.$tag.'\b[^>]*?>@si', $content, $start_matches);
$close_count = preg_match_all('@</'.$tag.'\b[^>]*?>@si', $content, $end_matches);
if($open_count != $close_count)
{
if($close_count > $open_count)
{
$unc[] = "(".($close_count-$open_count).") Closing Tag </$tag>) with no matching opening tag";
}
else
{
$unc[] = "(".($open_count-$close_count).") Open Tag <$tag> with no matching closing tag";
}
}
}
return $unc;
}
//this is a bottle-neck
function stripTagList($unclean, $tags, $attrs = false)
{
global $performance_test;
$tag_str = '<'.implode("><",$tags).'>';
//while($unclean != strip_tags($unclean, $tag_str))
//{
$unclean = strip_tags($unclean, $tag_str);
$unclean = stripEvilAttributes($unclean);
//}
if($attrs !== false)
{
$replacements = array();
$contained_tags = getContainedTagList($unclean);
foreach($contained_tags as $tag => $occurrences)
{
//echo "<P>processing tag $tag ".htmlspecialchars(implode(":::", $occurrences))."</P>";
if(!isset($attrs[$tag]) or count($attrs[$tag]) == 0)
{
foreach($occurrences as $one_oc)
{
if(!isset($replacements[$one_oc]) && strcasecmp($one_oc, "<$tag>") != 0)
{
$replacements[$one_oc] = "<$tag>";
}
}
}
else
{
$full_strip = false;
$attr_list = $attrs[$tag];
foreach($occurrences as $one_oc)
{
if(!isset($replacements[$one_oc]) && strcasecmp($one_oc, "<$tag>") != 0)
{
//$replacements[$one_oc] = stripNonApprovedAttributes($one_oc, $tag, $attr_list);
$full_strip = true;
break;
}
}
if($full_strip)
{
//echo "<P>stripping attributes from $tag: ".implode("::", $attr_list)."</P>";
$unclean = stripNonApprovedAttributes($unclean, $tag, $attr_list);
}
}
}
if(count($replacements) > 0)
{
$unclean = str_replace(array_keys($replacements), array_values($replacements), $unclean);
}
//foreach($replacements as $orig => $noo)
//{
// echo "<P>replacing $orig with $noo</P>";
// $unclean = str_replace($orig, $noo, $unclean);
//}
/*foreach($tags as $tag)
{
if(!isset($attrs[$tag])) $attr_list = array();
else $attr_list = $attrs[$tag];
if(!is_array($attr_list)) $attr_list = array();
$unclean = stripNonApprovedAttributes($unclean, $tag, $attr_list);
}*/
}
if($performance_test > 7) markTime("tags stripped from content");
return $unclean;
}
function getContainedTagList($html_str)
{
$tags = array();
$reg_ex = '@<([\w]+)[^>]*>@s';
$matches = array();
preg_match_all($reg_ex, $html_str, $matches);
//set up array of tag -> tag string..
if($matches and is_array($matches) and count($matches[1]) > 0)
{
for($i = 0; $i<count($matches[0]); $i++)
{
if(!isset($tags[$matches[1][$i]]))
{
$tags[$matches[1][$i]] = array();
}
$tags[$matches[1][$i]][] = $matches[0][$i];
}
/* foreach($matches[0] as $m)
{
if(!isset($tags[$m[1]]))
{
$tags[$m[1]] = array();
}
$tags[$m[1]] = $m[0];
}*/
}
return $tags;
}
function filterApprovedAttributes($tag_txt, $tag_name, $attrs)
{
return $tag_txt;
}
function stripNonApprovedAttributes($msg,$tag,$attrs)
{
//$msg = stripEvilAttributes($msg);
$lengthfirst = 0;
while (strstr(substr($msg,$lengthfirst),"<$tag ")!="")
{
$rtn = "<$tag";
$imgstart = $lengthfirst + strpos(substr($msg,$lengthfirst), "<$tag ");
$partafterwith = substr($msg,$imgstart);
$img = substr($partafterwith,0,strpos($partafterwith,">")+1);
if(strlen($img) == 0) continue;
foreach($attrs as $attr)
{
//echo "<P>trying attribut $attr in ".htmlspecialchars($img)."</P>";
$matches = array();
if(preg_match('@\b'.$attr.'\b\s*=\s*([\w\"\'].*)@si', $img, $matches))
{
$trailing_bit = $matches[1];
if($trailing_bit{0} == '"')
{
$end = strpos($trailing_bit, '"', 1);
//echo "<b>end is $end</b>";
if($end !== false) $rtn .= " $attr=".substr($trailing_bit, 0, $end+1);
}
elseif($trailing_bit{0} == "'")
{
$end = strpos($trailing_bit, "'", 1);
if($end !== false) $rtn .= " $attr=".substr($trailing_bit, 0, $end+1);
}
else
{
if(preg_match('@\w+@', $trailing_bit, $matches))
{
$rtn .= " $attr".'="'.$matches[0].'"';
}
}
}
}
$rtn .= ">";
$partafter = substr($partafterwith,strpos($partafterwith,">")+1);
$msg = substr($msg,0,$imgstart).$rtn.$partafter;
$lengthfirst = $imgstart+3;
}
return $msg;
}
//black_list_scrubbing
function stripEvilTags($unclean)
{
global $evil_tags;
$count = 0;
//foreach ($evil_tags as $tag)
//{
$tag = "(".implode('|', $evil_tags).")";
$unclean = preg_replace('@<'.$tag.'[^>]*?>@si', "<!-- evil tag removed -->", $unclean);
$unclean = preg_replace('@</'.$tag.'[^>]*?>@si', "<!-- evil tag end removed -->", $unclean);
//}
return $unclean;
}
function stripEvilAttributes($unclean)
{
global $evil_attributes;
$att = implode('|', $evil_attributes);
//foreach($evil_attributes as $att)
//{
//echo "<P>stripping $att</p>";
$unclean = preg_replace('@(<[^>]*?)\b('.$att.')\b([^>]*?>)@si', '\1\3', $unclean);
//}
return $unclean;
}
function containsEvilTags($unclean)
{
global $system_config, $evil_tags, $code_delimiters, $evil_attributes;
$matches = 0;
foreach($evil_tags as $tag)
{
if((preg_match('@<'.$tag.'[^>]*?>@si', $unclean)))
{
if($system_config->security_recording_level > 0)
{
logSecurityMessage("Attempt to use evil tag: $tag");
}
return true;
}
}
foreach($code_delimiters as $delim)
{
if(strpos($unclean, $delim) !== false)
{
if($system_config->security_recording_level > 0)
{
logSecurityMessage("Attempt to use code delimiter: $delim");
}
return true;
}
}
foreach($evil_attributes as $att)
{
if((preg_match('@<[^>]*?'.$att.'[^>]*?>@si', $unclean)))
{
if($system_config->security_recording_level > 0)
{
logSecurityMessage("Attempt to use evil attribute: $att");
}
return true;
}
}
return false;
}
function stripUnclosedTags($unclean, $tag_list)
{}
?>