You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
374 lines
13 KiB
374 lines
13 KiB
<?php
|
|
|
|
/**
|
|
* Xss过滤
|
|
*/
|
|
|
|
namespace libraries;
|
|
|
|
class Xss {
|
|
|
|
/**
 * Tags that survive filtering, keyed by lower-case tag name.
 *
 * For each tag, disallowed attributes are removed first, then the
 * allowed list (if any) is applied.
 *
 * Using the "a" tag as an example:
 *   1. keep every attribute:      ['a'] => ''
 *   2. drop only "style":         ['a'] => array('disallowed' => array('style'))
 *   3. keep only "href" and "rel": ['a'] => array('allowed' => array('href', 'rel'))
 */
protected $allowedTags = array();

/**
 * Style property names that may appear inside a style attribute.
 */
protected $allowedStyleProperties = array();

/**
 * Patterns describing the domains that url() values in a style
 * attribute may point to.
 */
protected $allowedStyleDomain = array();
|
|
|
|
|
|
|
|
/**
 * Run the XSS filter over an HTML fragment.
 *
 * The allowed tags and style properties passed here replace whatever was
 * configured on the instance by a previous call.
 *
 * @param string $string                 Markup to filter.
 * @param array  $allowedTags            Allowed tags, e.g. array('a' => array()).
 * @param array  $allowedStyleProperties Allowed style properties, e.g. array('font-size', 'font-weight').
 *
 * @return string The filtered markup; an empty string when the input is not valid UTF-8.
 */
public function filter($string, $allowedTags = array(), $allowedStyleProperties = array()) {
    // Input that is not valid UTF-8 is discarded outright.
    if (!$this->isUTF8($string)) {
        return '';
    }
    $this->setAllowedTags($allowedTags);
    $this->setAllowedStyleProperties($allowedStyleProperties);
    // Remove NUL bytes.
    $string = str_replace(chr(0), '', $string);
    // Remove Netscape-style JavaScript entities: &{...};
    $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
    // Defuse every ampersand. The replacement must be the entity "&amp;";
    // replacing "&" with "&" would leave the input untouched.
    $string = str_replace('&', '&amp;', $string);
    // Turn back only well-formed entities: decimal, hexadecimal, named.
    $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
    $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
    // Hand every tag-like fragment and every stray angle bracket to split().
    return preg_replace_callback('%
        (
        <(?=[^a-zA-Z!/])  # a lone <
        |                 # or
        <!--.*?-->        # a comment
        |                 # or
        <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
        |                 # or
        >                 # just a >
        )%x', array($this, 'split'), $string);
}
|
|
|
|
/**
 * Callback for filter(): decide what to emit for one matched fragment.
 *
 * @param array $matches Match array from preg_replace_callback(); index 1
 *                       holds the fragment (a tag, a comment, or a single
 *                       angle bracket).
 *
 * @return string The sanitized replacement: an escaped bracket, a rebuilt
 *                tag, the comment itself, or an empty string when the
 *                fragment is dropped.
 */
public function split($matches) {
    $string = $matches[1];
    // Stray angle brackets are emitted as entities so they can never open
    // or close a tag in the output.
    if (substr($string, 0, 1) != '<') {
        return '&gt;';
    }
    if (strlen($string) == 1) {
        return '&lt;';
    }
    // Either a (possibly closing) tag with its attribute list, or a comment.
    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $parts)) {
        return '';
    }
    $slash = trim($parts[1]);
    $elem = strtolower($parts[2]);
    $attrlist = $parts[3];
    // Group 4 is only present when the comment alternative matched.
    $comment = isset($parts[4]) ? $parts[4] : '';
    // Comments are looked up in the allowed list under the pseudo-tag "!--".
    if ($comment) {
        $elem = '!--';
    }
    // Anything not in the allowed list is removed.
    if (!isset($this->allowedTags[$elem])) {
        return '';
    }
    // Allowed comments pass through unchanged.
    if ($comment) {
        return $comment;
    }
    // Closing tags are rebuilt without attributes.
    if ($slash != '') {
        return "</$elem>";
    }
    // Detect a trailing "/" (self-closing tag) and strip it from the attribute list.
    $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
    $xhtml_slash = $count ? ' /' : '';
    // Sanitize the attributes.
    if (($attr2 = $this->attributes($attrlist, $elem)) === false) {
        return '';
    }
    $attr2 = implode(' ', $attr2);
    $attr2 = preg_replace('/[<>]/', '', $attr2);
    $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

    return "<$elem$attr2$xhtml_slash>";
}
|
|
|
|
/**
 * Parse a raw attribute list and return the sanitized attributes.
 *
 * Attributes whose name starts with "on" are always dropped. The
 * remaining ones are filtered through the "disallowed" / "allowed" lists
 * configured for $elem, style attributes are reduced to the allowed
 * properties, and every other value has dangerous protocols stripped.
 *
 * @param string $attributes Raw attribute text taken from a tag.
 * @param string $elem       Lower-case tag name used to look up the tag's configuration.
 *
 * @return array Map of attribute name => ready-to-print attribute
 *               (name="value", or just the name for valueless attributes).
 */
public function attributes($attributes, $elem = '') {
    $return = array();
    $mode = 0;
    $attrname = '';
    $skip = false;
    while (strlen($attributes) != 0) {
        $working = 0;
        switch ($mode) {
            // Expecting an attribute name.
            case 0:
                if (preg_match('/^([-a-zA-Z]+)/', $attributes, $match)) {
                    $working = 1;
                    $mode = 1;
                    $attrname = strtolower($match[1]);
                    // Event handlers (on*) are parsed but never kept.
                    $skip = substr($attrname, 0, 2) == 'on';
                    $attributes = preg_replace('/^[-a-zA-Z]+/', '', $attributes);
                }
                break;
            // After a name: either "=" or whitespace (valueless attribute).
            case 1:
                if (preg_match('/^\s*=\s*/', $attributes)) {
                    $working = 1;
                    $mode = 2;
                    $attributes = preg_replace('/^\s*=\s*/', '', $attributes);
                    break;
                }
                if (preg_match('/^\s+/', $attributes)) {
                    $working = 1;
                    $mode = 0;
                    if (!$skip) {
                        $return[$attrname] = array();
                    }
                    $attributes = preg_replace('/^\s+/', '', $attributes);
                }
                break;
            // Expecting a value: double-quoted, single-quoted or bare.
            case 2:
                if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) {
                    $working = 1;
                    $mode = 0;
                    if (!$skip) {
                        $return[$attrname] = array(
                            'value' => $match[1],
                            'delimiter' => '"',
                        );
                    }
                    $attributes = preg_replace('/^"[^"]*"(\s+|$)/', '', $attributes);
                    break;
                }
                if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) {
                    $working = 1;
                    $mode = 0;
                    if (!$skip) {
                        $return[$attrname] = array(
                            'value' => $match[1],
                            'delimiter' => "'",
                        );
                    }
                    $attributes = preg_replace("/^'[^']*'(\s+|$)/", '', $attributes);
                    break;
                }
                if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
                    $working = 1;
                    $mode = 0;
                    if (!$skip) {
                        $return[$attrname] = array(
                            'value' => $match[1],
                            'delimiter' => '"',
                        );
                    }
                    $attributes = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attributes);
                }
                break;
        }
        // Nothing matched: the input is malformed here, so discard a chunk and start over.
        if ($working == 0) {
            $lengthBefore = strlen($attributes);
            $attributes = (string) preg_replace('/
                ^
                (
                "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
                |               # or
                \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
                |               # or
                \S              # - a non-whitespace character
                )*              # any number of the above three
                \s*             # any number of whitespaces
                /x', '', $attributes);
            // Guarantee forward progress: if the clean-up pattern consumed
            // nothing, drop one byte so this loop cannot spin forever.
            if (strlen($attributes) === $lengthBefore) {
                $attributes = (string) substr($attributes, 1);
            }
            $mode = 0;
        }
    }
    // A trailing name without a value is still an attribute.
    if ($mode == 1 && !$skip) {
        $return[$attrname] = array();
    }
    // Per-tag configuration; a non-array entry (e.g. '') means "no restrictions".
    $tag = isset($this->allowedTags[$elem]) ? $this->allowedTags[$elem] : array();
    if (!is_array($tag)) {
        $tag = array();
    }
    foreach ($return as $name => $info) {
        // Remove attributes that are explicitly disallowed.
        if (isset($tag['disallowed']) && in_array($name, (array) $tag['disallowed'])) {
            unset($return[$name]);
            continue;
        }
        // When an allowed list exists, keep nothing else.
        if (isset($tag['allowed']) && !in_array($name, (array) $tag['allowed'])) {
            unset($return[$name]);
            continue;
        }
        // Valueless attribute: emit the bare name. Leaving the parser's
        // array() in place would make the caller's implode() print "Array".
        if (!isset($info['value'])) {
            $return[$name] = $name;
            continue;
        }
        if ($name == 'style') {
            // Style gets a property-by-property clean-up.
            $info['value'] = $this->sanitizeStyleValue($info['value']);
            if ($info['value'] === '') {
                unset($return[$name]);
                continue;
            }
        } else {
            $info['value'] = $this->filterBadProtocol($info['value']);
        }

        $return[$name] = $name . '=' . $info['delimiter'] . $info['value'] . $info['delimiter'];
    }

    return $return;
}

/**
 * Reduce a style attribute value to its allowed, escaped declarations.
 *
 * @param string $style Raw style attribute value.
 *
 * @return string Declarations joined with "; ", or '' when none survive.
 */
private function sanitizeStyleValue($style) {
    $sanitized = array();
    $declarations = array_filter(array_map('trim', explode(';', $this->decodeEntities($style))));
    foreach ($declarations as $declaration) {
        if (!preg_match('#^([a-zA-Z][-a-zA-Z]*)\s*:\s*(.*)$#', $declaration, $parts)) {
            continue;
        }
        $property = strtolower($parts[1]);
        $value = $parts[2];
        if (!isset($this->allowedStyleProperties[$property])) {
            continue;
        }
        // CSS function names are case-insensitive, so look for url( in any case.
        if (stripos($value, 'url(') !== false && !$this->areStyleUrlsAllowed($value)) {
            continue;
        }
        $sanitized[] = $property . ':' . $this->checkPlain($value);
    }

    return implode('; ', $sanitized);
}

/**
 * Check that every url() reference in a style value is acceptable.
 *
 * A URL is acceptable when filterBadProtocol() leaves it unchanged and it
 * is either a root-relative path or matches one of the allowed domain
 * patterns.
 *
 * @param string $value A single style declaration value containing "url(".
 *
 * @return bool True only if all url() references pass.
 */
private function areStyleUrlsAllowed($value) {
    $found = preg_match_all('`url\(\s*(([\'"]?)(?:[^)]|(?<=\\\\)\\))+[\'"]?)\s*\)`i', $value, $urls, PREG_SET_ORDER);
    // Every "url(" occurrence must be accounted for by a parsed reference;
    // otherwise something is hiding behind a malformed one.
    if (!$found || $found !== substr_count(strtolower($value), 'url(')) {
        return false;
    }
    foreach ($urls as $url) {
        if (empty($url[1])) {
            return false;
        }
        $target = $url[1];
        if (!empty($url[2])) {
            // A quoted URL must close with the quote it opened with.
            if (substr($target, -1) != $url[2]) {
                return false;
            }
            $target = substr($target, 1, -1);
        }
        // Undo CSS backslash escapes of ( ) , quotes and whitespace.
        $target = preg_replace('`\\\\([(),\'"\s])`', '\1', $target);
        if ($this->filterBadProtocol($target) != $target) {
            return false;
        }
        // Root-relative paths are accepted as they are.
        if (preg_match('`^/[^/]+`', $target)) {
            continue;
        }
        $trusted = false;
        foreach ($this->allowedStyleDomain as $reg) {
            if (preg_match($reg, $target)) {
                $trusted = true;
                break;
            }
        }
        if (!$trusted) {
            return false;
        }
    }

    return true;
}
|
|
|
|
/**
 * Set the allowed tags.
 *
 * Entries given as a plain list item (integer key, string value) are
 * converted to "tag name => array()"; all other entries are stored as
 * passed.
 *
 * @param array $tags Allowed tags, as a list of names and/or a name => config map.
 */
public function setAllowedTags($tags) {
    // Walk a snapshot so the rewrites below do not disturb the iteration.
    $snapshot = $tags;
    foreach ($snapshot as $index => $entry) {
        if (!is_int($index) || !is_string($entry)) {
            continue;
        }
        unset($tags[$index]);
        $tags[$entry] = array();
    }
    $this->allowedTags = $tags;
}
|
|
|
|
/**
 * Set the allowed style properties.
 *
 * The list is flipped so that property names become array keys.
 *
 * @param array $properties List of style property names.
 */
public function setAllowedStyleProperties($properties) {
    $byName = array_flip($properties);
    $this->allowedStyleProperties = $byName;
}
|
|
|
|
/**
 * Add one or more domains that url() values in style attributes may use.
 *
 * Each domain is wrapped into a case-insensitive pattern anchored on
 * "http://", "https://" or "//". Values that are neither a string nor an
 * array are ignored.
 *
 * NOTE(review): the domain is inserted into the pattern as-is, so regex
 * metacharacters in it are live and nothing anchors the end of the host
 * name - confirm callers pass suitably escaped values.
 *
 * @param string|array $domain A domain, or a list of domains.
 *
 * @return $this
 */
public function setAllowedStyleDomain($domain) {
    $hosts = is_string($domain) ? array($domain) : $domain;
    if (is_array($hosts)) {
        foreach ($hosts as $host) {
            $this->allowedStyleDomain[] = '`^(https?://|//)' . $host . '`i';
        }
    }

    return $this;
}
|
|
|
|
/**
 * Tell whether a string is valid UTF-8.
 *
 * An empty string counts as valid. Otherwise the check relies on a
 * pattern match with the "u" modifier succeeding.
 *
 * @param string $text Text to check.
 *
 * @return bool
 */
public function isUTF8($text) {
    return strlen($text) == 0 || preg_match('/^./us', $text) == 1;
}
|
|
|
|
/**
 * Strip dangerous protocols from an attribute value and escape it.
 *
 * The value is entity-decoded first, then passed through
 * stripDangerousProtocols() and finally HTML-escaped.
 *
 * @param string $string Attribute value.
 *
 * @return string The cleaned, HTML-escaped value.
 */
public function filterBadProtocol($string) {
    $decoded = $this->decodeEntities($string);
    $stripped = $this->stripDangerousProtocols($decoded);

    return $this->checkPlain($stripped);
}
|
|
|
|
/**
 * Remove every leading protocol that is not http or https from a URI.
 *
 * Protocols are peeled off repeatedly, so "a:b:rest" loses both prefixes.
 * Text before the first colon that contains "/", "?" or "#" is not treated
 * as a protocol.
 *
 * @param string $uri URI to clean.
 *
 * @return string The URI without disallowed protocol prefixes.
 */
public function stripDangerousProtocols($uri) {
    $safeSchemes = array('http' => true, 'https' => true);
    while (true) {
        $separator = strpos($uri, ':');
        // No colon, or a colon in first position: nothing to strip.
        if ($separator === false || $separator === 0) {
            break;
        }
        $scheme = substr($uri, 0, $separator);
        // The colon belongs to a path, query or fragment.
        if (preg_match('![/?#]!', $scheme)) {
            break;
        }
        if (isset($safeSchemes[strtolower($scheme)])) {
            break;
        }
        // Drop the protocol and look again at what is left.
        $uri = substr($uri, $separator + 1);
    }

    return $uri;
}
|
|
|
|
/**
 * Escape text for output as HTML.
 *
 * Both double and single quotes are converted (ENT_QUOTES).
 *
 * @param string $text Text to escape.
 *
 * @return string The escaped text.
 */
public function checkPlain($text) {
    $escaped = htmlspecialchars($text, ENT_QUOTES, 'UTF-8');

    return $escaped;
}
|
|
|
|
/**
 * Decode HTML entities back into characters.
 *
 * Entities for both double and single quotes are decoded (ENT_QUOTES).
 *
 * @param string $text Text containing HTML entities.
 *
 * @return string The decoded text.
 */
public function decodeEntities($text) {
    $decoded = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    return $decoded;
}
|
|
|
|
}
|
|
|