Commit 9ad29e96 authored by Robert Lyon's avatar Robert Lyon Committed by Aaron Wells

Updating HTMLPurifier to version 4.6.0 (Bug #1266976)

Change-Id: I1a6145ca25ecde56c83f2540d04874e973e926f6
Signed-off-by: Robert Lyon's avatarRobert Lyon <robertl@catalyst.net.nz>
parent d6eea1d5
......@@ -14,7 +14,8 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
spl_autoload_register('__autoload');
}
} elseif (!function_exists('__autoload')) {
function __autoload($class) {
function __autoload($class)
{
return HTMLPurifier_Bootstrap::autoload($class);
}
}
......
<?php
if (!defined('HTMLPURIFIER_PREFIX')) {
define('HTMLPURIFIER_PREFIX', __DIR__);
}
......@@ -8,11 +8,13 @@
/**
* Purify HTML.
* @param $html String HTML to purify
* @param $config Configuration to use, can be any value accepted by
* @param string $html String HTML to purify
* @param mixed $config Configuration to use, can be any value accepted by
* HTMLPurifier_Config::create()
* @return string
*/
function HTMLPurifier($html, $config = null) {
function HTMLPurifier($html, $config = null)
{
static $purifier = false;
if (!$purifier) {
$purifier = new HTMLPurifier();
......
......@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.4.0
* @version 4.6.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
......@@ -19,6 +19,7 @@
*/
require 'HTMLPurifier.php';
require 'HTMLPurifier/Arborize.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
......@@ -54,9 +55,11 @@ require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/Node.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Queue.php';
require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php';
......@@ -72,6 +75,7 @@ require 'HTMLPurifier/URISchemeRegistry.php';
require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/Zipper.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Clone.php';
require 'HTMLPurifier/AttrDef/Enum.php';
......@@ -165,6 +169,7 @@ require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/SafeScripting.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
......@@ -188,6 +193,9 @@ require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Node/Comment.php';
require 'HTMLPurifier/Node/Element.php';
require 'HTMLPurifier/Node/Text.php';
require 'HTMLPurifier/Strategy/Composite.php';
require 'HTMLPurifier/Strategy/Core.php';
require 'HTMLPurifier/Strategy/FixNesting.php';
......
......@@ -7,7 +7,8 @@
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
function kses($string, $allowed_html, $allowed_protocols = null) {
function kses($string, $allowed_html, $allowed_protocols = null)
{
$config = HTMLPurifier_Config::createDefault();
$allowed_elements = array();
$allowed_attributes = array();
......@@ -19,7 +20,6 @@ function kses($string, $allowed_html, $allowed_protocols = null) {
}
$config->set('HTML.AllowedElements', $allowed_elements);
$config->set('HTML.AllowedAttributes', $allowed_attributes);
$allowed_schemes = array();
if ($allowed_protocols !== null) {
$config->set('URI.AllowedSchemes', $allowed_protocols);
}
......
......@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
HTML Purifier 4.6.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
......@@ -54,66 +54,97 @@
class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.4.0';
/**
* Version of HTML Purifier.
* @type string
*/
public $version = '4.6.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.4.0';
/**
* Constant with version of HTML Purifier.
*/
const VERSION = '4.6.0';
/** Global configuration object */
/**
* Global configuration object.
* @type HTMLPurifier_Config
*/
public $config;
/** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
/**
* Array of extra filter objects to run on HTML,
* for backwards compatibility.
* @type HTMLPurifier_Filter[]
*/
private $filters = array();
/** Single instance of HTML Purifier */
/**
* Single instance of HTML Purifier.
* @type HTMLPurifier
*/
private static $instance;
protected $strategy, $generator;
/**
* @type HTMLPurifier_Strategy_Core
*/
protected $strategy;
/**
* @type HTMLPurifier_Generator
*/
protected $generator;
/**
* Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
* Resultant context of last run purification.
* Is an array of contexts if the last called method was purifyArray().
* @type HTMLPurifier_Context
*/
public $context;
/**
* Initializes the purifier.
* @param $config Optional HTMLPurifier_Config object for all instances of
* the purifier, if omitted, a default configuration is
* supplied (which can be overridden on a per-use basis).
*
* @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
* for all instances of the purifier, if omitted, a default
* configuration is supplied (which can be overridden on a
* per-use basis).
* The parameter can also be any type that
* HTMLPurifier_Config::create() supports.
*/
public function __construct($config = null) {
public function __construct($config = null)
{
$this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
$this->strategy = new HTMLPurifier_Strategy_Core();
}
/**
* Adds a filter to process the output. First come first serve
* @param $filter HTMLPurifier_Filter object
*
* @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
*/
public function addFilter($filter) {
trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
public function addFilter($filter)
{
trigger_error(
'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
' in the Filter namespace or Filter.Custom',
E_USER_WARNING
);
$this->filters[] = $filter;
}
/**
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
*
* @param $html String of HTML to purify
* @param $config HTMLPurifier_Config object for this operation, if omitted,
* defaults to the config object specified during this
* @param string $html String of HTML to purify
* @param HTMLPurifier_Config $config Config object for this operation,
* if omitted, defaults to the config object specified during this
* object's construction. The parameter can also be any type
* that HTMLPurifier_Config::create() supports.
* @return Purified HTML
*
* @return string Purified HTML
*/
public function purify($html, $config = null) {
public function purify($html, $config = null)
{
// :TODO: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
......@@ -151,8 +182,12 @@ class HTMLPurifier
unset($filter_flags['Custom']);
$filters = array();
foreach ($filter_flags as $filter => $flag) {
if (!$flag) continue;
if (strpos($filter, '.') !== false) continue;
if (!$flag) {
continue;
}
if (strpos($filter, '.') !== false) {
continue;
}
$class = "HTMLPurifier_Filter_$filter";
$filters[] = new $class;
}
......@@ -175,9 +210,12 @@ class HTMLPurifier
// list of un-purified tokens
$lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
$html,
$config,
$context
),
$config, $context
$config,
$context
)
);
......@@ -192,11 +230,15 @@ class HTMLPurifier
/**
* Filters an array of HTML snippets
* @param $config Optional HTMLPurifier_Config object for this operation.
*
* @param string[] $array_of_html Array of html snippets
* @param HTMLPurifier_Config $config Optional config object for this operation.
* See HTMLPurifier::purify() for more details.
* @return Array of purified HTML
*
* @return string[] Array of purified HTML
*/
public function purifyArray($array_of_html, $config = null) {
public function purifyArray($array_of_html, $config = null)
{
$context_array = array();
foreach ($array_of_html as $key => $html) {
$array_of_html[$key] = $this->purify($html, $config);
......@@ -208,11 +250,16 @@ class HTMLPurifier
/**
* Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with, or HTMLPurifier_Config
* instance to configure the generated version with.
*
* @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
* HTMLPurifier instance to overload singleton with,
* or HTMLPurifier_Config instance to configure the
* generated version with.
*
* @return HTMLPurifier
*/
public static function instance($prototype = null) {
public static function instance($prototype = null)
{
if (!self::$instance || $prototype) {
if ($prototype instanceof HTMLPurifier) {
self::$instance = $prototype;
......@@ -226,12 +273,20 @@ class HTMLPurifier
}
/**
* Singleton for enforcing just one HTML Purifier in your system
*
* @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
* HTMLPurifier instance to overload singleton with,
* or HTMLPurifier_Config instance to configure the
* generated version with.
*
* @return HTMLPurifier
* @note Backwards compatibility, see instance()
*/
public static function getInstance($prototype = null) {
public static function getInstance($prototype = null)
{
return HTMLPurifier::instance($prototype);
}
}
// vim: et sw=4 sts=4
......@@ -13,6 +13,7 @@
$__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php';
require_once $__dir . '/HTMLPurifier/Arborize.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
......@@ -48,9 +49,11 @@ require_once $__dir . '/HTMLPurifier/Language.php';
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/Node.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
require_once $__dir . '/HTMLPurifier/Queue.php';
require_once $__dir . '/HTMLPurifier/Strategy.php';
require_once $__dir . '/HTMLPurifier/StringHash.php';
require_once $__dir . '/HTMLPurifier/StringHashParser.php';
......@@ -66,6 +69,7 @@ require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/Zipper.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
......@@ -159,6 +163,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeScripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
......@@ -182,6 +187,9 @@ require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
require_once $__dir . '/HTMLPurifier/Node/Comment.php';
require_once $__dir . '/HTMLPurifier/Node/Element.php';
require_once $__dir . '/HTMLPurifier/Node/Text.php';
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
......
<?php
/**
* Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
* and back again.
*
* @note This transformation is not an equivalence. We mutate the input
* token stream to make it so; see all [MUT] markers in code.
*/
class HTMLPurifier_Arborize
{
public static function arborize($tokens, $config, $context) {
$definition = $config->getHTMLDefinition();
$parent = new HTMLPurifier_Token_Start($definition->info_parent);
$stack = array($parent->toNode());
foreach ($tokens as $token) {
$token->skip = null; // [MUT]
$token->carryover = null; // [MUT]
if ($token instanceof HTMLPurifier_Token_End) {
$token->start = null; // [MUT]
$r = array_pop($stack);
assert($r->name === $token->name);
assert(empty($token->attr));
$r->endCol = $token->col;
$r->endLine = $token->line;
$r->endArmor = $token->armor;
continue;
}
$node = $token->toNode();
$stack[count($stack)-1]->children[] = $node;
if ($token instanceof HTMLPurifier_Token_Start) {
$stack[] = $node;
}
}
assert(count($stack) == 1);
return $stack[0];
}
public static function flatten($node, $config, $context) {
$level = 0;
$nodes = array($level => new HTMLPurifier_Queue(array($node)));
$closingTokens = array();
$tokens = array();
do {
while (!$nodes[$level]->isEmpty()) {
$node = $nodes[$level]->shift(); // FIFO
list($start, $end) = $node->toTokenPair();
if ($level > 0) {
$tokens[] = $start;
}
if ($end !== NULL) {
$closingTokens[$level][] = $end;
}
if ($node instanceof HTMLPurifier_Node_Element) {
$level++;
$nodes[$level] = new HTMLPurifier_Queue();
foreach ($node->children as $childNode) {
$nodes[$level]->push($childNode);
}
}
}
$level--;
if ($level && isset($closingTokens[$level])) {
while ($token = array_pop($closingTokens[$level])) {
$tokens[] = $token;
}
}
} while ($level > 0);
return $tokens;
}
}
......@@ -8,7 +8,8 @@ class HTMLPurifier_AttrCollections
{
/**
* Associative array of attribute collections, indexed by name
* Associative array of attribute collections, indexed by name.
* @type array
*/
public $info = array();
......@@ -16,10 +17,11 @@ class HTMLPurifier_AttrCollections
* Performs all expansions on internal data for use by other inclusions
* It also collects all attribute collection extensions from
* modules
* @param $attr_types HTMLPurifier_AttrTypes instance
* @param $modules Hash array of HTMLPurifier_HTMLModule members
* @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
* @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
*/
public function __construct($attr_types, $modules) {
public function __construct($attr_types, $modules)
{
// load extensions from the modules
foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) {
......@@ -30,7 +32,9 @@ class HTMLPurifier_AttrCollections
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
// merge in includes
$this->info[$coll_i][$attr_i] = array_merge(
$this->info[$coll_i][$attr_i], $attr);
$this->info[$coll_i][$attr_i],
$attr
);
continue;
}
$this->info[$coll_i][$attr_i] = $attr;
......@@ -49,20 +53,29 @@ class HTMLPurifier_AttrCollections
/**
* Takes a reference to an attribute associative array and performs
* all inclusions specified by the zero index.
* @param &$attr Reference to attribute array
* @param array &$attr Reference to attribute array
*/
public function performInclusions(&$attr) {
if (!isset($attr[0])) return;
public function performInclusions(&$attr)
{
if (!isset($attr[0])) {
return;
}
$merge = $attr[0];
$seen = array(); // recursion guard
// loop through all the inclusions
for ($i = 0; isset($merge[$i]); $i++) {
if (isset($seen[$merge[$i]])) continue;
if (isset($seen[$merge[$i]])) {
continue;
}
$seen[$merge[$i]] = true;
// foreach attribute of the inclusion, copy it over
if (!isset($this->info[$merge[$i]])) continue;
if (!isset($this->info[$merge[$i]])) {
continue;
}
foreach ($this->info[$merge[$i]] as $key => $value) {
if (isset($attr[$key])) continue; // also catches more inclusions
if (isset($attr[$key])) {
continue;
} // also catches more inclusions
$attr[$key] = $value;
}
if (isset($this->info[$merge[$i]][0])) {
......@@ -76,20 +89,24 @@ class HTMLPurifier_AttrCollections
/**
* Expands all string identifiers in an attribute array by replacing
* them with the appropriate values inside HTMLPurifier_AttrTypes
* @param &$attr Reference to attribute array
* @param $attr_types HTMLPurifier_AttrTypes instance
* @param array &$attr Reference to attribute array
* @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
*/
public function expandIdentifiers(&$attr, $attr_types) {
public function expandIdentifiers(&$attr, $attr_types)
{
// because foreach will process new elements we add, make sure we
// skip duplicates
$processed = array();
foreach ($attr as $def_i => $def) {
// skip inclusions
if ($def_i === 0) continue;
if ($def_i === 0) {
continue;
}
if (isset($processed[$def_i])) continue;
if (isset($processed[$def_i])) {
continue;
}
// determine whether or not attribute is required
if ($required = (strpos($def_i, '*') !== false)) {
......@@ -120,9 +137,7 @@ class HTMLPurifier_AttrCollections
unset($attr[$def_i]);
}
}
}
}
// vim: et sw=4 sts=4
......@@ -14,23 +14,25 @@ abstract class HTMLPurifier_AttrDef
{
/**
* Tells us whether or not an HTML attribute is minimized. Has no
* meaning in other contexts.
* Tells us whether or not an HTML attribute is minimized.
* Has no meaning in other contexts.
* @type bool
*/
public $minimized = false;
/**
* Tells us whether or not an HTML attribute is required. Has no
* meaning in other contexts
* Tells us whether or not an HTML attribute is required.
* Has no meaning in other contexts
* @type bool
*/
public $required = false;
/**
* Validates and cleans passed string according to a definition.
*
* @param $string String to be validated and cleaned.
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_AttrContext object.
* @param string $string String to be validated and cleaned.
* @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
* @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
*/
abstract public function validate($string, $config, $context);
......@@ -55,7 +57,8 @@ abstract class HTMLPurifier_AttrDef
* parsing XML, thus, this behavior may still be correct. We
* assume that newlines have been normalized.
*/
public function parseCDATA($string) {
public function parseCDATA($string)
{
$string = trim($string);
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
return $string;
......@@ -63,10 +66,11 @@ abstract class HTMLPurifier_AttrDef
/**
* Factory method for creating this class from a string.
* @param $string String construction info
* @return Created AttrDef object corresponding to $string
* @param string $string String construction info