Commit 74ef4428, authored by Richard Mansfield, committed by Nigel McNie

Clean html on download

parent b7608c80
......@@ -39,6 +39,17 @@ $options = array();
// When the download is not forced, hand serve_file() a link back to this
// script with download=1 so it can be offered as the "original version"
// if the served HTML ends up being cleaned.
if (!$forcedl) {
    $queryparts = array('file=' . $fileid);
    if (!empty($viewid)) {
        $queryparts[] = 'view=' . $viewid;
    }
    if (!empty($size)) {
        $queryparts[] = 'size=' . $size;
    }
    $queryparts[] = 'download=1';
    $downloadurl = get_config('wwwroot') . 'artefact/file/download.php?' . implode('&', $queryparts);
    $options['cleanhtmlparams'] = array('downloadurl' => $downloadurl);
}
else {
    $options['forcedownload'] = true;
}
if ($viewid && $fileid) {
if (!artefact_in_view($fileid, $viewid)) {
......
......@@ -587,6 +587,10 @@ $string['youraccounthasbeensuspendedtext'] = 'Your account has been suspended';
$string['youraccounthasbeenunsuspended'] = 'Your account has been unsuspended';
$string['youraccounthasbeenunsuspendedtext'] = 'Your account has been unsuspended'; // @todo: more info?
// Display of purified html: the notice shown with cleaned content and the
// label of the link to the original file (both used by cleanedhtml.tpl)
$string['htmlremovedmessage'] = 'Some potentially malicious content was detected and removed from this file.';
$string['downloadoriginalversion'] = 'Download the original version';
// Size unit labels
$string['sizemb'] = 'MB';
$string['sizekb'] = 'KB';
......
......@@ -67,12 +67,30 @@ function serve_file($path, $filename, $options=array()) {
session_write_close(); // unlock session during fileserving
$mimetype = get_mime_type($path);
if (!$mimetype || (!is_image_mime_type($mimetype) && (isset($_SERVER['HTTP_USER_AGENT']) && false !== strpos($_SERVER['HTTP_USER_AGENT'], 'MSIE')))) {
$mimetype = 'application/forcedownload';
}
$lastmodified = filemtime($path);
$filesize = filesize($path);
if ($mimetype == 'text/html') {
if (isset($options['cleanhtmlparams']) && $filesize < 1024 * 1024) {
// Read file contents, clean if necessary
$originalhtml = file_get_contents($path);
$purifyresult = clean_text($originalhtml, true);
if ($purifyresult->purified) {
display_cleaned_html($purifyresult->html, $options['cleanhtmlparams']);
exit;
}
$fileoutput = $originalhtml;
}
else {
$options['forcedownload'] = true;
$mimetype = 'application/octet-stream';
}
}
if (!$mimetype || (!is_image_mime_type($mimetype) && (isset($_SERVER['HTTP_USER_AGENT']) && false !== strpos($_SERVER['HTTP_USER_AGENT'], 'MSIE'))) && !isset($fileoutput)) {
$mimetype = 'application/forcedownload';
}
if (ini_get('zlib.output_compression')) {
ini_set('zlib.output_compression', 'Off');
}
......@@ -96,7 +114,7 @@ function serve_file($path, $filename, $options=array()) {
header('Expires: '. gmdate('D, d M Y H:i:s', time() + $options['lifetime']) .' GMT');
header('Pragma: ');
if ($mimetype != 'text/plain' && $mimetype != 'text/html') {
if ($mimetype != 'text/plain' && $mimetype != 'text/html' && !isset($fileoutput)) {
@header('Accept-Ranges: bytes');
if (!empty($_SERVER['HTTP_RANGE']) && strpos($_SERVER['HTTP_RANGE'],'bytes=') !== FALSE) {
......@@ -166,7 +184,12 @@ function serve_file($path, $filename, $options=array()) {
}
header('Content-Length: ' . $filesize);
while (@ob_end_flush()); //flush the buffers - save memory and disable sid rewrite
readfile_chunked($path);
if (isset($fileoutput)) {
echo $fileoutput;
}
else {
readfile_chunked($path);
}
exit;
}
......
......@@ -131,7 +131,7 @@ class HTMLPurifier
* that HTMLPurifier_Config::create() supports.
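* @param $test If true, also report whether purification changed the tokens
* @return Purified HTML; when $test is true, an object with 'html' (the
*         purified HTML) and 'purified' (true if the token lists differ)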
*/
public function purify($html, $config = null) {
public function purify($html, $config = null, $test = false) {
// todo: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
......@@ -168,27 +168,21 @@ class HTMLPurifier
$html = $this->filters[$i]->preFilter($html, $config, $context);
}
$dirtytokens = $lexer->tokenizeHTML($html, $config, $context);
$cleantokens = $this->strategy->execute($dirtytokens, $config, $context);
// purified HTML
$html =
$this->generator->generateFromTokens(
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
$lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),
$config, $context
),
$config, $context
);
$html = $this->generator->generateFromTokens($cleantokens, $config, $context);
for ($i = $size - 1; $i >= 0; $i--) {
$html = $this->filters[$i]->postFilter($html, $config, $context);
}
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
$this->context =& $context;
if ($test) {
return (object) array('html' => $html, 'purified' => $dirtytokens != $cleantokens);
}
return $html;
}
......
......@@ -1833,14 +1833,31 @@ function format_introduction($introduction) {
* and removes any nasty tags that could mess up pages.
*
* @param string $text The text to be cleaned
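* @param bool $test If true, also report whether anything was cleaned
* @return string|object The cleaned up text, or when $test is true an
*         object with 'html' (the cleaned text) and 'purified' (whether
*         purification changed anything) properties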
*/
function clean_text($text) {
function clean_text($text, $test = false) {
require_once('htmlpurifier/HTMLPurifier.auto.php');
$config = HTMLPurifier_Config::createDefault();
$config->set('Cache', 'SerializerPath', get_config('dataroot') . 'htmlpurifier');
$purifier = new HTMLPurifier($config);
return $purifier->purify($text);
return $purifier->purify($text, null, $test);
}
/**
 * Renders purified html through the 'cleanedhtml.tpl' template, which adds
 * an explanatory message, and then ends the request.
 *
 * @param string $html   The purified html; assigned to the template as 'content'.
 * @param array  $params Variables passed to the template as 'params'. Currently
 *                       downloadurl - link to download the original (dirty) file.
 */
function display_cleaned_html($html, $params) {
    $template = smarty_core();
    $template->assign('content', $html);
    $template->assign('params', $params);
    $template->display('cleanedhtml.tpl');
    // Nothing else may be sent after the page has been rendered.
    exit;
}
......
{* Page shown when content was stripped from an uploaded HTML file.
   $params.downloadurl (optional) links to the original file and is escaped
   because it is a query string with raw '&' separators inside an attribute.
   $content is already-purified markup and is deliberately output as-is. *}
<html>
<head><title></title></head>
<body>
<hr />
<div>{str tag=htmlremovedmessage}</div>
{if !empty($params.downloadurl)}
<div>
<a href="{$params.downloadurl|escape}">{str tag=downloadoriginalversion}</a>
</div>
{/if}
<hr />
<div>
{$content}
</div>
</body>
</html>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment