Commit b130fa9b authored by Francois Marier's avatar Francois Marier Committed by Gerrit Code Review
Browse files

Merge changes I6617bef5,I8b7af1a6,I48c3ff5c,If3b09f73,I1f7e92f5

* changes:
  Compress sitemaps using php's zlib rather than gzip executable
  Serve sitemaps from download.php and use download url in the index
  Remove daily sitemap before writing if it already exists
  Write sitemap xml files to dataroot, not docroot (bug #914490)
  Move check for sitemap configuration setting
parents 94a9d57e 728a6935
......@@ -28,16 +28,38 @@ define('INTERNAL', 1);
require('init.php');
require_once('file.php');
$data = $SESSION->get('downloadfile');
$type = param_alpha('type', null);
if (!$USER->is_logged_in() || empty($data) || empty($data['file'])) {
throw new NotFoundException(get_string('filenotfound'));
if ($type == 'sitemap') {
if (!get_config('generatesitemap')) {
throw new NotFoundException(get_string('filenotfound'));
}
if ($name = param_alphanumext('name', null)) {
if (!preg_match('/^sitemap_[a-z0-9_]+\.xml(\.gz)?$/', $name, $m)) {
throw new NotFoundException(get_string('filenotfound'));
}
$mimetype = empty($m[1]) ? 'text/xml' : 'application/gzip';
}
else {
$name = 'sitemap_index.xml';
$mimetype = 'text/xml';
}
$path = get_config('dataroot') . 'sitemaps/' . $name;
}
else {
$data = $SESSION->get('downloadfile');
if (!$USER->is_logged_in() || empty($data) || empty($data['file'])) {
throw new NotFoundException(get_string('filenotfound'));
}
$path = get_config('dataroot') . 'export/' . $USER->get('id') . '/' . $data['file'];
$path = get_config('dataroot') . 'export/' . $USER->get('id') . '/' . $data['file'];
$name = $data['name'];
$mimetype = $data['mimetype'];
}
if (!file_exists($path)) {
throw new NotFoundException(get_string('filenotfound'));
}
serve_file($path, $data['name'], $data['mimetype']);
serve_file($path, $name, $mimetype);
......@@ -2811,6 +2811,10 @@ function cron_site_data_daily() {
* A cronjob to generate a sitemap
*/
function cron_sitemap_daily() {
if (!get_config('generatesitemap')) {
return;
}
require_once(get_config('libroot') . 'searchlib.php');
require_once(get_config('libroot') . 'group.php');
require_once(get_config('libroot') . 'view.php');
......
......@@ -67,25 +67,38 @@ class Sitemap {
*/
private $currenturlset;
/**
* @var string The directory in which to put sitemap files
*/
private $directory;
/**
* @param bool $forcefull Force generation of a full sitemap (non-daily)
*/
public function __construct($forcefull = false) {
$this->directory = get_config('dataroot') . 'sitemaps/';
// on the first of the month, or if forced, generate the full sitemap
if (date("d") == 1 || $forcefull === true) {
$this->date_to_check = null;
$remove = 'sitemap_*.xml';
}
else { // otherwise limit to 'yesterday'
$this->date_to_check = date("Y-m-d", strtotime('yesterday'));
$remove = 'sitemap_' . date('Ymd') . '_*.xml';
}
// remove all content from the sitemaps directory so that
// only newly created sitemaps are added to the index
$sitemaps = glob(get_config('docroot') .'/sitemaps/sitemap_*.xml*');
foreach ($sitemaps as $sitemap) {
if (!unlink($sitemap)) {
log_warn(sprintf("Failed to remove sitemap: %s, please check directory and file permissions.", basename($sitemap)));
}
// remove any sitemaps we're about to replace
if (!$oldsitemaps = glob($this->directory . $remove)) {
$oldsitemaps = array();
}
if ($oldcompressed = glob($this->directory . $remove . '.gz')) {
$oldsitemaps = array_merge($oldsitemaps, $oldcompressed);
}
foreach ($oldsitemaps as $sitemap) {
if (!unlink($sitemap)) {
log_warn(sprintf("Failed to remove sitemap: %s, please check directory and file permissions.", basename($sitemap)));
}
} // otherwise limit to 'yesterday'
else {
$this->date_to_check = date("Y-m-d", strtotime('yesterday'));
}
}
......@@ -95,14 +108,9 @@ class Sitemap {
* @return bool
*/
public function generate() {
$generatesitemap = get_config('generatesitemap');
if (!$generatesitemap) {
log_info('Sitemap generation has been disabled.');
return false;
}
// check that the sitemaps directory exists and create it if it doesn't
check_dir_exists(get_config('docroot') . 'sitemaps', true);
check_dir_exists($this->directory, true);
// this is used by PluginInteractionForum::get_active_topics
$USER = new User();
......@@ -196,20 +204,20 @@ class Sitemap {
// step through each sitemap we have generated
foreach ($this->sitemaps as $key => $sitemap) {
$filename = sprintf("%ssitemaps/sitemap_%s_%d.xml", get_config('docroot'), date("Ymd"), $key);
$filename = sprintf("%ssitemap_%s_%d.xml", $this->directory, date("Ymd"), $key);
// if the save succeeded, add it to the index
if ($sitemap->save($filename) !== false) {
$xml = $sitemap->saveXML();
if ($xml !== false) {
// try to gzip the xml file
if (is_executable(get_config('pathtogzip'))) {
$command = sprintf('%s %s', get_config('pathtogzip'), escapeshellarg($filename));
$output = array();
exec($command, $output, $returnvar);
if ($returnvar != 0) {
log_warn('gzip command failed.');
}
if (function_exists('gzopen') and $zp = gzopen($filename . '.gz', 'w9')) {
gzwrite($zp, $xml);
gzclose($zp);
}
else {
log_info('Skipping compression of xml file - gzip command not found, or not executable.');
if (!file_put_contents($filename, $xml)) {
throw new SystemException(sprintf("Saving of this sitemap file failed: %s", $filename));
}
}
}
else {
......@@ -218,13 +226,13 @@ class Sitemap {
}
// get a list of sitemaps in the sitemap directory
$sitemaps = glob(get_config('docroot') .'/sitemaps/sitemap_*.xml*');
$sitemaps = glob($this->directory . 'sitemap_*.xml*');
foreach ($sitemaps as $sitemap) {
// create a <sitemap> node for each one we're adding
$sitemapelement = $doc->createElement('sitemap');
// create and encode the url
$sitemapurl = sprintf("%ssitemaps/%s", get_config('wwwroot'), basename($sitemap));
$sitemapurl = sprintf("%sdownload.php?type=sitemap&name=%s", get_config('wwwroot'), basename($sitemap));
$sitemapurl = utf8_encode(htmlspecialchars($sitemapurl, ENT_QUOTES, 'UTF-8'));
// add it to the <sitemap> node
......@@ -244,7 +252,7 @@ class Sitemap {
// add the index to the main doc and save it
$doc->appendChild($sitemapindex);
$indexfilename = sprintf("%ssitemap_index.xml", get_config('docroot'));
$indexfilename = sprintf("%ssitemap_index.xml", $this->directory);
$doc->save($indexfilename);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment