Commit 4210a108 authored by Son Nguyen's avatar Son Nguyen

Bug 1594615: Replace PEAR::XML_Feed_Parser

Add new class FeedReader
Remove PEAR::XML_Feed_Parser
Add behat tests for external blocks

Change-Id: Idda5bff4624e8400340da22662e3e36296b52090
parent 92ff38eb
This diff is collapsed.
......@@ -11,7 +11,7 @@
defined('INTERNAL') || die();
require_once('XML/Feed/Parser.php');
require_once('feedreader.php');
class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
......@@ -258,7 +258,7 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
self::parse_feed($values['url'], $values['insecuresslmode'], $values['authuser'], $authpassword);
return;
}
catch (XML_Feed_Parser_Exception $e) {
catch (MaharaException $e) {
// Pad the response time to hinder timing side channel attacks
list($usec, $sec) = explode(" ", microtime());
......@@ -335,7 +335,7 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
unset($data);
$data = self::parse_feed($feed->url, $feed->insecuresslmode, $feed->authuser, $feed->authpassword);
}
catch (XML_Feed_Parser_Exception $e) {
catch (MaharaException $e) {
// The feed must have changed in such a way as to become
// invalid since it was added. We ignore this case in the hope
// the feed will become valid some time later
......@@ -353,7 +353,7 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
$data->lastupdate = db_format_timestamp(time());
update_record('blocktype_externalfeed_data', $data);
}
catch (XML_Feed_Parser_Exception $e) {
catch (MaharaException $e) {
// We tried to add the newly parsed data
}
}
......@@ -388,7 +388,7 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
* @param bool $insecuresslmode Skip certificate checking
* @param string $authuser HTTP basic auth username to use
* @param string $authpassword HTTP basic auth password to use
* @throws XML_Feed_Parser_Exception
* @throws MaharaException
*/
public static function parse_feed($source, $insecuresslmode=false, $authuser='', $authpassword='') {
......@@ -420,17 +420,17 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
$result = mahara_http_request($config, true);
if ($result->error) {
throw new XML_Feed_Parser_Exception($result->error);
throw new MaharaException('Feed url returned error', $result->error);
}
if (empty($result->data)) {
throw new XML_Feed_Parser_Exception('Feed url returned no data');
throw new MaharaException('Feed url returned no data');
}
try {
$feed = new XML_Feed_Parser($result->data, false, true, false);
$reader = new FeedReader($result->data);
}
catch (XML_Feed_Parser_Exception $e) {
catch (MaharaException $e) {
$cache[$source] = $e;
throw $e;
// Don't catch other exceptions, they're an indication something
......@@ -438,49 +438,52 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
}
$data = new StdClass;
$data->title = $feed->title;
$data->title = $reader->get_channel_title();
$data->url = $source;
$data->authuser = $authuser;
$data->authpassword = $authpassword;
$data->insecuresslmode = (int)$insecuresslmode;
$data->link = $feed->link;
$data->description = $feed->description;
$data->link = $reader->get_channel_link();
$data->description = $reader->get_channel_description();
// Work out the icon for the feed depending on whether it's RSS or ATOM
$data->image = $feed->image;
if (!$data->image) {
// ATOM feed. These are simple strings
$data->image = $feed->logo ? $feed->logo : null;
}
$data->image = $reader->get_channel_image();
$data->content = array();
foreach ($feed as $count => $item) {
if ($count == 20) {
for ($i = 0; $i < $reader->get_count_items(); $i++) {
if ($i == 20) {
break;
}
$description = $item->content ? $item->content : ($item->description ? $item->description : ($item->summary ? $item->summary : null));
if (!$item->title) {
if (!empty($description)) {
$item->title = substr($description, 0, 60);
}
else if ($item->link) {
$item->title = $item->link;
}
else {
$item->title = get_string('notitle', 'view');
}
$description = $reader->get_item_content($i);
if (empty($description)) {
$description = $reader->get_item_description($i);
}
if (!$pubdate = $item->pubDate) {
if (!$pubdate = $item->date) {
if (!$pubdate = $item->published) {
$pubdate = $item->updated;
};
}
if (empty($description)) {
$description = $reader->get_item_summary($i);
}
$title = $reader->get_item_title($i);
if (empty($title)) {
$title = substr($description, 0, 60);
}
if (empty($title)) {
$title = $reader->get_item_link($i);
}
if (empty($title)) {
$title = get_string('notitle', 'view');
}
$pubdate = $reader->get_item_pubdate($i);
if (empty($pubdate)) {
$pubdate = $reader->get_item_date($i);
}
if (empty($pubdate)) {
$pubdate = $reader->get_item_published($i);
}
if (empty($pubdate)) {
$pubdate = $reader->get_item_updated($i);
}
$data->content[] = (object)array(
'title' => $item->title,
'link' => $item->link,
'title' => $title,
'link' => $reader->get_item_link($i),
'description' => $description,
'pubdate' => $pubdate,
);
......@@ -594,7 +597,7 @@ class PluginBlocktypeExternalfeed extends MaharaCoreBlocktype {
);
$values = self::instance_config_save($urloptions);
}
catch (XML_Feed_Parser_Exception $e) {
catch (MaharaException $e) {
log_info("Note: was unable to parse RSS feed for new blockinstance. URL was {$config['config']['url']}");
$values = array();
}
......
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
/**
* Key gateway class for XML_Feed_Parser package
*
* PHP versions 5
*
* LICENSE: This source file is subject to version 3.0 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_0.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
*
* @category XML
* @package XML_Feed_Parser
* @author James Stewart <james@jystewart.net>
* @copyright 2005 James Stewart <james@jystewart.net>
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL
* @version CVS: $Id$
* @link http://pear.php.net/package/XML_Feed_Parser/
*/
/**
* XML_Feed_Parser_Type is an abstract class required by all of our
* feed types. It makes sense to load it here to keep the other files
* clean.
*/
require_once 'XML/Feed/Parser/Type.php';
/**
* We will throw exceptions when errors occur.
*/
require_once 'XML/Feed/Parser/Exception.php';
require_once 'XML/Feed/Parser/Factory.php';
/**
* This is the core of the XML_Feed_Parser package. It identifies feed types
* and abstracts access to them. It is an iterator, allowing for easy access
* to the entire feed.
*
* @author James Stewart <james@jystewart.net>
* @version Release: @package_version@
* @package XML_Feed_Parser
*/
class XML_Feed_Parser implements Iterator
{
    /**
     * This is where we hold the feed object
     * @var Object
     */
    private $feed;

    /**
     * To allow for extensions, we make a public reference to the feed model
     * @var DOMDocument
     */
    public $model;

    /**
     * A map between entry ID and offset
     * @var array
     */
    protected $idMappings = array();

    /**
     * A storage space for Namespace URIs.
     * @var array
     * @deprecated This is being moved to the factory for now.
     */
    private $feedNamespaces = array(
        'rss2' => array(
            'http://backend.userland.com/rss',
            'http://backend.userland.com/rss2',
            'http://blogs.law.harvard.edu/tech/rss'));

    /**
     * Offset of the entry the iterator currently points at.
     *
     * Null until rewind() or next() has been called. It is declared
     * explicitly so that reads made before iteration starts are not routed
     * through __get() to the feed object, and so that no dynamic property is
     * created. It stays public because it used to be created as a (public)
     * dynamic property.
     *
     * @var int|null
     */
    public $current_item = null;

    /**
     * Detects feed types and instantiate appropriate objects.
     *
     * Our constructor takes care of detecting feed types and instantiating
     * appropriate classes. For now we're going to treat Atom 0.3 as Atom 1.0
     * but raise a warning. I do not intend to introduce full support for
     * Atom 0.3 as it has been deprecated, but others are welcome to.
     *
     * @param string $feed XML serialization of the feed
     * @param bool $strict Whether or not to validate the feed
     * @param bool $suppressWarnings Trigger errors for deprecated feed types?
     * @param bool $tidy Whether or not to try and use the tidy library on input
     * @deprecated XML_Feed_Parser2
     */
    public function __construct($feed, $strict = false, $suppressWarnings = false, $tidy = false)
    {
        $this->model = new DOMDocument;
        $this->initialize($feed, $strict, $suppressWarnings, $tidy);
    }

    /**
     * Builds the feed object through XML_Feed_Parser_Factory and stores it.
     *
     * The factory receives this parser's DOMDocument ($this->model) together
     * with the raw feed and the option flags; whatever it returns becomes the
     * feed object all other methods delegate to.
     *
     * @param string $feed XML serialization of the feed
     * @param bool $strict Whether or not to validate the feed
     * @param bool $suppressWarnings Trigger errors for deprecated feed types?
     * @param bool $tidy Whether or not to try and use the tidy library on input
     * @todo No work in the constructor :(
     */
    public function initialize($feed, $strict = false, $suppressWarnings = false, $tidy = false)
    {
        $factory = new XML_Feed_Parser_Factory();
        $this->setFeed($factory->build($this->model, $feed, $strict, $suppressWarnings, $tidy));
    }

    /**
     * Replaces the feed object this parser delegates to.
     *
     * @param Object $feed The feed type object
     */
    public function setFeed($feed)
    {
        $this->feed = $feed;
    }

    /**
     * Proxy to allow feed element names to be used as method names
     *
     * For top-level feed elements we will provide access using methods or
     * attributes. This function simply passes on a request to the appropriate
     * feed type object.
     *
     * The feed method is always invoked with exactly five arguments; missing
     * ones are padded with false.
     *
     * @param string $call - the method being called
     * @param array $attributes
     * @return mixed Whatever the feed object's method returns
     */
    public function __call($call, $attributes)
    {
        $attributes = array_pad($attributes, 5, false);
        list($a, $b, $c, $d, $e) = $attributes;
        return $this->feed->$call($a, $b, $c, $d, $e);
    }

    /**
     * Proxy to allow feed element names to be used as attribute names
     *
     * To allow variable-like access to feed-level data, a read of an
     * undeclared property is forwarded as a property read on the feed object.
     *
     * @param string $val - the name of the variable required
     * @return mixed The value of that property on the feed object
     */
    public function __get($val)
    {
        return $this->feed->$val;
    }

    /**
     * Provides iteration functionality.
     *
     * Starts the counter at 0 if iteration has not begun, otherwise advances
     * it by one until it has moved one step past the last entry.
     *
     * @return null|false False once the counter is already past the end
     */
    // Attribute is a plain comment before PHP 8; on 8.1+ it silences the
    // Iterator return-type deprecation without changing the signature.
    #[\ReturnTypeWillChange]
    public function next()
    {
        if (! isset($this->current_item)) {
            $this->current_item = 0;
            return;
        }
        if ($this->current_item <= $this->feed->numberEntries - 1) {
            ++$this->current_item;
            return;
        }
        return false;
    }

    /**
     * Return XML_Feed_Type object for current element
     *
     * @return XML_Feed_Parser_Type|false Object, or false if there is none
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->getEntryByOffset($this->current_item);
    }

    /**
     * For iteration -- returns the key for the current stage in the array.
     *
     * @return int|null Null if iteration has not started yet
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->current_item;
    }

    /**
     * For iteration -- tells whether we have reached the
     * end.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->current_item < $this->feed->numberEntries;
    }

    /**
     * For iteration -- resets the internal counter to the beginning.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->current_item = 0;
    }

    /**
     * Provides access to entries by ID if one is specified in the source feed.
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by offset. IDs already seen through
     * getEntryByOffset() are resolved from the local ID map; anything else is
     * delegated to the feed object's own getEntryById().
     *
     * @param string $id Valid ID for the given feed format
     * @return XML_Feed_Parser_Type|false
     */
    public function getEntryById($id)
    {
        if (isset($this->idMappings[$id])) {
            return $this->getEntryByOffset($this->idMappings[$id]);
        }
        /*
         * Since we have not yet encountered that ID, let the feed object
         * look for it.
         */
        return $this->feed->getEntryById($id);
    }

    /**
     * Retrieve entry by numeric offset, starting from zero.
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * @param int $offset The position of the entry within the feed, starting from 0
     * @return XML_Feed_Parser_Type|false False when the offset is out of
     *         range or the feed could not produce the entry
     */
    public function getEntryByOffset($offset)
    {
        // Negative offsets can never address an entry; reject them here
        // instead of handing them to the feed object.
        if ($offset < 0 || ! ($offset < $this->feed->numberEntries)) {
            return false;
        }
        if (isset($this->feed->entries[$offset])) {
            return $this->feed->entries[$offset];
        }
        try {
            // Called for its side effect of filling $this->feed->entries.
            $this->feed->getEntryByOffset($offset);
        } catch (Exception $e) {
            return false;
        }
        // The feed may have returned without storing the entry; do not
        // dereference a slot that is still empty.
        if (! isset($this->feed->entries[$offset])) {
            return false;
        }
        $entry = $this->feed->entries[$offset];
        $id = $entry->getID();
        $this->idMappings[$id] = $offset;
        return $entry;
    }

    /**
     * Retrieve version details from feed type class.
     *
     * @return mixed The feed object's "version" property
     * @author James Stewart
     */
    public function version()
    {
        return $this->feed->version;
    }

    /**
     * Returns a string representation of the feed.
     *
     * @return String
     **/
    public function __toString()
    {
        return $this->feed->__toString();
    }
}
?>
This diff is collapsed.
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
/**
* AtomElement class for XML_Feed_Parser package
*
* PHP versions 5
*
* LICENSE: This source file is subject to version 3.0 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_0.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
*
* @category XML
* @package XML_Feed_Parser
* @author James Stewart <james@jystewart.net>
* @copyright 2005 James Stewart <james@jystewart.net>
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
* @version CVS: $Id$
* @link http://pear.php.net/package/XML_Feed_Parser/
*/
/**
* This class provides support for atom entries. It will usually be called by
* XML_Feed_Parser_Atom with which it shares many methods.
*
* @author James Stewart <james@jystewart.net>
* @version Release: @package_version@
* @package XML_Feed_Parser
*/
class XML_Feed_Parser_AtomElement extends XML_Feed_Parser_Atom
{
    /**
     * This will be a reference to the parent object for when we want
     * to use a 'fallback' rule
     * @var XML_Feed_Parser_Atom
     */
    protected $parent;

    /**
     * When performing XPath queries we will use this prefix
     * @var string
     */
    private $xpathPrefix = '';

    /**
     * xml:base values inherited by the element
     * @var string
     */
    protected $xmlBase;

    /**
     * Here we provide a few mappings for those very special circumstances in
     * which it makes sense to map back to the RSS2 spec or to manage other
     * compatibilities (eg. with the Universal Feed Parser). Key is the other version's
     * name for the command, value is an array consisting of the equivalent in our atom
     * api and any attributes needed to make the mapping.
     * @var array
     */
    protected $compatMap = array(
        'guid' => array('id'),
        'links' => array('link'),
        'tags' => array('category'),
        'contributors' => array('contributor'));

    /**
     * Our specific element map
     * @var array
     */
    protected $map = array(
        'author' => array('Person', 'fallback'),
        'contributor' => array('Person'),
        'id' => array('Text', 'fail'),
        'published' => array('Date'),
        'updated' => array('Date', 'fail'),
        'title' => array('Text', 'fail'),
        'rights' => array('Text', 'fallback'),
        'summary' => array('Text'),
        'content' => array('Content'),
        'link' => array('Link'),
        'enclosure' => array('Enclosure'),
        'category' => array('Category'));

    /**
     * Store useful information for later.
     *
     * @param DOMElement $element - this item as a DOM element
     * @param XML_Feed_Parser_Atom $parent - the feed of which this is a member
     * @param string $xmlBase - xml:base value inherited by this element
     */
    public function __construct(DOMElement $element, $parent, $xmlBase = '')
    {
        $this->setSanitizer($parent->getSanitizer());
        $this->model = $element;
        $this->parent = $parent;
        $this->xmlBase = $xmlBase;
        // $this->id is doing magic work in the constructor :(
        // It must be read only after model and parent have been assigned.
        $this->xpathPrefix = "//atom:entry[atom:id='" . $this->id . "']/";
        $this->xpath = $this->parent->xpath;
    }

    /**
     * Provides access to specific aspects of the author data for an atom entry
     *
     * Author data at the entry level is more complex than at the feed level.
     * The entry's own atom:author (a direct child) wins. If there is none we
     * take the first author nested deeper inside the entry, which is where an
     * atom:source child keeps its author. If the entry holds no author at all
     * we look to the parent for data.
     *
     * @param array $arguments Optional 'param' key naming the author detail
     *        wanted; defaults to 'name'
     * @return string|null Null when an author was found but it has no such
     *         detail; otherwise the detail, or whatever the parent returns
     */
    public function getAuthor($arguments)
    {
        /* Find out which part of the author data we're looking for */
        $parameter = isset($arguments['param']) ? $arguments['param'] : 'name';

        // Prefer the entry's own author over one nested in atom:source; a
        // plain descendant search returns whichever comes first in the XML.
        $author = null;
        foreach ($this->model->childNodes as $child) {
            if ($child instanceof DOMElement && $child->localName === 'author') {
                $author = $child;
                break;
            }
        }

        if ($author === null) {
            $nested = $this->model->getElementsByTagName('author');
            if ($nested->length > 0) {
                $author = $nested->item(0);
            }
        }

        if ($author === null) {
            return $this->parent->getAuthor($arguments);
        }

        // The requested detail (e.g. email) is optional; avoid reading
        // nodeValue from a missing node.
        $detail = $author->getElementsByTagName($parameter)->item(0);
        return ($detail === null) ? null : $detail->nodeValue;
    }

    /**
     * Returns the content of the content element or info on a specific attribute
     *
     * This element may or may not be present. It cannot be present more than
     * once. It may have a 'src' attribute, in which case there's no content
     * If not present, then the entry must have link with rel="alternate".
     * If there is content we return it, if not and there's a 'src' attribute
     * we return the value of that instead. The method can take an 'attribute'
     * argument, in which case we return the value of that attribute (an empty
     * string if the attribute is absent).
     * eg. $item->content("type") will return the type of the content. It is
     * recommended that all users check the type before getting the content to
     * ensure that their script is capable of handling the type of returned data.
     * (data carried in the content element can be either 'text', 'html', 'xhtml',
     * or any standard MIME type).
     *
     * Side effect: a content element without a 'type' attribute is given
     * type="text" in the DOM.
     *
     * @param string $method - for compatibility with our __call usage
     * @param array $arguments - first item may name an attribute to read
     * @return string|false False when the entry has no content element
     */
    protected function getContent($method, $arguments = array())
    {
        $attribute = empty($arguments[0]) ? false : $arguments[0];
        $tags = $this->model->getElementsByTagName('content');
        if ($tags->length == 0) {
            return false;
        }

        $content = $tags->item(0);
        if (! $content->hasAttribute('type')) {
            $content->setAttribute('type', 'text');
        }

        if (! empty($attribute)) {
            return $content->getAttribute($attribute);
        }

        if ($content->hasAttribute('src')) {
            return $content->getAttribute('src');
        }

        return $this->parseTextConstruct($content);
    }

    /**
     * For compatibility, this method provides a mapping to access enclosures.
     *
     * The Atom spec doesn't provide for an enclosure element, but it is
     * generally supported using the link element with rel='enclosure'.
     *
     * NOTE(review): the entry is located again by its atom:id inside an XPath
     * string literal, so an id containing an apostrophe makes the query
     * invalid; that case now yields false.
     *
     * @param string $method - for compatibility with our __call usage
     * @param array $arguments - for compatibility with our __call usage;
     *        first item is the zero-based enclosure offset
     * @return array|false Array with 'url', 'type' (null if absent) and
     *         'length' (false if absent), or false if there is no such
     *         enclosure or it has no href
     */
    public function getEnclosure($method, $arguments = array())
    {
        // Cast so that a padded boolean false behaves as offset 0.
        $offset = isset($arguments[0]) ? (int) $arguments[0] : 0;
        $query = "//atom:entry[atom:id='" . $this->getText('id', false) .
            "']/atom:link[@rel='enclosure']";

        $encs = $this->parent->xpath->query($query);
        if ($encs === false || $offset < 0 || $encs->length <= $offset) {
            return false;
        }

        $link = $encs->item($offset);
        try {
            if (! $link->hasAttribute('href')) {
                return false;
            }
            return array(
                'url' => $link->getAttribute('href'),
                // type is optional on atom:link; report null, not an error
                'type' => $link->hasAttribute('type') ?
                    $link->getAttribute('type') : null,
                'length' => $link->hasAttribute('length') ?
                    $link->getAttribute('length') : false);
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Get details of this entry's source, if available/relevant
     *
     * Where an atom:entry is taken from another feed then the aggregator
     * is supposed to include an atom:source element which replicates at least
     * the atom:id, atom:title, and atom:updated metadata from the original
     * feed. Atom:source therefore has a very similar structure to atom:feed
     * and if we find it we will return it as an XML_Feed_Parser_Atom object.
     *
     * NOTE(review): the source DOMElement is passed straight to the
     * XML_Feed_Parser_Atom constructor - confirm that constructor accepts an
     * element here.
     *
     * @return XML_Feed_Parser_Atom|false
     */
    public function getSource()
    {
        $test = $this->model->getElementsByTagName('source');
        if ($test->length == 0) {
            return false;
        }
        // Previously the object was built and then dropped, so callers
        // always received null.
        return new XML_Feed_Parser_Atom($test->item(0));
    }

    /**
     * Get the entry as an XML string
     *
     * Return an XML serialization of the entry, should it be required. Most
     * users however, will already have a serialization that they used when
     * instantiating the object.
     *
     * @return string XML serialization of element
     */
    public function __toString()
    {
        $simple = simplexml_import_dom($this->model);
        return $simple->asXML();
    }
}
?>
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
/**
* Keeps the exception class for XML_Feed_Parser.
*
* PHP versions 5