Commit f307844a authored by Evan Goldenberg's avatar Evan Goldenberg
Browse files

add pluggable anti spam features for forms



This is a pluggable anti-spam infrastructure for protecting forms in
Mahara against spam.

The contact and register forms have been modified to include the
following anti-spam features:
    - hashed field names
    - honey pot (invisible) fields
    - submission time monitoring

Additionally, content checking is provided by the following spam traps:
    - NoneSpamTrap: the base class for other spam traps, does no
      checking on its own
    - SimpleSpamTrap: provides basic checks that don't require an
      internet connection
    - AdvancedSpamTrap: provides better checks that require an internet
      connection, including checking for the existence of mailservers
      and the presence of blacklisted URLs

New spam traps can easily be created by adding a file to lib/antispam
with the name MyTrapNameSpamTrap.php. Added spam traps will be
automatically detected and available for selection in the site options
page.
Signed-off-by: default avatarEvan Goldenberg <evang@catalyst.net.nz>
parent 8fa27005
......@@ -35,6 +35,7 @@ define('SECTION_PAGE', 'siteoptions');
require(dirname(dirname(dirname(__FILE__))) . '/init.php');
require_once('pieforms/pieform.php');
require_once('searchlib.php');
require_once('antispam.php');
define('TITLE', get_string('siteoptions', 'admin'));
$langoptions = get_languages();
......@@ -46,6 +47,8 @@ $searchpluginoptions = get_search_plugins();
$countries = getoptions_country();
$spamtraps = available_spam_traps();
$siteoptionform = array(
'name' => 'siteoptions',
'jsform' => true,
......@@ -250,6 +253,27 @@ $siteoptionform = array(
'title' => get_string('userscanhiderealnames', 'admin'),
'description' => get_string('userscanhiderealnamesdescription', 'admin'),
'defaultvalue' => get_config('userscanhiderealnames'),
'antispam' => array(
'type' => 'select',
'title' => get_string('antispam', 'admin'),
'description' => get_string('antispamdescription', 'admin'),
'defaultvalue' => get_config('antispam'),
'options' => $spamtraps,
'help' => true,
),
'spamhaus' => array(
'type' => 'checkbox',
'title' => get_string('spamhaus', 'admin'),
'description' => get_string('spamhausdescription', 'admin'),
'defaultvalue' => get_config('spamhaus'),
'help' => true,
),
'surbl' => array(
'type' => 'checkbox',
'title' => get_string('surbl', 'admin'),
'description' => get_string('surbldescription', 'admin'),
'defaultvalue' => get_config('surbl'),
'help' => true,
),
)
);
......@@ -276,7 +300,7 @@ function siteoptions_submit(Pieform $form, $values) {
'registration_sendweeklyupdates', 'institutionexpirynotification', 'institutionautosuspend',
'showselfsearchsideblock', 'showtagssideblock',
'tagssideblockmaxtags', 'country', 'viewmicroheaders', 'userscanchooseviewthemes',
'remoteavatars', 'userscanhiderealnames'
'remoteavatars', 'userscanhiderealnames', 'antispam', 'spamhaus', 'surbl',
);
$oldlanguage = get_config('lang');
$oldtheme = get_config('theme');
......
......@@ -79,4 +79,9 @@ $cfg->dbprefix = '';
// this is a big security hole.
$cfg->dataroot = '/path/to/uploaddir';
// If set, this email address will be displayed in the error message if a form
// submission is suspected of being spam. This reduces the frustration for the
// user in the event of a false positive.
$cfg->emailcontact = '';
// closing php tag intentionally omitted to prevent whitespace issues
......@@ -32,7 +32,9 @@ define('SECTION_PLUGINNAME', 'site');
define('SECTION_PAGE', 'contact');
require('init.php');
require_once('pieforms/pieform.php');
require_once('lib/antispam.php');
define('TITLE', get_string('contactus'));
define('SPAM_SCORE', 3);
if ($USER->is_logged_in()) {
$userid = $USER->get('id');
......@@ -45,9 +47,22 @@ else {
$email = '';
}
// we're in the middle of processing the form, so read the time
// from the form rather than getting a new one
if ($_POST) {
$time = $_POST['timestamp'];
}
else {
$time = time();
}
$fields = array('name', 'email', 'subject', 'message', 'userid', 'submit', 'invisiblefield', 'invisiblesubmit');
$hashed_fields = hash_fieldnames($fields, $time);
$elements = array(
'name' => array(
'type' => 'text',
'name' => $hashed_fields['name'],
'title' => get_string('name'),
'defaultvalue' => $name,
'rules' => array(
......@@ -56,19 +71,23 @@ $elements = array(
),
'email' => array(
'type' => 'text',
'name' => $hashed_fields['email'],
'title' => get_string('email'),
'defaultvalue' => $email,
'rules' => array(
'required' => true
'required' => true,
'email' => true,
),
),
'subject' => array(
'type' => 'text',
'name' => $hashed_fields['subject'],
'title' => get_string('subject'),
'defaultvalue' => '',
),
'message' => array(
'type' => 'textarea',
'name' => $hashed_fields['message'],
'rows' => 10,
'cols' => 60,
'title' => get_string('message'),
......@@ -79,15 +98,41 @@ $elements = array(
)
);
$elements['invisiblefield'] = array(
'type' => 'text',
'name' => $hashed_fields['invisiblefield'],
'title' => get_string('spamtrap'),
'defaultvalue' => '',
'class' => 'dontshow',
);
$elements['userid'] = array(
'type' => 'hidden',
'name' => $hashed_fields['userid'],
'value' => $userid,
);
$elements['timestamp'] = array(
'type' => 'hidden',
'value' => $time,
);
$elements['invisiblesubmit'] = array(
'type' => 'submit',
'name' => $hashed_fields['invisiblesubmit'],
'value' => get_string('spamtrap'),
'class' => 'dontshow',
);
$elements['submit'] = array(
'type' => 'submit',
'name' => $hashed_fields['submit'],
'value' => get_string('sendmessage'),
);
// swap the name and email fields at random
if (rand(0,1)) {
$name = array_shift($elements);
$email = array_shift($elements);
array_unshift($elements, $email, $name);
}
$contactform = pieform(array(
'name' => 'contactus',
'method' => 'post',
......@@ -95,15 +140,80 @@ $contactform = pieform(array(
'elements' => $elements
));
/**
 * Pieforms validation callback for the contact form.
 *
 * Runs the anti-spam checks in order and stops at the first one that fails:
 *   1. the hidden timestamp must be a plain integer and the submission must
 *      be between 5 seconds and one day old;
 *   2. the real submit button must have been used and the invisible one must
 *      not have been;
 *   3. the invisible (honey pot) text field must be present and empty;
 *   4. all of the hashed data fields must be present;
 *   5. the configured spam trap must not classify the content as spam.
 *
 * On failure a generic error message is added to the session (with the
 * 'emailcontact' address appended when one is configured) and an empty error
 * is set on the submit element so that the form is not processed.
 *
 * @param Pieform $form   The form being validated
 * @param array   $values The submitted values
 */
function contactus_validate(Pieform $form, $values) {
    global $SESSION;

    $error = false;
    $currenttime = time();

    // read the timestamp field
    $timestamp = isset($values['timestamp']) ? $values['timestamp'] : null;

    // recompute the field names. The raw timestamp is used here so that the
    // hashes match the ones generated when the form was built.
    $fields = array('name', 'email', 'subject', 'message', 'userid', 'submit', 'invisiblefield', 'invisiblesubmit');
    $hashed = hash_fieldnames($fields, $timestamp);

    // The timestamp originates from the client, so only do arithmetic on it
    // once it is known to be a string of digits. Anything else (missing,
    // an array, arbitrary text) is treated as a failed check.
    $age = null;
    if (is_scalar($timestamp) && preg_match('/^\d+$/D', (string) $timestamp)) {
        $age = $currenttime - (int) $timestamp;
    }

    // make sure the submission is less than a day, and more than 5 seconds old
    if ($age === null || $age < 5 || $age > 86400) {
        $error = true;
    }
    // make sure the real submit button was used. If it wasn't, it won't exist.
    elseif (!isset($values[$hashed['submit']]) || isset($values[$hashed['invisiblesubmit']])) {
        $error = true;
    }
    // make sure the invisible field is empty
    elseif (!isset($values[$hashed['invisiblefield']]) || $values[$hashed['invisiblefield']] != '') {
        $error = true;
    }
    // make sure all the other data fields exist
    elseif (!(isset($values[$hashed['name']]) && isset($values[$hashed['email']]) &&
              isset($values[$hashed['subject']]) && isset($values[$hashed['message']]))) {
        $error = true;
    }
    else {
        // the structural checks passed, so let the configured trap score the content
        $spamtrap = new_spam_trap(array(
            array(
                'type' => 'name',
                'value' => $values[$hashed['name']],
            ),
            array(
                'type' => 'email',
                'value' => $values[$hashed['email']],
            ),
            array(
                'type' => 'subject',
                'value' => $values[$hashed['subject']],
            ),
            array(
                'type' => 'body',
                'value' => $values[$hashed['message']],
            ),
        ));
        if ($spamtrap->is_spam()) {
            $error = true;
        }
    }

    if ($error) {
        // the same message is shown whichever check failed
        $msg = get_string('formerror');
        $emailcontact = get_config('emailcontact');
        if (!empty($emailcontact)) {
            $msg .= ' ' . get_string('formerroremail', 'mahara', $emailcontact, $emailcontact);
        }
        $SESSION->add_error_msg($msg);
        $form->set_error($hashed['submit'], '');
    }
}
function contactus_submit(Pieform $form, $values) {
global $SESSION;
// read the timestamp field
$timestamp = $values['timestamp'];
// recompute the field names
$fields = array('name', 'email', 'subject', 'message', 'userid', 'submit', 'invisiblefield', 'invisiblesubmit');
$hashed = hash_fieldnames($fields, $timestamp);
$data = new StdClass;
$data->fromname = $values['name'];
$data->fromemail = $values['email'];
$data->subject = $values['subject'];
$data->message = $values['message'];
if ($values['userid']) {
$data->fromuser = $values['userid'];
$data->fromname = $values[$hashed['name']];
$data->fromemail = $values[$hashed['email']];
$data->subject = $values[$hashed['subject']];
$data->message = $values[$hashed['message']];
if ($values[$hashed['userid']]) {
$data->fromuser = $values[$hashed['userid']];
}
require_once('activity.php');
activity_occurred('contactus', $data);
......
......@@ -144,10 +144,13 @@ $string['reopensitedetail'] = 'Your site is closed. Site administrators may sta
// Site options
$string['adminsonly'] = 'Administrators only';
$string['adminsandstaffonly'] = 'Administrators and Staff only';
$string['advanced'] = 'Advanced';
$string['allowpublicviews'] = 'Allow public views';
$string['allowpublicviewsdescription'] = 'If set to yes, users will be able to create portfolio Views that are accessable to the public rather than only to logged in users';
$string['allowpublicprofiles'] = 'Allow public profiles';
$string['allowpublicprofilesdescription'] = 'If set to yes, users will be able to set their profile Views to be accessable to the public rather than only to logged in users';
$string['antispam'] = 'Anti-spam';
$string['antispamdescription'] = 'The type of anti-spam measures used on publicly visible forms';
$string['defaultaccountinactiveexpire'] = 'Default account inactivity time';
$string['defaultaccountinactiveexpiredescription'] = 'How long a user account will remain active without the user logging in';
$string['defaultaccountinactivewarn'] = 'Warning time for inactivity/expiry';
......@@ -162,6 +165,7 @@ $string['institutionautosuspenddescription'] = 'If checked, expired institutions
$string['institutionexpirynotification'] = 'Warning time for institution expiry';
$string['institutionexpirynotificationdescription'] = 'A notification message will be sent to site and institutional admins the long before a site expires';
$string['language'] = 'Language';
$string['none'] = 'None';
$string['country'] = 'Country';
$string['pathtoclam'] = 'Path to clam';
$string['pathtoclamdescription'] = 'The filesystem path to clamscan or clamdscan';
......@@ -176,6 +180,7 @@ $string['showselfsearchsideblock'] = 'Enable Portfolio Search';
$string['showselfsearchsideblockdescription'] = 'Display the "Search My Portfolio" side block in the My Portfolio section of the site';
$string['showtagssideblock'] = 'Enable Tag Cloud';
$string['showtagssideblockdescription'] = 'If enabled, users will see a side block in the My Portfolio section of the site with a list of their most frequently used tags';
$string['simple'] = 'Simple';
$string['sitedefault'] = 'Site Default';
$string['sitelanguagedescription'] = 'The default language for the site';
$string['sitecountrydescription'] = 'The default country for the site';
......@@ -186,6 +191,10 @@ $string['siteoptionsset'] = 'Site options have been updated.';
$string['sitethemedescription'] = 'The default theme for the site';
$string['smallviewheaders'] = 'Small View page headers';
$string['smallviewheadersdescription'] = 'If enabled, a small header and site navigation block will be displayed when viewing or editing Views.';
$string['spamhaus'] = 'Enable Spamhaus URL blacklist';
$string['spamhausdescription'] = 'If enabled, URLs will be checked against the Spamhaus DNSBL';
$string['surbl'] = 'Enable SURBL URL blacklist';
$string['surbldescription'] = 'If enabled, URLs will be checked against the SURBL DNSBL';
$string['tagssideblockmaxtags'] = 'Maximum Tags in Cloud';
$string['tagssideblockmaxtagsdescription'] = 'The default number of tags to display in user tag clouds';
$string['trustedsites'] = 'Trusted sites';
......
<h3>Anti-spam</h3>
<p>There are three levels of anti-spam protection available for publicly
visible forms, such as the Contact Us and Registration forms. A form
submission is never silently rejected. Rather, an error message is displayed
asking the user to try again if the submission is classified as spam.</p>
<p><b>None: </b>No anti-spam checks will be performed on form submissions.</p>
<p><b>Simple: </b>Some basic checks are performed. Form submissions with email
addresses that are not well-formed, or that have an excessive number of URLs,
are rejected.</p>
<p><b>Advanced: </b>(Requires an internet connection). Performs additional
checks to determine whether email addresses are real and whether any URLs
contained in the submission are blacklisted.</p>
<h3>Enable Spamhaus URL blacklist</h3>
<p>The Spamhaus Project provides a URL blacklist that is free for non-commercial,
low-traffic use. A professional use datafeed service is also available, but not supported
in Mahara. Please read the Spamhaus DNSBL
<a href="http://www.spamhaus.org/organization/dnsblusage.html">usage terms</a> before enabling this option.</p>
<h3>Enable SURBL URL blacklist</h3>
<p>SURBL provides a URL blacklist that is free for organizations with fewer than
1000 users. A professional use datafeed service is also available, but not supported
in Mahara. Please read the SURBL
<a href="http://www.surbl.org/usage-policy.html">usage terms</a> before enabling this option.</p>
......@@ -516,6 +516,9 @@ $string['Invitations'] = 'Invitations';
$string['config'] = 'Config';
$string['sendmessage'] = 'Send message';
$string['spamtrap'] = 'Spam trap';
$string['formerror'] = 'There was an error processing your submission. Please try again.';
$string['formerroremail'] = 'Contact us at %s if you continue to have problems.';
$string['notinstallable'] = 'Not installable!';
$string['installedplugins'] = 'Installed plugins';
......
<?php
/**
* Mahara: Electronic portfolio, weblog, resume builder and social networking
* Copyright (C) 2006-2010 Catalyst IT Ltd and others; see:
* http://wiki.mahara.org/Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @package mahara
* @subpackage antispam
* @author Catalyst IT Ltd
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL
* @copyright (C) 2006-2010 Catalyst IT Ltd http://catalyst.net.nz
*
*/
defined('INTERNAL') || die();
/**
 * Work out the IP address of the client making the current request.
 *
 * The Client-IP and X-Forwarded-For request headers are consulted first (in
 * that order) and REMOTE_ADDR is used as the fallback. Both headers are
 * supplied by the client or an intermediate proxy and can therefore be
 * forged, so the result must not be relied upon for access control.
 *
 * A header value is only used when it is a syntactically valid IP address.
 * X-Forwarded-For may hold a comma separated chain of addresses; only the
 * first entry is considered.
 *
 * @return string The IP address, or an empty string if none is available
 */
function get_ip() {
    $candidates = array();
    if (!empty($_SERVER['HTTP_CLIENT_IP'])) {
        $candidates[] = $_SERVER['HTTP_CLIENT_IP'];
    }
    if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
        // "client, proxy1, proxy2" - the originating client is listed first
        $hops = explode(',', $_SERVER['HTTP_X_FORWARDED_FOR']);
        $candidates[] = $hops[0];
    }

    foreach ($candidates as $candidate) {
        $candidate = trim($candidate);
        // ignore anything that is not actually an IP address
        if (filter_var($candidate, FILTER_VALIDATE_IP) !== false) {
            return $candidate;
        }
    }

    // REMOTE_ADDR is not set when running from the command line
    return isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
}
/**
 * Build obfuscated element names for a list of form fields.
 *
 * Each name is hashed together with the supplied time, the client's IP
 * address (as reported by get_ip()) and the 'formsecret' config value.
 *
 * @param array $names The plain field names
 * @param mixed $time  The timestamp the form was generated at
 * @return array Map of plain field name => hashed field name
 */
function hash_fieldnames($names, $time) {
    $clientip = get_ip();
    $formsecret = get_config('formsecret');

    $map = array();
    foreach ($names as $fieldname) {
        $digest = sha1(implode('', array($fieldname, $time, $clientip, $formsecret)));
        // a leading underscore guarantees a valid pieforms element name,
        // whatever character the hash happens to start with
        $map[$fieldname] = '_' . $digest;
    }

    return $map;
}
/**
 * List the spam traps installed in lib/antispam.
 *
 * Every file in that directory named <Something>SpamTrap.php is treated as a
 * spam trap. The name must match exactly, so editor backups such as
 * FooSpamTrap.php~ or FooSpamTrap.php.bak are not listed.
 *
 * @return array Map of lower-cased trap name => trap name as it appears in
 *               the file name, sorted by key. Empty if the directory cannot
 *               be opened.
 */
function available_spam_traps() {
    $results = array();

    $handle = opendir(get_config('docroot') . 'lib/antispam');
    if ($handle === false) {
        return $results;
    }

    // compare against false explicitly: a directory entry called "0" is
    // falsy and would otherwise end the loop early
    while (false !== ($file = readdir($handle))) {
        if (preg_match('/^(.+)SpamTrap\.php$/D', $file, $name)) {
            $results[strtolower($name[1])] = $name[1];
        }
    }
    closedir($handle);

    // readdir() returns entries in filesystem order; sort so that the list
    // is presented in a stable order
    ksort($results);

    return $results;
}
/**
 * Create an instance of the spam trap selected in the site options.
 *
 * The 'antispam' config value is looked up in the list returned by
 * available_spam_traps(). This restores the original capitalisation of the
 * trap's file name (so MyTrapNameSpamTrap.php can be loaded on a case
 * sensitive filesystem) and ensures only a trap that is actually installed
 * is ever passed to require_once. If the setting is empty or does not match
 * an installed trap, NoneSpamTrap is used.
 *
 * @param array $fields List of array('type' => ..., 'value' => ...) entries
 *                      describing the submitted content
 * @return object The spam trap instance
 */
function new_spam_trap($fields) {
    $configured = strtolower((string) get_config('antispam'));
    $available = available_spam_traps();

    $trapname = isset($available[$configured]) ? $available[$configured] : 'None';

    $spamclass = $trapname . 'SpamTrap';
    require_once('antispam/' . $spamclass . '.php');
    return new $spamclass($fields);
}
// windows has no checkdnsrr until PHP 5.3
if (!function_exists('checkdnsrr')) {
    /**
     * Fallback implementation of checkdnsrr() that shells out to nslookup.
     *
     * A record is considered to exist when any line of nslookup's output
     * starts with the host name that was queried.
     * NOTE(review): whether that holds for every record type depends on the
     * output format of the local nslookup - confirm for types other than MX.
     *
     * @param string $host The host name to look up
     * @param string $type The DNS record type
     * @return bool Whether a matching line was found in the output
     */
    function checkdnsrr($host, $type='MX') {
        if (empty($host)) {
            return false;
        }
        $host = (string) $host;
        $type = (string) $type;

        // The host can originate from user input. A leading dash would be
        // read by nslookup as an option, and the record type is only ever a
        // short alphanumeric token.
        if ($host[0] === '-' || !preg_match('/^[A-Za-z0-9]+$/D', $type)) {
            return false;
        }

        // quote each argument separately: escapeshellcmd() does not stop a
        // value from being split into several arguments
        $command = 'nslookup ' . escapeshellarg('-type=' . $type) . ' ' . escapeshellarg($host);
        $output = array();
        exec($command, $output);

        foreach ($output as $line) {
            // literal prefix comparison, so that characters in the host name
            // are never interpreted as regular expression syntax
            if (strpos($line, $host) === 0) {
                return true;
            }
        }
        return false;
    }
}
<?php
/**
* Mahara: Electronic portfolio, weblog, resume builder and social networking
* Copyright (C) 2006-2010 Catalyst IT Ltd and others; see:
* http://wiki.mahara.org/Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @package mahara
* @subpackage antispam
* @author Catalyst IT Ltd
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL
* @copyright (C) 2006-2010 Catalyst IT Ltd http://catalyst.net.nz
*
*/
defined('INTERNAL') || die();
require_once('SimpleSpamTrap.php');
/**
* AdvancedSpamTrap implements stronger checks of email,
* subject, and body, by determining whether or not the email
* actually exists, and checking any URLs against internet
* blacklists. It should only be used if an internet connection
* is available.
*/
class AdvancedSpamTrap extends SimpleSpamTrap {

    // TODO: in addition to checking for an MX record, connect to the mailserver
    // and see if the email address exists. This will detect the case where
    // a nonexistant email at a valid domain is used.
    /**
     * Checks that an email address is well formed (using the parent class
     * check) and that its domain can receive mail according to DNS.
     *
     * A domain without an MX record is still accepted if it has an address
     * record, because mail is then delivered to the host itself.
     *
     * @param string $email The email address to check
     * @return bool
     */
    protected function valid_email($email) {
        if (!parent::valid_email($email)) {
            return false;
        }
        list($local, $domain) = explode('@', $email);
        return checkdnsrr($domain, 'MX') || checkdnsrr($domain, 'A');
    }

    /**
     * Reduces a URL to the last two labels of its host name, e.g.
     * http://www.example.com/index.php gives example.com.
     *
     * @param string $url The URL to examine
     * @return string|null The domain, or null if the URL has no host name
     *                     with an alphabetic top level label (IP addresses,
     *                     bare host names, unparseable URLs)
     */
    protected function url_base_domain($url) {
        // only look at the host component, so that dots in the path or query
        // string can never end up in the result
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return null;
        }

        $labels = explode('.', strtolower(rtrim($host, '.')));
        $count = count($labels);
        if ($count < 2) {
            return null;
        }

        $tld = $labels[$count - 1];
        $name = $labels[$count - 2];
        if ($name === '' || !preg_match('/^[a-z][a-z0-9-]*$/D', $tld)) {
            return null;
        }

        return $name . '.' . $tld;
    }

    /**
     * Checks a URL's domain against the enabled DNS blacklists.
     *
     * black.uribl.com is always queried; dbl.spamhaus.org and multi.surbl.com
     * are added when the 'spamhaus' and 'surbl' config options are set.
     *
     * @param string $url The URL to check
     * @return bool True if any blacklist has an A record for the domain
     */
    protected function blacklisted_url($url) {
        $blacklists = array(
            'black.uribl.com',
        );
        if (get_config('spamhaus')) {
            $blacklists[] = 'dbl.spamhaus.org';
        }
        if (get_config('surbl')) {
            $blacklists[] = 'multi.surbl.com';
        }

        // extract the hostname from the url
        $domain = $this->url_base_domain($url);
        if ($domain === null) {
            // nothing that could be looked up
            return false;
        }

        foreach ($blacklists as $bl) {
            if (checkdnsrr($domain . '.' . $bl, 'A')) {
                return true;
            }
        }
        return false;
    }

    /**
     * Scores the message body: the parent class score plus 5 for every URL
     * occurrence that is blacklisted.
     *
     * @param string $body The message body
     * @return int The spam score
     */
    protected function evaluate_body($body) {
        $score = parent::evaluate_body($body);
        $urls = $this->get_urls($body);

        // remember the verdict for each distinct URL so that a repeated URL
        // is scored every time but only looked up once
        $verdicts = array();
        foreach ($urls as $url) {
            if (!array_key_exists($url, $verdicts)) {
                $verdicts[$url] = $this->blacklisted_url($url);
            }
            if ($verdicts[$url]) {
                $score += 5;
            }
        }
        return $score;
    }
}
<?php
/**
* Mahara: Electronic portfolio, weblog, resume builder and social networking
* Copyright (C) 2006-2010 Catalyst IT Ltd and others; see:
* http://wiki.mahara.org/Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @package mahara
* @subpackage antispam
* @author Catalyst IT Ltd
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL
* @copyright (C) 2006-2010 Catalyst IT Ltd http://catalyst.net.nz
*
*/
defined('INTERNAL') || die();
/**
* Base class for spam traps. Defines no evaluation schemes, so its
* is_spam() method will always return false.
*/
class NoneSpamTrap {

    /**
     * The submitted content to evaluate: a list of
     * array('type' => ..., 'value' => ...) entries.
     *
     * @var array
     */
    public $fields = array();

    /**
     * @param array $fields List of array('type' => ..., 'value' => ...) entries
     */
    public function __construct($fields) {
        $this->fields = $fields;
    }

    /**
     * Decides whether the submitted fields look like spam.
     *
     * For each field, a method named evaluate_<type> is called with the
     * field's value if this object has one, and the returned scores are added
     * up. Fields whose type has no such method do not affect the score, and
     * entries without a 'type' are skipped.
     *
     * @return bool True if the total score is greater than SPAM_SCORE. Always
     *              false when the SPAM_SCORE constant is not defined.
     */
    public function is_spam() {
        // if no spam score threshold is defined, never call something spam
        if (!defined('SPAM_SCORE')) {
            return false;
        }

        $score = 0;
        foreach ((array) $this->fields as $field) {
            if (!is_array($field) || !isset($field['type'])) {
                continue;
            }
            $method = 'evaluate_' . $field['type'];
            if (method_exists($this, $method)) {
                $value = array_key_exists('value', $field) ? $field['value'] : '';
                $score += $this->$method($value);
            }
        }

        return $score > SPAM_SCORE;
    }
}
<?php
/**
* Mahara: Electronic portfolio, weblog, resume builder and social networking
* Copyright (C) 2006-2010 Catalyst IT Ltd and others; see:
* http://wiki.mahara.org/Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @package mahara
* @subpackage antispam
* @author Catalyst IT Ltd
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL
* @copyright (C) 2006-2010 Catalyst IT Ltd http://catalyst.net.nz
*
*/
defined('INTERNAL') || die();
require_once('NoneSpamTrap.php');
/**
* SimpleSpamTrap implements basic checks of name, email,
* subject, and body, but does not perform any checks that
* require an internet connection.
*/
class SimpleSpamTrap extends NoneSpamTrap {
/**
 * Checks that an email address has the form local-part@domain.tld.
 *
 * @param string $email The email address to check
 * @return bool True if the address is well formed
 */
protected function email_form($email) {
    // pieforms does some email validation, but it's somewhat imperfect.
    // it allows multiple @ characters, for example
    //
    // preg_match() replaces eregi(), which no longer exists in PHP 7. The D
    // modifier stops $ from matching before a trailing newline. The top
    // level label may be up to 63 characters long (the DNS label limit)
    // rather than 4, so that longer top level domains are accepted.
    return preg_match('/^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$/iD', $email) === 1;
}
/**
 * Decides whether an email address is acceptable. At this level that is
 * exactly the result of email_form().
 *
 * @param string $email The email address to check
 * @return bool
 */
protected function valid_email($email) {
    $wellformed = $this->email_form($email);
    return $wellformed;
}
protected function get_urls($text) {
preg_match_all('#(?:https?|ftp)://[^\s\'"<>()]+#S', $text, $urls);
return $urls[0];