<?php
/**
 *
 * @package    mahara
 * @subpackage core
 * @author     Catalyst IT Ltd
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL version 3 or later
 * @copyright  For copyright information on Mahara, please see the README file distributed with this software.
 *
 */

// Stop immediately unless the including script has defined INTERNAL, so this
// file cannot be executed on its own.
defined('INTERNAL') || die();

// Length limit handed to fgetcsv() for every line read from a CSV file.
define('MAX_LINE_LENGTH', 1024);

/**
 * Reads a CSV file and turns it into a header format plus a list of data rows.
 *
 * Usage, as far as this class itself shows:
 *   1. Construct it with the path of the CSV file. If the file is missing or
 *      cannot be opened, an error is recorded under the 'file' key.
 *   2. Configure it through set(): 'allowedkeys' (field names permitted in
 *      the header row), 'mandatoryfields' (field names the header row must
 *      contain) and, for files without a header row, 'headerExists' = false
 *      together with an explicit 'format'.
 *   3. Call get_data(). It returns an object carrying 'errors' and, when
 *      parsing was attempted, 'format' and 'data'.
 */
class CsvFile {
    // Field names that may appear in the header row
    protected $allowedkeys = array();
    // Parsed data rows; stays null until at least one data row has been read
    protected $data;
    // Error messages keyed by error type (this class only uses 'file')
    protected $errors = array();
    // Open file handle, or false when no file is open
    protected $filehandle = false;
    // Field names of the columns, taken from the header row when there is one
    protected $format = array();
    // Whether the first line of the file is a header row
    protected $headerExists = true;
    // Field names the header row has to contain
    protected $mandatoryfields = array();

    /**
     * Opens the CSV file for reading.
     *
     * When the name is empty, the file does not exist or it cannot be opened,
     * an 'invalidfilename' error is stored under the 'file' key instead.
     *
     * @param string $filename path of the CSV file
     */
    public function __construct($filename = '') {
        if (!empty($filename) && file_exists($filename)) {
            $handle = fopen($filename, 'r');
            if ($handle !== false) {
                $this->filehandle = $handle;
                return;
            }
        }
        $this->errors['file'] = get_string('invalidfilename', 'admin', $filename);
    }

    /**
     * Returns the value of one of this object's properties.
     *
     * @param string $field property name
     * @return mixed the property's current value
     * @throws InvalidArgumentException if the property does not exist
     */
    public function get($field) {
        if (!property_exists($this, $field)) {
            throw new InvalidArgumentException("Field $field wasn't found in class " . get_class($this));
        }
        return $this->{$field};
    }

    /**
     * Sets one of this object's properties.
     *
     * Only the requested property is written; no undeclared bookkeeping
     * properties are created as a side effect.
     *
     * @param string $field property name
     * @param mixed $value new value
     * @return bool true on success
     * @throws InvalidArgumentException if the property does not exist
     */
    public function set($field, $value) {
        if (!property_exists($this, $field)) {
            throw new InvalidArgumentException("Field $field wasn't found in class " . get_class($this));
        }
        $this->{$field} = $value;
        return true;
    }

    /**
     * Parses the file and returns the outcome.
     *
     * If errors were already recorded before parsing, only those errors are
     * returned. Otherwise the file is parsed and closed, and the result
     * carries 'errors', 'format' and 'data'.
     *
     * @return stdClass object with 'errors' and, after parsing, 'format' and 'data'
     * @throws SystemException if parsing produced neither a format nor an error
     */
    public function get_data() {
        $csvfile = new stdClass();
        if (!empty($this->errors)) {
            $csvfile->errors = $this->errors;
            return $csvfile;
        }

        $this->parse_data();
        if ($this->filehandle !== false) {
            fclose($this->filehandle);
            // Forget the closed handle so that a repeated call does not try
            // to read from it
            $this->filehandle = false;
        }

        $csvfile->errors = $this->errors;
        if (empty($this->format) && empty($this->errors)) {
            throw new SystemException('CSV File has no headers');
        }
        $csvfile->format = $this->format;
        $csvfile->data = $this->data;

        return $csvfile;
    }

    /**
     * Records an error message, replacing any earlier message with the same key.
     *
     * @param string $key error key
     * @param string $value error message
     */
    public function add_error($key, $value) {
        $this->errors[$key] = $value;
    }

    /**
     * Reads the open file line by line, filling $format and $data.
     *
     * Problems are reported through add_error() under the 'file' key.
     */
    private function parse_data() {
        if (false === $this->filehandle) {
            return; // file is not open
        }

        $delimiter = $this->detectDelimiter();
        $i = 0;
        // Enclosure and escape are spelled out (they are PHP's long-standing
        // defaults) because relying on the default escape is deprecated as of
        // PHP 8.4
        while (($line = fgetcsv($this->filehandle, MAX_LINE_LENGTH, $delimiter, '"', '\\')) !== false) {
            $i++;
            if ($this->headerExists && $i == 1) {
                // Get the format of the file
                if (!$this->parse_header($line)) {
                    return;
                }
                continue;
            }

            foreach ($line as $index => $field) {
                $line[$index] = $this->trim_field($field);
            }

            // All OK!
            $this->data[] = $line;
        }

        if ($this->headerExists && $i == 1) {
            // There was only the title row :(
            $this->add_error('file', get_string('uploadcsverrornorecords', 'admin'));
            return;
        }

        if ($this->data === null) {
            // Oops! Couldn't get CSV data for some reason
            $this->add_error('file', get_string('uploadcsverrorunspecifiedproblem1', 'admin'));
        }
    }

    /**
     * Validates the header row and stores it as the file's format.
     *
     * @param array $line fields of the first line of the file
     * @return bool false if an error was recorded, true if the header is valid
     */
    private function parse_header($line) {
        foreach ($line as $index => $potentialkey) {
            // A blank line yields a null field; treat it as an empty name
            $potentialkey = trim((string) $potentialkey);
            if (!in_array($potentialkey, $this->allowedkeys)) {
                $this->add_error('file', get_string('uploadcsverrorinvalidfieldname', 'admin', $potentialkey));
                return false;
            }
            $line[$index] = $potentialkey;
        }

        // Now we know all of the field names are valid, we need to make
        // sure that the required fields are included
        foreach ($this->mandatoryfields as $field) {
            if (!in_array($field, $line)) {
                $this->add_error('file', get_string('uploadcsverrorrequiredfieldnotspecified', 'admin', $field));
                return false;
            }
        }

        // The format line is valid
        $this->format = $line;
        log_info('FORMAT:');
        log_info($this->format);
        return true;
    }

    /**
     * Strips leading and trailing whitespace, including UTF-8 non-breaking
     * spaces, from a single data field.
     *
     * @param string|null $field raw field value
     * @return string the trimmed field, or the untrimmed field if the regular
     *                expression engine reports an error
     */
    private function trim_field($field) {
        $field = (string) $field;
        $trimmed = preg_replace('/^(\s|\xc2\xa0)*(.*?)(\s|\xc2\xa0)*$/', '$2', $field);
        return ($trimmed === null) ? $field : $trimmed;
    }

    /**
     * Detects the delimiter from the first line of the file, then moves the
     * file pointer back to the start of the file.
     *
     * The candidates are tried in this order: comma, semicolon, colon, tab and
     * space. The first one that occurs in the first line anywhere after its
     * first character wins. The original author's assumption is that the first
     * line holds only header fields made of the characters [a-zA-Z0-9_], so a
     * delimiter cannot appear inside a field name. <br/>
     * Background is that Microsoft separates the fields in csv-files with
     * semicolons when the System language is set to German
     * @return string the delimiter used to separate the fields in the file
     */
    private function detectDelimiter() {
        static $knowndelimiters = array(
            ',',
            ';',
            ':',
            "\t",
            ' '
        );
        $firstline = fgets($this->filehandle);
        rewind($this->filehandle);
        if ($firstline !== false) {
            foreach ($knowndelimiters as $delimiter) {
                $position = strpos($firstline, $delimiter);
                // A delimiter found at offset 0 does not count
                if ($position !== false && $position > 0) {
                    return $delimiter;
                }
            }
        }
        // Default: the comma. In case we have a file with only one field per
        // line, we cannot detect the delimiter. Luckily Mahara always expects
        // more than one mandatory fields, so getting here usually means the
        // file cannot be imported anyway
        return ',';
    }
}

/**
 * Collects error messages per CSV line and renders them as a single string.
 */
class CSVErrors {

    // Maps a line identifier to the list of messages recorded for that line
    private $csverrors = array();

    /**
     * Records a message for the given line.
     *
     * @param int|string $line identifier of the line the message belongs to
     * @param string $msg the message
     */
    public function add($line, $msg) {
        if (isset($this->csverrors[$line])) {
            $this->csverrors[$line][] = $msg;
            return;
        }
        $this->csverrors[$line] = array($msg);
    }

    /**
     * Joins all recorded messages, ordered by line key, with "<br>\n" between
     * them. The recorded messages are consumed by this call.
     *
     * @return string|null the joined messages, or null if nothing was recorded
     */
    public function process() {
        if (empty($this->csverrors)) {
            return null;
        }

        ksort($this->csverrors);

        $messages = array();
        foreach ($this->csverrors as $lineerrors) {
            foreach ($lineerrors as $message) {
                $messages[] = $message;
            }
        }
        // Everything has been handed out; start again with an empty list
        $this->csverrors = array();

        return implode("<br>\n", $messages);
    }

}