lib.php 115 KB
Newer Older
1
2
3
4
5
6
<?php
/**
 *
 * @package    mahara
 * @subpackage search-elasticsearch
 * @author     Catalyst IT Ltd
7
8
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL version 3 or later
 * @copyright  For copyright information on Mahara, please see the README file distributed with this software.
9
10
11
12
13
 *
 */
defined('INTERNAL') || die();
// Required because we use the PluginSearchInternal class for some functions
require_once(get_config('docroot') . 'search/internal/lib.php');
14
require_once(get_config('libroot') . '/elasticsearch/autoload.php');
15

16
use Elasticsearch\ClientBuilder;
17

18
19
/**
 * Autoloader for the ElasticsearchType_* classes kept in this plugin's type/ directory.
 *
 * Class names that do not start with "ElasticsearchType_" are ignored, and a
 * missing file is ignored too, so other registered autoloaders still get a chance.
 *
 * @param string $class Name of the class PHP is trying to load
 */
function __autoload_elasticsearchtypes ($class) {
    $prefix = 'ElasticsearchType_';
    // Compare against the prefix itself rather than a hard-coded length.
    if (strncmp($class, $prefix, strlen($prefix)) !== 0) {
        return;
    }
    $file = __DIR__ . '/type/' . $class . '.php';
    if (file_exists($file)) {
        require_once($file);
    }
}
26
27
// Register the ElasticsearchType_* autoloader; the second argument (true) asks PHP to throw if registration fails.
spl_autoload_register('__autoload_elasticsearchtypes', true);

28
29
30
31
32
33
/**
 * The Elasticsearch search plugin, which sends Mahara records to an
 * Elasticsearch server for indexing and searching.
 */
class PluginSearchElasticsearch extends PluginSearch {

34
    /**
35
     * The minimum version of elasticsearch this plugin is compatible with.
36
37
38
39
40
41
42
43
     */
    // Compared with the version number reported by the server in elasticsearch_server().
    const elasticsearch_version = '5.0';

    /**
     * The version of elasticsearch-php this plugin is compatible with.
     */
    const elasticsearchphp_version = '5.0';

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
    /**
     * Records in search_elasticsearch_queue that haven't been sent to Elasticsearch yet.
     */
    const queue_status_new = 0;
    /**
     * Records in search_elasticsearch_queue that have been sent in bulk to Elasticsearch.
     * These are deleted after being successfully sent, so they'll only be seen in the table
     * if the request to send them failed.
     */
    const queue_status_sent_in_bulk = 1;
    /**
     * Records in search_elasticsearch_queue that have been sent individually to Elasticsearch.
     * These are deleted after being successfully sent, so they'll only be seen in the table
     * if the individual request to send them failed.
     */
    const queue_status_sent_individually = 2;

61
62
63
64
65
66
    /**
     * This function indicates whether the plugin should take the raw $query string
     * when its group_search_user function is called, or whether it should get the
     * parsed query string.
     *
     * @return boolean
67
     */
68
69
70
71
72
73
74
75
76
77
78
79
80
    public static function can_process_raw_group_search_user_queries() {
        // group_search_user() hands its arguments straight to PluginSearchInternal,
        // and that needs the raw (unparsed) query.
        return true;
    }

    /**
     * Returns search results for users in a particular group
     *
     * It's called by and tightly coupled with get_group_user_search_results() in searchlib.php. Look there for
     * the exact meaning of its parameters and expected return values.
     *
     * Since I haven't had the time to figure them out, we'll just use PluginSearchInternal's version.
     */
81
82
    public static function group_search_user($group, $queries, $constraints, $offset, $limit, $membershiptype, $order, $friendof, $sortoptionidx=null) {
        // Pure delegation: every argument is forwarded unchanged and the
        // internal search plugin's result is returned as-is.
        return PluginSearchInternal::group_search_user(
            $group,
            $queries,
            $constraints,
            $offset,
            $limit,
            $membershiptype,
            $order,
            $friendof,
            $sortoptionidx
        );
    }

    /**
     * Returns search results for users in a particular institution
     *
     * We are going to pass this on to PluginSearchInternal to handle returning the correct results
     * in the correct format.
     */
    public static function institutional_admin_search_user($query, $institution, $limit) {
        // Forward all three arguments unchanged to the internal search plugin.
        return PluginSearchInternal::institutional_admin_search_user($query, $institution, $limit);
    }

    /**
96
     * This function determines whether the plugin is currently available to be chosen
97
98
99
     * as the sitewide search plugin (i.e. get_config('searchplugin'))
     */
    public static function is_available_for_site_setting() {
        // This plugin may always be offered as the sitewide search plugin.
        return true;
    }

103
    /**
104
     * This function determines if we can connect to the elasticsearch server with supplied host and port
105
106
     */
    public static function can_connect() {
        // elasticsearch_server() returns array($canconnect, $server);
        // only the boolean connection flag is of interest here.
        $result = self::elasticsearch_server();
        return $result[0];
    }

    /**
     * This function returns elasticsearch server information at supplied host and port
113
     * We can't use the $ESClient as we need to check if we are trying to connect to either an older or current server so will run curl commands directly
114
115
     * @param string $option An optional param to get status about a specific status, eg cluster health
     * @param string $index  An optional param to get status about a specific status for a particular index, eg indices status
116
117
118
     * @return array containing $canconnect bool    - whether we can connect to elasticsearch at host/port
     *                          $server     object  - information about the server request
     */
119
    public static function elasticsearch_server($option=null, $index=null) {
        $clientopts = self::get_client_config('write');
        // Only the first configured host is probed.
        $host = $clientopts['hosts'][0];
        $url = $host['host'] . ':' . $host['port'];
        if (!empty($host['username'])) {
            $url = $host['username'] . ':' . $host['password'] . '@' . $url;
        }
        if (!empty($host['scheme'])) {
            $url = $host['scheme'] . '://' . $url;
        }

        // Pick the endpoint; the configured index name and the single
        // "?format=json" query string are appended once, below.
        switch ($option) {
            case "clusterhealth":
                $url .= '/_cluster/health';
                break;
            case "indexhealth":
                $url .= '/_cat/indices';
                break;
            default:
                // Plain connectivity check: only the HTTP status is needed, so skip the body.
                $clientopts['curlopts'][CURLOPT_NOBODY] = true;
        }

        $url .= '/' . get_config_plugin('search', 'elasticsearch', 'indexname') . '?format=json';
        $curlopts = array(CURLOPT_URL => $url) + $clientopts['curlopts'];
        $server = mahara_http_request($curlopts, true);

        // We count as connected only when the request came back with HTTP 200.
        $canconnect = false;
        if (!empty($server->info) && !empty($server->info['http_code'])) {
            if ($server->info['http_code'] != '200') {
                $server->error = get_string('servererror', 'search.elasticsearch', $server->info['http_code']);
            }
            else {
                $canconnect = true;
            }
        }

        if (!empty($server->data)) {
            $server->data = json_decode($server->data);
            if (!empty($server->data->error)) {
                $error = $server->data->error;
                // The error is expected to be an object carrying a 'reason';
                // cope with a plain value as well instead of raising a PHP warning.
                if (is_object($error) && isset($error->reason)) {
                    $reason = $error->reason;
                }
                else {
                    $reason = is_scalar($error) ? $error : json_encode($error);
                }
                $prefix = isset($server->data->status) ? $server->data->status . ': ' : '';
                $server->error = $prefix . $reason;
            }
            else {
                if ($index && is_array($server->data)) {
                    // We need to find the data for the particular index;
                    // data becomes null when that index is not listed.
                    $thisindex = null;
                    foreach ($server->data as $data) {
                        if (isset($data->index) && $data->index == $index) {
                            $thisindex = $data;
                            break;
                        }
                    }
                    $server->data = $thisindex;
                }
                if (!empty($server->data->version) && !empty($server->data->version->number)) {
                    // Flag servers older than the minimum version this plugin supports.
                    if (version_compare($server->data->version->number, self::elasticsearch_version, '<')) {
                        $server->error = get_string('elasticsearchtooold', 'search.elasticsearch', $server->data->version->number, self::elasticsearch_version);
                    }
                }
            }
        }
        return array($canconnect, $server);
    }

180
181
182
183
184
185
186
187
188
    /**
     * This function determines whether the plugin allows a search box to display for
     * non-logged in users - only useful if results returned by search are allowed to
     * be seen by the public
     */
    public static function publicform_allowed() {
        // The search box may be displayed to non-logged-in users.
        return true;
    }

189
    /**
190
191
192
193
194
195
196
197
198
199
200
     * Generates the search form used in the page headers
     * @return string
     */
    public static function header_search_form() {
        // Single-line form posting to this plugin's own search page:
        // one text box for the query and an icon-only submit button.
        $queryelement = array(
            'type'           => 'text',
            'defaultvalue'   => '',
            'title'          => get_string('pagetitle', 'search.elasticsearch'),
            'placeholder'    => get_string('pagetitle', 'search.elasticsearch'),
            'hiddenlabel'    => true,
        );
        $submitelement = array(
            'type' => 'button',
            'class' => 'btn-default input-group-btn',
            'usebuttontag' => true,
            // The visible label is an icon; the text label is kept for screen readers only.
            'value' => '<span class="icon icon-search icon-lg" role="presentation" aria-hidden="true"></span><span class="sr-only">'. get_string('go') . '</span>',
        );

        return pieform(array(
            'name'                => 'usf',
            'action'              => get_config('wwwroot') . 'search/elasticsearch/index.php',
            'renderer'            => 'oneline',
            'autofocus'           => false,
            'validate'            => false,
            'presubmitcallback'   => '',
            'class'               => 'header-search-form',
            'elements'            => array(
                'query'  => $queryelement,
                'submit' => $submitelement,
            ),
        ));
    }

    /**
     * Reports that this plugin can be disabled.
     *
     * @return boolean Always true
     */
    public static function can_be_disabled() {
        return true;
    }

    /**
     * Reports that this plugin has configuration options (see get_config_options()).
     *
     * @return boolean Always true
     */
    public static function has_config() {
        return true;
    }

228
229
230
231
232
233
234
    /**
     * Renders a notice through the 'Search:elasticsearch:config<type>.tpl' template.
     *
     * @param string $notice Text assigned to the template as 'notice'
     * @param string $type   Template name suffix; callers in this class pass
     *                       'warning', 'ok' or 'notice'
     * @return string The fetched template output
     */
    public static function get_formatted_notice($notice, $type) {
        $template = 'Search:elasticsearch:config' . $type . '.tpl';
        $renderer = smarty_core();
        $renderer->assign('notice', $notice);
        return $renderer->fetch($template);
    }
235

236
237
    /**
     * Builds the pieform definition for this plugin's configuration page.
     *
     * The form contains, in order: the connection / cluster / index status
     * notices, the settings that are only displayed (as 'html' elements), the
     * editable cron limit and shard counts, the artefact type selection (only
     * when the 'artefact' search type is enabled) and the reset fieldset.
     *
     * @return array Form definition with an 'elements' key
     */
    public static function get_config_options() {
        $enabledhtml = '';
        $state = 'ok';

        list($status, $server) = self::elasticsearch_server();
        if (!$status) {
            $state = 'notice';
            // Prefer the specific error reported by the server check over the generic message.
            $notice = get_string('noticenotactive', 'search.elasticsearch', get_config_plugin('search', 'elasticsearch', 'host'), get_config_plugin('search', 'elasticsearch', 'port'));
            if (!empty($server->error)) {
                $notice = $server->error;
            }
            $enabledhtml .= self::get_formatted_notice($notice, 'warning');
        }
        else {
            list($status, $health) = self::elasticsearch_server('clusterhealth');
            if (!empty($health->data) && isset($health->data->status) && $health->data->status != 'green') {
                $enabledhtml .= self::get_formatted_notice(get_string('clusterstatus', 'search.elasticsearch', $health->data->status, $health->data->unassigned_shards), 'warning');
                $state = 'notice';
            }
            $index = get_config_plugin('search', 'elasticsearch', 'indexname');
            list($status, $health) = self::elasticsearch_server('indexhealth', $index);
            if (!empty($health->data)) {
                // Only downgrade the state when a warning is actually shown,
                // matching the cluster health check above.
                if (isset($health->data->status) && $health->data->status == '403') {
                    $enabledhtml .= self::get_formatted_notice(get_string('indexstatusunknown', 'search.elasticsearch', $index, $health->data->status), 'warning');
                    $state = 'notice';
                }
                else if (isset($health->data->health) && $health->data->health != 'green') {
                    $enabledhtml .= self::get_formatted_notice(get_string('indexstatusbad', 'search.elasticsearch', $index, $health->data->health), 'warning');
                    $state = 'notice';
                }
            }
        }

        $settingsurl = get_config('wwwroot') . 'admin/site/options.php?fs=searchsettings';
        if (get_config('searchplugin') == 'elasticsearch') {
            $enabledhtml .= self::get_formatted_notice(get_string('noticeenabled', 'search.elasticsearch', $settingsurl), $state);
        }
        else {
            $enabledhtml .= self::get_formatted_notice(get_string('noticenotenabled', 'search.elasticsearch', $settingsurl), 'warning');
        }

        $typesconfig = get_config_plugin('search', 'elasticsearch', 'types');

        $config = array(
            'elements' => array(
                'enablednotice' => array(
                    'type'         => 'html',
                    'value'        => $enabledhtml,
                ),
                'host'             => self::get_config_options_html_element('host', get_config_plugin('search', 'elasticsearch', 'host')),
                'port'             => self::get_config_options_html_element('port', get_config_plugin('search', 'elasticsearch', 'port')),
                'scheme'           => self::get_config_options_html_element('scheme', self::get_config_options_value_or_notset('scheme')),
                'username'         => self::get_config_options_html_element('username', self::get_config_options_value_or_notset('username')),
                'password'         => self::get_config_options_html_element('password', self::get_config_options_password_summary('password')),
                'indexingusername' => self::get_config_options_html_element('indexingusername', self::get_config_options_value_or_notset('indexingusername')),
                'indexingpassword' => self::get_config_options_html_element('indexingpassword', self::get_config_options_password_summary('indexingpassword')),
                'indexname'        => self::get_config_options_html_element('indexname', get_config_plugin('search', 'elasticsearch', 'indexname')),
                'bypassindexname'  => self::get_config_options_html_element('bypassindexname', self::get_config_options_value_or_notset('bypassindexname')),
                'analyzer'         => self::get_config_options_html_element('analyzer', get_config_plugin('search', 'elasticsearch', 'analyzer')),
                'types'            => self::get_config_options_html_element('types', $typesconfig) + array('size' => '80'),
                'cronlimit'        => self::get_config_options_text_element('cronlimit'),
                'shards'           => self::get_config_options_text_element('shards'),
                'replicashards'    => self::get_config_options_text_element('replicashards'),
            ),
        );

        $types = explode(',', $typesconfig);

        // if artefact => show list of artefacttype
        if (in_array('artefact', $types)) {
            $rs = get_recordset_sql('SELECT DISTINCT name AS artefacttype FROM {artefact_installed_type} ORDER BY name ASC');
            $artefacttypes = explode(',', get_config_plugin('search', 'elasticsearch', 'artefacttypes'));
            // the following artefacttypes are auto ticked because the info is already being indexed by the usr table
            $artefacttypes_toexclude = array('firstname', 'lastname', 'preferredname', 'email');
            $artefacttypes = array_merge($artefacttypes, $artefacttypes_toexclude);
            // to be valid, artefact types need a hierarchy
            $artefacttypesmap_array = self::elasticsearchartefacttypesmap_to_array();
            $mappedtypes = array_keys($artefacttypesmap_array);

            $types_checkbox = array();
            $records = recordset_to_array($rs);
            if ($records) {
                foreach ($records as $record) {
                    $name = $record->artefacttype;
                    $types_checkbox[] = array(
                        'title'        => $name,
                        'value'        => $name,
                        'defaultvalue' => in_array($name, $artefacttypes),
                        // Not selectable when indexed via the usr table or when missing from the hierarchy map.
                        'disabled'     => in_array($name, $artefacttypes_toexclude) || !in_array($name, $mappedtypes),
                    );
                }
            }

            $config['elements']['artefacttypes'] = array(
                'type'         => 'checkboxes',
                'class'        => 'stacked',
                'title'        => get_string('artefacttypelegend', 'search.elasticsearch'),
                'description'  => get_string('artefacttypedescription', 'search.elasticsearch'),
                'elements'     => $types_checkbox,
            );

            $config['elements']['artefacttypesmap'] = array(
                'type'         => 'textarea',
                'rows'         => 10,
                'cols'         => 100,
                'class'        => 'under-label',
                'title'        => get_string('artefacttypemaplegend', 'search.elasticsearch'),
                'description'  => get_string('artefacttypemapdescription', 'search.elasticsearch'),
                'defaultvalue' => implode("\n", $artefacttypesmap_array),
            );
        }

        if (count($types) > 0) {
            // Number of queued records per search type, shown next to each type.
            $item_by_type_in_queue = array();
            $rs = get_records_sql_array('SELECT type, count(*) AS total FROM {search_elasticsearch_queue} GROUP BY type', array());
            if ($rs) {
                foreach ($rs as $record) {
                    $item_by_type_in_queue[$record->type] = $record->total;
                }
            }

            $resetelements = array();
            // TODO: Make single-searchtype reset work properly. For now each type is only listed
            // with its queue count, leaving only "reset all" available.
            $resetelements['resetdescription'] = array(
                'type' => 'html',
                'value' => get_string('resetdescription','search.elasticsearch')
            );
            foreach ($types as $type) {
                $queued = isset($item_by_type_in_queue[$type]) ? $item_by_type_in_queue[$type] : 0;
                $resetelements[$type . 'reset'] = array(
                    'title' => $type,
                    'type' => 'html',
                    'value' => ' (' . $queued . ')',
                );
            }
            // And on the end, a special one to reset all the indexes.
            $resetelements['allreset'] = array(
                'title' => get_string('resetallindexes', 'search.elasticsearch'),
                'type' => 'submit',
                'class' => 'btn-default',
                'defaultvalue' => get_string('reset', 'search.elasticsearch'),
            );

            $config['elements']['resetindex'] = array(
                'type' => 'fieldset',
                'class' => 'last',
                'legend' => get_string('resetlegend', 'search.elasticsearch'),
                'elements' => $resetelements,
                'collapsible' => true
            );
        }

        return $config;
    }

    /**
     * Builds a display-only ('html') config element titled and described by the
     * '<name>' and '<name>description' language strings.
     */
    private static function get_config_options_html_element($name, $value) {
        return array(
            'title'        => get_string($name, 'search.elasticsearch'),
            'description'  => get_string($name . 'description', 'search.elasticsearch'),
            'type'         => 'html',
            'value'        => $value,
            'help'         => true,
        );
    }

    /**
     * Builds an editable text config element pre-filled with the stored plugin setting.
     */
    private static function get_config_options_text_element($name) {
        return array(
            'title'        => get_string($name, 'search.elasticsearch'),
            'description'  => get_string($name . 'description', 'search.elasticsearch'),
            'type'         => 'text',
            'defaultvalue' => get_config_plugin('search', 'elasticsearch', $name),
        );
    }

    /**
     * Returns the stored plugin setting, or the 'confignotset' string when it is empty.
     */
    private static function get_config_options_value_or_notset($name) {
        $value = get_config_plugin('search', 'elasticsearch', $name);
        return $value ? $value : get_string('confignotset', 'search.elasticsearch');
    }

    /**
     * Returns the 'passwordlength' string for a stored password (never the
     * password itself), or the 'confignotset' string when none is stored.
     */
    private static function get_config_options_password_summary($name) {
        $password = get_config_plugin('search', 'elasticsearch', $name);
        if (!$password) {
            return get_string('confignotset', 'search.elasticsearch');
        }
        return get_string('passwordlength', 'search.elasticsearch', strlen($password));
    }

494
495
496
497
498
499
500
    /**
     * Rejects the configuration form while the elasticsearch cron lock row
     * ('_cron_lock_search_elasticsearch_cron') exists in the config table.
     *
     * @param Pieform $form   The submitted form; receives the error
     * @param array   $values Submitted values (not inspected here)
     */
    public static function validate_config_options(Pieform $form, $values) {
        $cronlock = get_record('config', 'field', '_cron_lock_search_elasticsearch_cron');
        if (!$cronlock) {
            return;
        }
        // An elasticsearch cron run is indexing the site right now.
        $form->set_error(null, get_string('indexingrunning', 'search.elasticsearch'));
    }

Son Nguyen's avatar
Son Nguyen committed
501
    /**
     * Saves the submitted configuration form.
     *
     * Stores the cron limit, the shard / replica counts and the artefact type
     * map, requeues the artefact types whose selection changed and, when the
     * "reset all" button was used, rebuilds the index while holding the cron lock.
     *
     * @param Pieform $form   The submitted form
     * @param array   $values Submitted form values
     * @return boolean Always true
     */
    public static function save_config_options(Pieform $form, $values) {
        require_once(get_config('docroot') . 'artefact/lib.php');
        set_config_plugin('search', 'elasticsearch', 'cronlimit', $values['cronlimit']);

        // Set the shard / replica values
        $shards = (int) $values['shards'];
        if (empty($shards)) {
            // we can't have no shards so set to default
            $shards = 5;
        }
        set_config_plugin('search', 'elasticsearch', 'shards', $shards);
        set_config_plugin('search', 'elasticsearch', 'replicashards', (int) $values['replicashards']);

        // Changes in artefact types:
        //       - we need to add the newly selected artefact types (for indexing)
        //       - we need to remove artefact types that have been unchecked (to remove them from the index)
        // The "delete by query" feature of Elasticsearch was not considered reliable enough for
        // deleting large chunks of an index, so data is removed by id instead.

        // The artefact fields are only on the form when the 'artefact' search type is enabled,
        // so don't overwrite the stored map when the field was not submitted.
        if (isset($values['artefacttypesmap'])) {
            set_config_plugin('search', 'elasticsearch', 'artefacttypesmap', $values['artefacttypesmap']);
        }
        // to be valid, artefact types need a hierarchy
        $artefacttypesmap_array = self::elasticsearchartefacttypesmap_to_array();

        $selected = (isset($values['artefacttypes']) && is_array($values['artefacttypes'])) ? $values['artefacttypes'] : array();
        // the following artefacttypes are already being indexed by the usr table so we don't want to save them
        $artefacttypes_toexclude = array('firstname', 'lastname', 'preferredname', 'email');
        foreach ($artefacttypes_toexclude as $exclude) {
            if (!empty($selected[$exclude])) {
                unset($selected[$exclude]);
            }
        }
        // Also drop them when the submitted array is keyed numerically rather than by type name.
        $selected = array_diff($selected, $artefacttypes_toexclude);
        $selected = array_intersect($selected, array_keys($artefacttypesmap_array));

        // NOTE(review): 'types' is rendered as an 'html' element, so it may not be part of the
        // submitted values; fall back to the stored setting in that case - confirm against pieforms.
        $typesvalue = isset($values['types']) ? $values['types'] : get_config_plugin('search', 'elasticsearch', 'types');
        $types = explode(',', $typesvalue);
        if (in_array('artefact', $types)) {
            $artefacttypes_old = explode(',', get_config_plugin('search', 'elasticsearch', 'artefacttypes'));
            // array_diff() preserves keys, so the two differences must be merged, not
            // combined with "+", which silently drops entries whose keys collide.
            $changed = array_unique(array_merge(
                array_diff($artefacttypes_old, $selected),
                array_diff($selected, $artefacttypes_old)
            ));
            // $changed now contains the artefacttypes that have been checked and unchecked
            foreach ($changed as $artefacttype) {
                ElasticsearchIndexing::requeue_searchtype_contents('artefact', $artefacttype);
            }
            set_config_plugin('search', 'elasticsearch', 'artefacttypes', implode(',', $selected));
        }

        // If they chose to reset all the indexes, do that.
        if (isset($values['allreset'])) {
            // set the cron lock before beginning re index to stop the cron indexing at same time
            $start = time();
            insert_record('config', (object) array('field' => '_cron_lock_search_elasticsearch_cron', 'value' => $start));

            try {
                self::reset_all_searchtypes();
                error_log("finished resetting.");

                // Send the first batch of records to the elasticsearch server now, for instant gratification
                self::index_queued_items();
                error_log("finished indexing queued items");
            }
            catch (Exception $e) {
                // Don't leave the lock behind: it would block the cron and this form.
                delete_records('config', 'field', '_cron_lock_search_elasticsearch_cron', 'value', $start);
                throw $e;
            }

            // free the cron lock
            delete_records('config', 'field', '_cron_lock_search_elasticsearch_cron', 'value', $start);
        }
        return true;
    }

    /**
     * This function gets called when the sitewide search plugin is switched to
     * this one. It's the chance for the plugin to do any post-configuration
     * initialization it might need. (The same stuff you'd probably do after
     * changing the plugin's configuration via its extension config page.)
     *
     */
    public static function initialize_sitewide() {
        // Without a reachable server there is nothing to initialise; report failure.
        if (!self::can_connect()) {
            return false;
        }
        self::reset_all_searchtypes();
        return true;
    }

    /**
     * This function gets called when the sitewide search plugin is switched
     * away from this one.
     *
     * We'll use this opportunity to disable the triggers and clear out the queue
     * table. Otherwise, it will forever swell since we're no longer running
     * the cron.
     */
    public static function cleanup_sitewide() {
        $enabledtypes = explode(',', get_config_plugin('search', 'elasticsearch', 'types'));
        // Drop the triggers of every enabled search type, then the trigger functions.
        foreach ($enabledtypes as $type) {
            ElasticsearchIndexing::drop_triggers($type);
        }
        ElasticsearchIndexing::drop_trigger_functions();
        // Empty the queue table.
        delete_records('search_elasticsearch_queue');
    }

    /**
     * Resets all the searchtypes in the following ways:
     *  - Deletes and re-creates the elasticsearch index on the server
     *  - Re-creates the trigger functions
     *    - This will also drop the triggers for all types (even those that aren't in use)
     *  - Creates all triggers for those types that are in use
     *  - Tells the elasticsearch server to drop and re-create the index
     *  - Tells the elasticsearch server to re-create the "mapping" for each type
     *  - Loads every record for that type into the queue table, for the cron to chug away at them
     */
    public static function reset_all_searchtypes() {
        ElasticsearchIndexing::create_index();
        ElasticsearchIndexing::create_trigger_functions();

        // Skip blank entries: an empty or unset 'types' setting explode()s into
        // array(''), which would resolve to the non-existent class
        // 'ElasticsearchType_' in the loop below.
        $typesconfig = (string) get_config_plugin('search', 'elasticsearch', 'types');
        $enabledtypes = array_filter(array_map('trim', explode(',', $typesconfig)), 'strlen');

        // Collect the per-type mappings; they are merged into one by set_mapping().
        $mappings = array();
        foreach ($enabledtypes as $type) {
            // Install the triggers and queue every existing record of this type.
            ElasticsearchIndexing::create_triggers($type);
            ElasticsearchIndexing::requeue_searchtype_contents($type);

            error_log("fetching mapping for $type");
            $ES_class = 'ElasticsearchType_' . $type;
            // A type class flags "no mapping" by setting $mappingconfv6 to false.
            if ($ES_class::$mappingconfv6 === false) {
                error_log("mapping $type missing - will ignore");
                continue;
            }
            $mappings[] = $ES_class::$mappingconfv6;
        }

        self::set_mapping($mappings);
    }

    /**
     * Seeds the plugin's default configuration.
     *
     * Only acts when $prevversion equals 0 (presumably a first-time install).
     *
     * @param mixed $prevversion Previously installed version of the plugin
     */
    public static function postinst($prevversion) {
        if ($prevversion != 0) {
            return;
        }

        // Written in this order; 'shards' and 'replicashards' are integers, the rest strings.
        $defaults = array(
            'host'          => '127.0.0.1',
            'port'          => '9200',
            'indexname'     => 'mahara',
            'analyzer'      => 'mahara_analyzer',
            'types'         => 'usr,interaction_instance,interaction_forum_post,group,view,artefact,block_instance,collection',
            'cronlimit'     => '50000',
            'shards'        => 5,
            'replicashards' => 1,
        );
        foreach ($defaults as $field => $value) {
            set_config_plugin('search', 'elasticsearch', $field, $value);
        }

        // The artefact types map ships as a text file next to this script.
        $mapfile = __DIR__ . '/elasticsearchartefacttypesmap.txt';
        set_config_plugin('search', 'elasticsearch', 'artefacttypesmap', file_get_contents($mapfile));
    }

    /**
     * Parses the 'artefacttypesmap' plugin setting into an array.
     *
     * The setting is split on newlines; every line made of exactly three
     * '|'-separated fields is kept whole, keyed by its first field.
     *
     * @return array Lines keyed by their first field, sorted by key as strings
     */
    public static function elasticsearchartefacttypesmap_to_array() {
        $rawmap = get_config_plugin('search', 'elasticsearch', 'artefacttypesmap');
        $bytype = array();
        foreach (explode("\n", $rawmap) as $line) {
            $fields = explode("|", $line);
            // Ignore anything that is not a three-field line.
            if (count($fields) != 3) {
                continue;
            }
            $bytype[$fields[0]] = $line;
        }
        ksort($bytype, SORT_STRING);
        return $bytype;
    }


    /**
     * Describes the cron job of this plugin.
     *
     * @return array A single job object: call 'cron' every hour at minutes '4-59/5'
     */
    public static function get_cron() {
        $job = new stdClass();
        $job->callfunction = 'cron';
        $job->hour = '*';
        $job->minute = '4-59/5';

        return array($job);
    }

    /**
     * Cron entry point: queues access changes since the last run, records the
     * new run time and then processes the queue.
     */
    public static function cron() {
        // Do nothing unless this plugin is the active search plugin.
        if (get_config('searchplugin') !== 'elasticsearch') {
            return;
        }

        // Current run time, e.g. 2013-04-11 16:45:30
        $now = date('Y-m-d H:i:s');
        $previousrun = get_config_plugin('search', 'elasticsearch', 'lastrun');
        if ($previousrun !== null) {
            ElasticsearchIndexing::add_to_queue_access($previousrun, $now);
        }
        set_config_plugin('search', 'elasticsearch', 'lastrun', $now);

        // Send whatever is queued to Elasticsearch.
        self::index_queued_items();
    }

    /**
     * Creates the "mapping" for the 'doc' mappingtype on the elasticsearch server
     *
     * @param array $mappings Array of old style mapping types that we want to merge to make one type
     * @return false|null False when $mappings is empty or not an array; otherwise nothing is returned
     */
    private static function set_mapping($mappings) {
        if (!is_array($mappings) || empty($mappings)) {
            error_log('wrong mapping info');
            return false;
        }

        // Fold every per-type mapping into one property list. A field seen more
        // than once is merged recursively, later definitions overriding earlier ones.
        $properties = array();
        foreach ($mappings as $typemapping) {
            foreach ($typemapping as $field => $definition) {
                $properties[$field] = isset($properties[$field])
                    ? array_replace_recursive($properties[$field], $definition)
                    : $definition;
            }
        }

        // In version 6.x there is no catchall '_all' field so we now map '$type_all' for the different types
        // and instead of doing full search on '_all' we do multi_match on 'catch_all' to achieve same functionality.
        $analyzer = get_config_plugin('search', 'elasticsearch', 'analyzer');
        $properties['catch_all'] = array(
            'type' => 'text',
            'analyzer' => $analyzer,
            'search_analyzer' => $analyzer,
            'store' => true,
        );

        $request = array(
            'index' => self::get_write_indexname(),
            // Only allowed one type mapping in version 6.x
            'type' => 'doc',
            'body' => array(
                'doc' => array(
                    '_source' => array('enabled' => true),
                    'properties' => $properties,
                ),
            ),
        );

        error_log("setting merged mappings");
        // Set mapping on index type.
        self::make_client('write')->indices()->putMapping($request);
    }

    /**
     * Sends records from the queue table into the elasticsearch server
     */
    public static function index_queued_items() {

        // Number of new queue rows to pick up in this run. With a non-positive
        // setting both limit arguments are passed as empty strings.
        $cronlimit = intval(get_config_plugin('search', 'elasticsearch', 'cronlimit'));
        if ($cronlimit > 0) {
            $limitfrom = 0;
            $limitto = $cronlimit;
        }
        else {
            $limitfrom = $limitto = '';
        }

        // Number of documents per bulk request; falls back to 1000 when not configured.
        $requestlimit = intval(get_config_plugin('search', 'elasticsearch', 'requestlimit'));
        if ($requestlimit <= 0) {
            $requestlimit = 1000;
        }

        // Number of failed rows to retry individually. Rows still flagged
        // "sent in bulk" are failures, because successful ones get deleted.
        // The retries are taken out of the overall cron limit when there is one.
        $redolimit = max(0, intval(get_config_plugin('search', 'elasticsearch', 'redolimit')));
        if ($redolimit > 0) {
            $failedcount = count_records('search_elasticsearch_queue', 'status', self::queue_status_sent_in_bulk);
            $redolimit = min($redolimit, $failedcount);
            if ($limitto) {
                $redolimit = min($redolimit, $limitto);
                $limitto -= $redolimit;
            }
        }

        $records = get_records_array('search_elasticsearch_queue', 'status', self::queue_status_new, 'id', '*', $limitfrom, $limitto);
        if (!$records && !$redolimit) {
            return;
        }

        $ESClient = self::make_client('write');
        $artefacttypesmap_array = self::elasticsearchartefacttypesmap_to_array();
        $indexname = self::get_write_indexname();

        // Bulk deletion: one 'delete' action per queued item id.
        $bulkdelete = function($itemids, $type) use ($ESClient, $indexname) {
            $params = array();
            foreach ($itemids as $itemid) {
                $params['body'][] = array(
                    'delete' => array(
                        '_index' => $indexname,
                        '_type'  => 'doc',
                        '_id'    => $type . $itemid,
                    ),
                );
            }
            return $ESClient->bulk($params);
        };

        // Bulk indexing: an 'index' action line followed by the document body.
        $bulkindex = function($docs, $type) use ($ESClient, $indexname) {
            $params = array();
            foreach ($docs as $doc) {
                $params['body'][] = array(
                    'index' => array(
                        '_index' => $indexname,
                        '_type'  => 'doc',
                        '_id'    => $type . $doc['id'],
                    ),
                );
                // The original type is stored as a field of the single 'doc' type.
                $doc['body']['type'] = $doc['type'];
                $params['body'][] = (array)$doc['body'];
            }
            return $ESClient->bulk($params);
        };

        // Individual deletion of one item id.
        $singledelete = function($itemid, $type) use ($ESClient, $indexname) {
            return $ESClient->delete(array(
                'index' => $indexname,
                'type'  => 'doc',
                'id'    => $type . $itemid,
            ));
        };

        // Individual indexing of one document.
        $singleindex = function($doc, $type) use ($ESClient, $indexname) {
            $doc['body']['type'] = $doc['type'];
            return $ESClient->index(array(
                'index' => $indexname,
                'type'  => 'doc',
                'id'    => $type . $doc['id'],
                'body'  => (array)$doc['body'],
            ));
        };

        // First pass: new rows, sent in bulk (deletions before documents).
        if ($records) {
            list($documents, $deletions) = self::preprocess_queued_items($records, $artefacttypesmap_array);

            if ($deletions) {
                $delcount = array_sum(array_map('count', $deletions));
                log_info("  {$delcount} deletions to index in bulk...");
                self::send_queued_items_in_bulk($deletions, $bulkdelete, $requestlimit);
            }

            if ($documents) {
                $doccount = array_sum(array_map('count', $documents));
                log_info("  {$doccount} documents to index in bulk...");
                self::send_queued_items_in_bulk($documents, $bulkindex, $requestlimit);
            }
        }

        // Second pass: rows still flagged as sent in bulk, retried one at a time.
        $records = get_records_array('search_elasticsearch_queue', 'status', self::queue_status_sent_in_bulk, 'id', '*', 0, $redolimit);
        if ($records) {
            list($documents, $deletions) = self::preprocess_queued_items($records, $artefacttypesmap_array);

            if ($deletions) {
                $delcount = array_sum(array_map('count', $deletions));
                log_info("  {$delcount} deletions to index individually...");
                self::send_queued_items_individually($deletions, $singledelete);
            }

            if ($documents) {
                $doccount = array_sum(array_map('count', $documents));
                log_info("  {$doccount} documents to index individually...");
                self::send_queued_items_individually($documents, $singleindex);
            }
        }

        // Refresh Index
        $ESClient->indices()->refresh(array('index' => $indexname));
    }

    /**
     * Process a set of records from search_elasticsearch_queue and sort them into
     * items to insert and delete into the Elasticsearch index.
     * @param array $records
     * @param array $artefacttypesmap_array
     * @return array()
     */
    private static function preprocess_queued_items($records, $artefacttypesmap_array) {
        $toindex = array();
        $todelete = array();

        foreach ($records as $queued) {
            $ES_class = 'ElasticsearchType_' . $queued->type;

            // Artefact lookups additionally receive the artefact types map.
            $dbrecord = ($queued->type == 'artefact')
                ? $ES_class::getRecordById($queued->type, $queued->itemid, $artefacttypesmap_array)
                : $ES_class::getRecordById($queued->type, $queued->itemid);

            // No DB record (physically deleted, or artefacttype not selected) means
            // the item must leave the index; otherwise the item itself decides.
            $item = $dbrecord ? new $ES_class($dbrecord) : null;
            if ($item === null || $item->getisDeleted()) {
                // Mark item for bulk deletion from index
                $todelete[$queued->type][$queued->id] = $queued->itemid;
                continue;
            }

            // Add item for bulk index, keyed by its queue row id
            $toindex[$queued->type][$queued->id] = array(
                'index' => self::get_write_indexname(),
                'type'  => $queued->type,
                'id'    => $queued->itemid,
                'body'  => $item->getMapping(),
            );
        }

        return array($toindex, $todelete);
    }

    /**
     * Upload a set of items to Elasticsearch in bulk
     * @param array $documents A multi-dimensional array. The top level has keys representing elasticsearch document types.
     * Each of these has a value which is an array of actual Elasticsearch documents or deletion requests, with their
     * key being the matching record in the search_elasticsearch_queue table.
     * @param callback $processfunction A callback function to bulk-request each slice of documents
     * @param int $requestlimit The maximum number of items to send in a single bulk request
     */
    private static function send_queued_items_in_bulk($documents, $processfunction, $requestlimit) {
        // A non-positive batch size would never advance through the documents.
        $requestlimit = max(1, (int) $requestlimit);

        $uploadcount = 0;
        $batchcount = 0;
        $errorcount = 0;

        foreach ($documents as $type => $docs) {
            // preserve_keys = true: the keys are the queue row ids needed below.
            foreach (array_chunk($docs, $requestlimit, true) as $requestdocs) {
                $ids = array_keys($requestdocs);
                $questionmarks = implode(',', array_fill(0, count($ids), '?'));
                $time = db_format_timestamp(time());

                // Mark them before sending, in case the request fails.
                $sql = 'UPDATE {search_elasticsearch_queue} SET status = ?, lastprocessed = ? WHERE id IN (' . $questionmarks . ')';
                execute_sql(
                    $sql,
                    array_merge(array(self::queue_status_sent_in_bulk, $time), $ids)
                );

                // Send them
                try {
                    $batchcount++;
                    $uploadcount += count($requestdocs);
                    if ($batchcount % 10 == 0) {
                        log_info("    batches: {$batchcount}; records: {$uploadcount}; errors: {$errorcount}");
                    }
                    $response = $processfunction($requestdocs, $type);

                    if (!empty($response['errors'])) {
                        // 'errors' itself is only a flag; the useful detail is in the per-item results.
                        log_warn("Error from Elasticsearch trying to send bulk request at time {$time}: "
                            . self::summarise_bulk_response_errors($response));
                        $errorcount++;
                    }
                    else {
                        // Delete them (since they've been sent successfully)
                        delete_records_select('search_elasticsearch_queue', 'id IN (' . $questionmarks . ')', $ids);
                    }
                }
                catch (Exception $e) {
                    $errorcount++;
                    log_warn('Exception sending elasticsearch request at time ' . $time . ': ' . $e->getMessage());
                }
            }
        }

        log_info("    batches: {$batchcount}; records: {$uploadcount}; errors: {$errorcount}");
        if ($errorcount) {
            log_info("    The records in the {$errorcount} errored batches will be queued for individual indexing");
        }
    }

    /**
     * Builds a short human-readable summary of the failures in a bulk response.
     *
     * @param array $response Decoded bulk response with a non-empty 'errors' entry
     * @param int $maxreasons Maximum number of individual failure reasons to include
     * @return string Failure count plus the first few "id: reason" pairs, or the
     *                raw 'errors' value when no per-item error is present
     */
    private static function summarise_bulk_response_errors($response, $maxreasons = 3) {
        $failed = 0;
        $reasons = array();

        if (isset($response['items']) && is_array($response['items'])) {
            foreach ($response['items'] as $item) {
                // Each item is a one-entry array keyed by its action (index, delete, ...).
                $result = is_array($item) ? reset($item) : null;
                if (!is_array($result) || empty($result['error'])) {
                    continue;
                }
                $failed++;
                if (count($reasons) < $maxreasons) {
                    $error = $result['error'];
                    if (is_array($error)) {
                        $reason = isset($error['reason']) ? $error['reason'] : json_encode($error);
                    }
                    else {
                        $reason = (string) $error;
                    }
                    $docid = isset($result['_id']) ? $result['_id'] : '?';
                    $reasons[] = "{$docid}: {$reason}";
                }
            }
        }

        if (!$failed) {
            $flag = $response['errors'];
            return is_scalar($flag) ? var_export($flag, true) : json_encode($flag);
        }
        return "{$failed} item(s) failed (" . implode('; ', $reasons) . ')';
    }
1015

1016

1017
1018
1019
1020
1021
1022
1023
    /**
     * Upload a set of items to Elasticsearch individually
     * @param array $documents A multi-dimensional array. The top level has keys representing elasticsearch document types.
     * Each of these has a value which is an array of actual Elasticsearch documents or deletion requests, with their
     * key being the matching record in the search_elasticsearch_queue table.
     * @param callback $processfunction A callback function to send a single document or deletion request
     */
    private static function send_queued_items_individually($documents, $processfunction) {
        $sent = 0;
        $failures = 0;

        foreach ($documents as $type => $docs) {
            foreach ($docs as $queueid => $doc) {
                // Flag the queue row before sending, so a failed attempt stays visible.
                $marker = new stdClass();
                $marker->id = $queueid;
                $marker->status = self::queue_status_sent_individually;
                $marker->lastprocessed = db_format_timestamp(time());
                update_record('search_elasticsearch_queue', $marker);

                // Send it
                try {
                    $sent++;
                    // Progress line every 20 uploads.
                    if ($sent % 20 == 0) {
                        log_info("    uploads: {$sent}; errors: {$failures}");
                    }

                    $response = $processfunction($doc, $type);
                    $ESError = isset($response['errors']) ? $response['errors'] : false;

                    if (empty($ESError)) {
                        // No errors! Go ahead and delete it from the queue
                        delete_records('search_elasticsearch_queue', 'id', $queueid);
                    }
                    else {
                        $failures++;
                        log_warn("Error from Elasticsearch trying to send individual record {$queueid}: " . $ESError);
                    }
                }
                catch (Exception $e) {
                    $failures++;
                    log_warn('Exception sending elasticsearch record ' . $queueid . ': ' . $e->getMessage() );
                }
            }
        }

        log_info("    uploads: {$sent}; errors: {$failures}");
    }

1069

1070
1071
1072
1073
1074
    /**
     * Universal search: delegates to ElasticsearchPseudotype_all::search() on
     * behalf of the current $USER.
     *
     * Note that $subfacet is accepted but not forwarded.
     */
    public static function search_all ($query_string, $limit, $offset = 0, $options=array(), $mainfacetterm = null, $subfacet = null) {
        global $USER;

        $results = ElasticsearchPseudotype_all::search($query_string, $limit, $offset, $options, $mainfacetterm, $USER);
        return $results;
    }

1075
1076
1077
1078
1079
    /**
     * Event log search: delegates to ElasticsearchType_event_log::search() on
     * behalf of the current $USER.
     */
    public static function search_events ($options=array(), $limit = 10, $offset = 0) {
        global $USER;

        $results = ElasticsearchType_event_log::search($options, $limit, $offset, $USER);
        return $results;
    }

1080
1081
1082
1083
    /**
     * User search: handed straight to the internal search plugin.
     */
    public static function search_user($query_string, $limit, $offset = 0, $data=array()) {
        $results = PluginSearchInternal::search_user($query_string, $limit, $offset, $data);
        return $results;
    }

1084
    /**
     * Group search: handed straight to the internal search plugin.
     *
     * The results depend on the user, so the SQL search makes more sense here
     * than Elasticsearch.
     */
    public static function search_group($query_string, $limit, $offset=0, $type='member', $category='', $institution='all') {
        $results = PluginSearchInternal::search_group($query_string, $limit, $offset, $type, $category, $institution);
        return $results;
    }

    /**
     * Self search: handed straight to the internal search plugin.
     */
    public static function self_search($query_string, $limit, $offset, $type = 'all') {
        $results = PluginSearchInternal::self_search($query_string, $limit, $offset, $type);
        return $results;
    }

    /**
     * Admin user search: reduces the query to a plain string and hands it to
     * the internal search plugin.
     */
    public static function admin_search_user($query_string, $constraints, $offset, $limit,
                                             $sortfield, $sortdir) {

        // get_admin_user_search_results() in lib/searchlib.php has hard-coded special
        // handling for the internal plugin, so the query may arrive as an array of
        // terms; only the 'string' of the first term is passed on.
        $hasterms = is_array($query_string) && count($query_string) > 0;
        $query_string = $hasterms ? $query_string[0]['string'] : "";

        return PluginSearchInternal::admin_search_user(
            $query_string,
            $constraints,
            $offset,
            $limit,
            $sortfield,
            $sortdir
        );
    }

1112
   /**
    * Assembles the host definition and curl options used to build a client.
    *
    * @param string $type 'write' switches to the indexing credentials when an
    *                     indexing username is configured
    * @return array array('hosts' => array(host definition), 'curlopts' => curl options)
    */
   public static function get_client_config($type='read') {
       $node = array(
           'host' => get_config_plugin('search', 'elasticsearch', 'host'),
           'port' => get_config_plugin('search', 'elasticsearch', 'port'),
       );

       // Build array of curlopts
       $curlopts = array(CURLOPT_CONNECTTIMEOUT => 3);

       // Credentials are only sent when a username is configured.
       $username = get_config_plugin('search', 'elasticsearch', 'username');
       if ($username) {
           $password = get_config_plugin('search', 'elasticsearch', 'password');
           if ($type == 'write') {
               $indexingusername = get_config_plugin('search', 'elasticsearch', 'indexingusername');
               if ($indexingusername) {
                   $username = $indexingusername;
                   $password = get_config_plugin('search', 'elasticsearch', 'indexingpassword');
               }
           }
           $node['username'] = $username;
           $node['password'] = $password;
           $curlopts[CURLOPT_USERPWD] = $username . ':' . $password;
       }

       $scheme = get_config_plugin('search', 'elasticsearch', 'scheme');
       if ($scheme) {
           $node['scheme'] = $scheme;
           // Certificate checks can only be switched off outside production mode.
           if (!get_config('productionmode') && get_config_plugin('search', 'elasticsearch', 'ignoressl')) {
               // Ignore verifying the SSL certificate
               $curlopts[CURLOPT_SSL_VERIFYHOST] = false;
               $curlopts[CURLOPT_SSL_VERIFYPEER] = false;
           }
       }

       $proxy = get_config('proxyaddress');
       if ($proxy) {
           $curlopts[CURLOPT_PROXY] = $proxy;
           $curlopts[CURLOPT_HTTPHEADER] = array('Transfer-Encoding: chunked');
           if (get_config('proxyauthmodel') && get_config('proxyauthcredentials')) {
               // @TODO: actually do something with $proxy_authmodel.
               $curlopts[CURLOPT_PROXYUSERPWD] = get_config('proxyauthcredentials');
           }
       }

       return array('hosts' => array($node), 'curlopts' => $curlopts);
   }

   /**
    * Creates an \Elasticsearch\Client object, filling in the host and
    * port with the values from the elasticsearch plugin's admin screen.
    * If you wanted to make other changes to how we connect to elasticsearch,
    * this would be a good place to do it.
    *
    * @param string $type 'read' or 'write'; passed on to get_client_config()
    * @return \Elasticsearch\Client
    */
   public static function make_client($type='read') {
       $config = self::get_client_config($type);

       $builder = ClientBuilder::create();
       $builder->setHosts($config['hosts']);
       $builder->setConnectionParams(array('client' => array('curl' => $config['curlopts'])));

       // php versions < 5.6.6 dont have JSON_PRESERVE_ZERO_FRACTION defined
       if (version_compare(phpversion(), '5.6.6', '<') || !defined('JSON_PRESERVE_ZERO_FRACTION')) {
           $builder->allowBadJSONSerialization();
       }

       return $builder->build();
   }
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206

   /**
    * Return the name of the index to use for writing. Basically, this is bypassindexname
    * if that is supplied, and indexname if not.
    *
    * @return string
    */
   public static function get_write_indexname() {
       // bypassindexname lets a site reindex into a new index while searches
       // keep using the old one, so it takes precedence whenever it is set.
       $bypass = get_config_plugin('search', 'elasticsearch', 'bypassindexname');
       if ($bypass) {
           return $bypass;
       }
       return get_config_plugin('search', 'elasticsearch', 'indexname');
   }

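   /**
    * Builds the "results" table seen on the universal search results page
    * @param array $data Search results and settings, passed by reference; 'tablerows',
    *                    'pagination' and 'pagination_js' are added to it
    */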
   public static function build_results_html(&$data) {

       $smarty = smarty_core();
       $smarty->assign('data', !empty($data['data']) ? $data['data'] : null);

       // $data key => array(pagination URL parameter, template variable or null).
       // The order here is the order of the parameters in the pagination URL.
       $filters = array(
           'query'                   => array('query', null),
           'selected'                => array('mainfacetterm', 'selected'),
           'content-filter-selected' => array('secfacetterm', 'contentfilterselected'),
           'owner-filter-selected'   => array('owner', 'owner'),
           'tagsonly'                => array('tagsonly', 'tagsonly'),
           'sort'                    => array('sort', 'sort'),
           'license'                 => array('license', 'license'),
       );
       $params = array();
       foreach ($filters as $datakey => $targets) {
           if (!isset($data[$datakey])) {
               continue;
           }
           list($paramname, $templatevar) = $targets;
           $params[$paramname] = $data[$datakey];
           if ($templatevar !== null) {
               $smarty->assign($templatevar, $data[$datakey]);
           }
       }

       // Paging values default to 0 and are written back into $data for the caller.
       foreach (array('count', 'limit', 'offset') as $pagingkey) {
           if (!isset($data[$pagingkey])) {
               $data[$pagingkey] = 0;
           }
       }
       $smarty->assign('limit', $data['limit']);
       $smarty->assign('offset', $data['offset']);

       $resultcounttextsingular = get_string('record', 'search.elasticsearch');
       $resultcounttextplural = get_string('records', 'search.elasticsearch');

       // Optional values handed to the template only when present: $data key => template variable.
       $passthrough = array(
           'facets'         => 'facets',
           'content-filter' => 'contentfilter',
           'owner-filter'   => 'ownerfilter',
           'totalresults'   => 'totalresults',
           'type'           => 'type',
       );
       foreach ($passthrough as $datakey => $templatevar) {
           if (isset($data[$datakey])) {
               $smarty->assign($templatevar, $data[$datakey]);
           }
       }

       // Only show licence if Text or Media tab is selected and license metadata site config is set
       if (isset($data['license_on']) && isset($data['license_options']) && isset($data['selected'])
           && ($data['selected'] == 'Media' || $data['selected'] == 'Text')) {
           $smarty->assign('license_on', $data['license_on']);
           $smarty->assign('license_options', $data['license_options']);
       }

       // $params['query'] only exists when $data['query'] was supplied; read it
       // defensively so a missing query does not raise an undefined-index error.
       $smarty->assign('query', isset($params['query']) ? $params['query'] : null);

       $data['tablerows'] = $smarty->fetch('Search:elasticsearch:searchresults.tpl');

       $pagination = build_pagination(array(
           'id' => 'elasticsearch_pagination',
           'url' => get_config('wwwroot') . 'search/elasticsearch/index.php?' . http_build_query($params),
           'jsonscript' => 'search/elasticsearch/json/elasticsearch.php',
           'datatable' => 'universalsearchresult',
           'count' => $data['count'],
           'setlimit' => $data['limit'],
           'limit' => $data['limit'],
           'offset' => $data['offset'],
           'jumplinks' => 6,
           'numbersincludeprevnext' => 2,
           'resultcounttextsingular' => $resultcounttextsingular,
           'resultcounttextplural' => $resultcounttextplural,
           'extradata' => array('page' => 'index'),
       ));
       $data['pagination'] = $pagination['html'];
       $data['pagination_js'] = $pagination['javascript'];
   }

    /**
     * Fix the $query string for things that can break elasticsearch.
     * @param string $query
     *
     * @return string
     */
    public function clean_query($query) {
        // Drop any backslashes first.
        $query = stripslashes($query);

        // Every one of these characters is replaced with a space.
        $badchars = array('"', '[', ']', '{', '}', '~', '^', '(', ')', '-', '+', '/', '!', ':');

        return str_replace($badchars, ' ', $query);
    }
}

/**
 * This class encapsulates the ACL filters.
 *
 * On construction it collects, under the 'should' key of its parameters, one
 * query clause for every way the given user can be allowed to see an indexed
 * item. Callers read the result back with get_params().
 */
class ElasticsearchFilterAcl
{
    private $user;
    private $params = array();

    /**
     * Build the access clauses for a user.
     *
     * @param object $user The user the search runs as. It must provide
     *                     is_logged_in() and get($field) for the fields
     *                     'id' and 'institutions'.
     */
    public function __construct($user) {
        $this->user = $user;

        // GENERAL         (public - loggedin - friends)
        // public: the only clause a logged-out user gets.
        $this->params['should'][] = $this->termClause('access.general', 'public');

        if (!$this->user->is_logged_in()) {
            return;
        }

        // loggedin
        $this->params['should'][] = $this->termClause('access.general', 'loggedin');

        // friends: access.general = friends and the owner is a friend of the current user
        if ($friends = $this->getFriendsList()) {
            $this->params['should'][] = $this->ownerAccessClause('friends', $friends);
        }

        // All groups: access.general = groups and the owner is a member of
        // the same group as the current user.
        if ($members = $this->getMembersList()) {
            $this->params['should'][] = $this->ownerAccessClause('groups', $members);
        }

        //    INSTITUTIONS     (array of institutions that have access to the artefact)
        // Only the keys are used; skip the clause entirely when the user
        // object hands back nothing usable.
        $institutions = $this->user->get('institutions');
        if (is_array($institutions) && count($institutions) > 0) {
            $this->params['should'][] = array(
                'terms' => array(
                    'access.institutions' => array_keys($institutions),
                ),
            );
        }

        // GROUPS (array of groups that have access to the artefact)
        // Each role gets its own clause, appended directly to 'should' so
        // that every entry of 'should' is a single query clause and nothing
        // is appended when no role matches.
        if ($groups = $this->getGroupsList()) {
            foreach ($this->getExistingRoles() as $role) {
                if (!empty($groups[$role])) {
                    $this->params['should'][] = array(
                        'terms' => array(
                            'access.groups.' . $role => $groups[$role],
                        ),
                    );
                }
            }
        }

        // USRS (array of user ids that have access to the artefact)
        $userid = $this->user->get('id');
        // if owner
        $this->params['should'][] = $this->termClause('owner', $userid);
        // in access.usrs list
        $this->params['should'][] = $this->termClause('access.usrs', $userid);
    }

    /**
     * Return the clauses built by the constructor.
     *
     * @return array An array whose 'should' key holds the list of access clauses.
     */
    public function get_params() {
        return $this->params;
    }

    /**
     * Build a single-value 'term' clause.
     *
     * @param string $field The indexed field to match.
     * @param mixed  $value The exact value the field must hold.
     *
     * @return array
     */
    private function termClause($field, $value) {
        return array(
            'term' => array(
                $field => $value,
            ),
        );
    }

    /**
     * Build a clause requiring both a given access.general level and an
     * owner drawn from the supplied list of user ids.
     *
     * @param string $level  The access.general value, e.g. 'friends' or 'groups'.
     * @param array  $owners The user ids the owner must be one of.
     *
     * @return array
     */
    private function ownerAccessClause($level, array $owners) {
        return array(
            'bool' => array(
                'must' => array(
                    $this->termClause('access.general', $level),
                    array(
                        'terms' => array(
                            'owner' => $owners,
                        ),
                    ),
                ),
            ),
        );
    }

    /**
     * List the ids of the current user's friends.
     *
     * @return array The friend ids; empty when there are none.
     */
    private function getFriendsList() {
        $list = array();
        $friends = get_friends($this->user->get('id'), 0, 0);
        if ($friends && array_key_exists('data', $friends) && is_array($friends['data'])) {
            foreach ($friends['data'] as $friend) {
                $list[] = $friend->id;
            }
        }
        return $list;
    }

    /**
     * Map the current user's group roles to group ids.
     *
     * Each group is filed under the role the user holds in it, and every
     * group is additionally filed under 'member'.
     *
     * @return array Role name => list of group ids.
     */
    private function getGroupsList() {
        $list = array();
        foreach (group_get_user_groups($this->user->get('id')) as $group) {
            $list[$group->role][] = $group->id;
            $list['member'][] = $group->id;
        }
        return $list;
    }

    /**
     * List the role names to build group clauses for.
     *
     * @return array 'all' followed by every distinct role in grouptype_roles.
     */
    private function getExistingRoles() {
        $roles = array('all');
        $rs = get_recordset_sql('SELECT DISTINCT role FROM {grouptype_roles}');
        $records = recordset_to_array($rs);
        // Guard in case the helper returns a falsy value instead of an empty
        // array when nothing matched.
        if ($records) {
            foreach ($records as $record) {
                $roles[] = $record->role;
            }
        }
        return $roles;
    }

    /**
     * List the ids of every other user who shares at least one group with
     * the current user.
     *
     * NEW: from totara. Might not be needed in mahara?
     *
     * @return array The user ids; empty when there are none.
     */
    private function getMembersList() {
        $userid = $this->user->get('id');
        $list = get_column_sql('SELECT DISTINCT gm2.member FROM {group_member} gm1
                               JOIN {group_member} gm2 ON gm1.group = gm2.group
                               WHERE gm1.member = ? AND gm2.member <> ?',
                array($userid, $userid));
        return !empty($list) ? $list : array();
    }
}


/**
 * Represents one of the "types" that elasticsearch can search against. These are "types"
 * in the elasticsearch sense: http://www.elasticsearch.org/guide/reference/api/search/indices-types/
 *
 * The currently active types are stored in the "search->elasticsearch->types" config variable.
 *
 * This isn't quite a fully fleshed-out Mahara plugin type, although it is an expandable area.
 * One notable limitation is that under the current implementation, the type name must match
 * up exactly with a Mahara table. Though since all the operations are read-only, you could
 * work around that with a view.
 */
abstract class ElasticsearchType
{
    /**
     * @var string The name of this search type. Should match the name of the class, and the name of a DB table
     */
    public static $type = null;

    protected $item_to_index;
    protected $mapping;
1514
1515
1516

    private static $mysqltriggeroperations = array('insert', 'update', 'delete');

1517
1518
1519