"use strict";
require("core-js/modules/es.object.keys.js");
require("core-js/modules/es.symbol.js");
require("core-js/modules/es.object.get-own-property-descriptor.js");
require("core-js/modules/es.object.get-own-property-descriptors.js");
require("core-js/modules/es.array.slice.js");
require("core-js/modules/es.array.from.js");
require("core-js/modules/es.symbol.description.js");
require("core-js/modules/es.symbol.iterator.js");
require("core-js/modules/es.weak-map.js");
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
var _typeof = require("@babel/runtime/helpers/typeof");
Object.defineProperty(exports, "__esModule", {
value: true
});
exports._bulkIndexEntities = _bulkIndexEntities;
exports.autocomplete = autocomplete;
exports.checkIfExists = checkIfExists;
exports.deleteEntity = deleteEntity;
exports.generateIndex = generateIndex;
exports.getDocumentToIndex = getDocumentToIndex;
exports.indexEntity = indexEntity;
exports.init = init;
exports.refreshIndex = refreshIndex;
exports.searchByName = searchByName;
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
require("core-js/modules/es.array.map.js");
require("core-js/modules/es.date.to-json.js");
require("core-js/modules/web.url.to-json.js");
require("core-js/modules/es.array.concat.js");
require("core-js/modules/es.regexp.exec.js");
require("core-js/modules/es.string.replace.js");
require("core-js/modules/es.array.includes.js");
require("core-js/modules/es.string.includes.js");
require("core-js/modules/es.array.iterator.js");
require("core-js/modules/es.object.to-string.js");
require("core-js/modules/es.promise.js");
require("core-js/modules/es.string.iterator.js");
require("core-js/modules/web.dom-collections.iterator.js");
require("core-js/modules/es.function.name.js");
require("core-js/modules/es.array.join.js");
require("core-js/modules/es.string.search.js");
require("core-js/modules/es.array.reduce.js");
require("core-js/modules/es.array.find.js");
require("core-js/modules/es.array.flat.js");
require("core-js/modules/es.array.unscopables.flat.js");
require("core-js/modules/web.dom-collections.for-each.js");
require("core-js/modules/es.array.filter.js");
require("core-js/modules/es.regexp.to-string.js");
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
var _upperFirst2 = _interopRequireDefault(require("lodash/upperFirst"));
var _snakeCase2 = _interopRequireDefault(require("lodash/snakeCase"));
var _isString2 = _interopRequireDefault(require("lodash/isString"));
var _camelCase2 = _interopRequireDefault(require("lodash/camelCase"));
var commonUtils = _interopRequireWildcard(require("./utils"));
var _elasticsearch = _interopRequireDefault(require("@elastic/elasticsearch"));
var _httpStatus = _interopRequireDefault(require("http-status"));
var _log = _interopRequireDefault(require("log"));
/**
 * Babel interop helper used for `import * as ns` style requires.
 *
 * - When `t` is falsy and `e.__esModule` is set, `e` is returned untouched.
 * - Otherwise a null-prototype object is built with `default: e`; when `e` is an object or a
 *   function, every own enumerable key other than "default" is copied onto it (accessors are
 *   copied as accessors, everything else by value).
 * - Results are cached per module in a WeakMap (one cache per value of the `t` flag) when
 *   WeakMap is available.
 *
 * On its first call the function replaces itself with the inner implementation, so the caches
 * are created only once.
 */
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function _interopRequireWildcard(e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != _typeof(e) && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (var _t in e) { "default" !== _t && {}.hasOwnProperty.call(e, _t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, _t)) && (i.get || i.set) ? o(f, _t, i) : f[_t] = e[_t]); } return f; })(e, t); }
/**
 * Babel helper behind transpiled `for...of` loops.
 *
 * Returns a controller with four methods:
 *   s() - start iterating, n() - fetch the next step ({ done, value }),
 *   e(err) - record (or, for index-based iteration, rethrow) an error raised by the loop body,
 *   f() - finish: close an unfinished native iterator and rethrow a recorded error.
 *
 * @param {*} r - the value being iterated
 * @param {boolean} [e] - when truthy, plain array-likes (numeric `length`) are accepted too
 * @throws {TypeError} when `r` cannot be iterated
 */
function _createForOfIteratorHelper(r, e) {
  var iteratorSource = "undefined" != typeof Symbol && r[Symbol.iterator] || r["@@iterator"];

  if (iteratorSource) {
    // Native iterator protocol
    var recordedError;
    var finished = true;
    var hasRecordedError = false;
    return {
      s: function start() {
        iteratorSource = iteratorSource.call(r);
      },
      n: function next() {
        var step = iteratorSource.next();
        finished = step.done;
        return step;
      },
      e: function fail(error) {
        hasRecordedError = true;
        recordedError = error;
      },
      f: function finish() {
        try {
          // Only an iterator that was left half-way needs closing
          if (!finished && iteratorSource.return != null) {
            iteratorSource.return();
          }
        } finally {
          if (hasRecordedError) {
            throw recordedError;
          }
        }
      }
    };
  }

  // No iterator available: fall back to walking the value by index
  var converted;
  var indexable = Array.isArray(r) || (converted = _unsupportedIterableToArray(r)) || e && r && "number" == typeof r.length;
  if (!indexable) {
    throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  }
  if (converted) {
    r = converted;
  }
  var cursor = 0;
  var noop = function noop() {};
  return {
    s: noop,
    n: function next() {
      if (cursor >= r.length) {
        return { done: true };
      }
      return { done: false, value: r[cursor++] };
    },
    e: function fail(error) {
      throw error;
    },
    f: noop
  };
}
/**
 * Babel helper: converts a few known non-array containers to a real array.
 *
 * Strings, `arguments` objects and integer typed arrays go through _arrayLikeToArray (honouring
 * the optional length limit `a`); Map and Set go through Array.from. Anything else - including
 * falsy input - yields undefined.
 */
function _unsupportedIterableToArray(r, a) {
  if (!r) {
    return undefined;
  }
  if (typeof r === "string") {
    return _arrayLikeToArray(r, a);
  }

  // "[object Xyz]" -> "Xyz"; plain objects are identified by their constructor name instead
  var tag = Object.prototype.toString.call(r).slice(8, -1);
  if (tag === "Object" && r.constructor) {
    tag = r.constructor.name;
  }

  if (tag === "Map" || tag === "Set") {
    return Array.from(r);
  }
  if (tag === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag)) {
    return _arrayLikeToArray(r, a);
  }
  return undefined;
}
/**
 * Babel helper: copies the first `a` entries of an array-like into a new dense array.
 * A missing `a`, or one larger than `r.length`, means "copy everything".
 */
function _arrayLikeToArray(r, a) {
  if (a == null || a > r.length) {
    a = r.length;
  }
  var copy = new Array(a);
  var position = 0;
  while (position < a) {
    copy[position] = r[position];
    position += 1;
  }
  return copy;
}
/**
 * Babel helper: lists an object's own enumerable string keys followed by its own symbol keys.
 * When `r` is truthy, non-enumerable symbols are left out.
 */
function ownKeys(e, r) {
  var keys = Object.keys(e);
  if (!Object.getOwnPropertySymbols) {
    return keys;
  }
  var symbols = Object.getOwnPropertySymbols(e);
  if (r) {
    symbols = symbols.filter(function (symbol) {
      return Object.getOwnPropertyDescriptor(e, symbol).enumerable;
    });
  }
  keys.push.apply(keys, symbols);
  return keys;
}
/**
 * Babel helper implementing object spread: copies every source argument onto the target `e`
 * and returns `e`. Null/undefined sources are treated as empty objects.
 *
 * Sources at odd argument positions are copied by value through the defineProperty helper
 * (enumerable own keys and symbols only); sources at even positions are copied by property
 * descriptor, so accessors stay accessors.
 */
function _objectSpread(e) {
  for (var position = 1; position < arguments.length; position++) {
    var source = arguments[position] != null ? arguments[position] : {};
    if (position % 2) {
      ownKeys(Object(source), true).forEach(function (key) {
        (0, _defineProperty2.default)(e, key, source[key]);
      });
    } else if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(e, Object.getOwnPropertyDescriptors(source));
    } else {
      ownKeys(Object(source)).forEach(function (key) {
        Object.defineProperty(e, key, Object.getOwnPropertyDescriptor(source, key));
      });
    }
  }
  return e;
}
/* eslint-disable lines-around-comment, sort-keys, camelcase */
/*
 * Copyright (C) 2016 Sean Burke
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
// Name of the single ElasticSearch index this module reads from and writes to
var _index = 'bookbrainz';
// Number of queued documents at which _processEntityListForBulk dispatches a bulk request
var _bulkIndexSize = 10000;
// In milliseconds
// Base wait before re-sending bulk items rejected with "too many requests";
// a random jitter below _maxJitter is added to it on every retry
var _retryDelay = 10;
var _maxJitter = 75;
// ElasticSearch client handle; starts as null and is assigned outside this part of the file
// (presumably by init() — TODO confirm)
var _client = null;
/**
 * Normalizes an entity type (or a list of them) to snake_case for use in a type filter.
 *
 * @param {string|string[]} type - e.g. 'EditionGroup', ['Author', 'Work'] or 'allEntities'
 * @returns {string|string[]|null} null for falsy input; the full list of entity types for
 *   'allEntities' (in any casing that snake-cases to 'all_entities'); otherwise the
 *   snake_cased value(s)
 */
function sanitizeEntityType(type) {
  if (!type) {
    return null;
  }
  if (Array.isArray(type)) {
    return type.map(_snakeCase2.default);
  }
  var normalized = (0, _snakeCase2.default)(type);
  return normalized === 'all_entities' ? ['author', 'edition', 'edition_group', 'series', 'work', 'publisher'] : normalized;
}
// Entity properties kept in the indexed document for every entity type;
// getDocumentToIndex passes this list (plus per-type extras) as the `visible` option of toJSON
var commonProperties = ['bbid', 'id', 'name', 'type', 'disambiguation'];
/*
Index settings and mappings for ElasticSearch. We use a single index for all entity types, and differentiate them by a `type` field on the documents.
Customizing this is a painful process of trial and error, but you can find some great hints here:
https://web.archive.org/web/20251015115752/https://developer.ibm.com/articles/elasticsearch-ultimate-guide/
https://web.archive.org/web/20260303170636/https://oneuptime.com/blog/post/2026-01-25-elasticsearch-mappings/view
This object is passed as-is to `_client.indices.create` when the index is (re)created.
*/
var indexSettings = {
index: _index,
settings: {
index: {
max_ngram_diff: 4,
'mapping.ignore_malformed': true
},
analysis: {
/* This filter is used to sanitize identifiers and other 'keyword' field types. Removes non-alphanumeric characters */
char_filter: {
identifier_cleaner: {
type: 'pattern_replace',
pattern: '[^a-zA-Z0-9]',
replacement: ''
}
},
/*
Filter out/deprioritize stop words for most popular languages.
TODO: check whether we can surface the alias language to make this more precise
NOTE(review): confirm that every `_language_` name below is a predefined stop-word list in the ElasticSearch version in use
*/
filter: {
custom_stop_words_filter: {
type: 'stop',
ignore_case: true,
stopwords: ['_english_', '_french_', '_german_', '_spanish_', '_italian_', '_portuguese_', '_russian_', '_arabic_', '_chinese_', '_japanese_', '_norwegian_', '_hindi_']
}
},
/*
Analysers are used for text fields both at indexing and at search time to break up and clean up the text.
We don't use different analysers for indexing vs. searching.
*/
analyzer: {
/*
Customize the built-in standard tokenizer (grammar based tokenization), see https://www.elastic.co/docs/reference/text-analysis/analysis-standard-analyzer
Also deburrs accents (asciifolding), lowercases and filters stop words for more languages.
This is the main analyser used for most text fields such as name and disambiguation.
*/
custom_standard: {
type: 'custom',
tokenizer: 'standard',
filter: ['asciifolding', 'lowercase', 'custom_stop_words_filter']
},
/* Deburrs accents (asciifolding), lowercases, filters stop words and uses an n-gram tokenizer to allow partial matches within the words
Ref: https://www.elastic.co/docs/reference/text-analysis/analysis-ngram-tokenizer
*/
ngrams_analyzer: {
type: 'custom',
tokenizer: 'ngram_tokenizer',
filter: ['asciifolding', 'lowercase', 'custom_stop_words_filter']
},
/* Cleans up keyword fields such as identifiers: removes non-alphanumeric characters and lowercases the value */
identifier_analyzer: {
type: 'custom',
char_filter: ['identifier_cleaner'],
tokenizer: 'keyword',
filter: ['lowercase']
}
},
normalizer: {
/* Used only for the keyword type when filtering by entity type; keyword fields require a 'normalizer', not an 'analyser' */
keyword_normalizer: {
type: 'custom',
char_filter: ['identifier_cleaner'],
filter: ['lowercase']
}
},
/* Allows for partial matches within the text (n-grams of 3 to 6 characters). Values modified by trial and error, can be reviewed */
tokenizer: {
ngram_tokenizer: {
type: 'ngram',
min_gram: 3,
max_gram: 6
}
}
}
},
mappings: {
properties: {
/*
Main search field for entity names. Analyse with n-grams (the `trigrams` sub-field) to allow for typos, and use the built-in search-as-you-type for the autocomplete endpoint.
Use the customized standard analyser first, then also analyse with n-grams for typos/partial matches and autocompletion
Ref: https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/search-as-you-type
*/
aliases: {
properties: {
name: {
type: 'text',
analyzer: 'custom_standard',
fields: {
trigrams: {
type: 'text',
analyzer: 'ngrams_analyzer'
},
suggest: {
type: 'search_as_you_type'
}
}
}
}
},
/* Search Works by author name; author names are attached to the document at indexing time */
authors: {
type: 'text',
analyzer: 'custom_standard',
fields: {
trigrams: {
type: 'text',
analyzer: 'ngrams_analyzer'
}
}
},
/* Used only for filtering by entity type; uses a keyword 'type' field on the documents */
type: {
type: 'keyword',
normalizer: 'keyword_normalizer'
},
/* Disambiguation comments are searchable too, but are ranked much lower */
disambiguation: {
type: 'text',
analyzer: 'custom_standard'
},
/* Used to search by identifier such as ISBNs */
identifiers: {
properties: {
value: {
type: 'text',
analyzer: 'identifier_analyzer',
fields: {
keyword: {
type: 'keyword'
}
}
}
}
}
}
}
};
/**
 * Helper to normalize the indices.exists response across client versions.
 *
 * Depending on the client version/options, `indices.exists` resolves either to a bare boolean
 * or to a response object carrying the boolean in `body`. A response object is always truthy,
 * so returning it as-is would make a missing index look like an existing one; this helper
 * always resolves to a real boolean instead.
 *
 * @param {string} indexName - name of the index to look up
 * @returns {Promise<boolean>} true when the index exists; rejects when the client call fails
 */
function _indexExists(indexName) {
  return new Promise(function (resolve) {
    // Calling the client inside the executor turns a synchronous throw into a rejection
    resolve(_client.indices.exists({
      index: indexName
    }));
  }).then(function (response) {
    var isWrapped = response !== null && typeof response === 'object' && 'body' in response;
    return Boolean(isWrapped ? response.body : response);
  });
}
// Helper to add a `type` filter into a dslQuery body, since ES 7+ removed document types
/**
 * Restricts a DSL query to one or more entity types. Mutates `dslQuery` in place.
 *
 * Filtering relies on the `type` attribute of the documents, which is indexed as a keyword
 * (lowercased and dashes removed). The existing query becomes the `must` clause of a bool
 * query whose `filter` is a terms query on the sanitized type(s); a single type is wrapped
 * in an array so one code path serves both cases.
 *
 * Nothing happens when the type sanitizes to a falsy value.
 *
 * @param {Object} dslQuery - search request whose `query` property is rewritten
 * @param {string|string[]} type - entity type(s) to keep
 */
function _applyTypeFilterToDSL(dslQuery, type) {
  var requestedTypes = sanitizeEntityType(type);
  if (requestedTypes) {
    var originalQuery = dslQuery.query;
    dslQuery.query = {
      bool: {
        filter: {
          terms: {
            type: [].concat(requestedTypes)
          }
        },
        must: originalQuery
      }
    };
  }
}
/* We don't currently want to index the entire Model in ElasticSearch,
which contains a lot of fields we don't use as well as some internal props (_pivot props).
This utility function reduces the Model to the minimal object that will be indexed:
the common properties (plus `authors` for Works), its aliases and its identifiers.
*/
function getDocumentToIndex(entity) {
  // Serializes entity.related(setName).related(listName); undefined when either hop is missing
  function relatedListToJSON(setName, listName, visibleField) {
    var set = entity.related(setName);
    if (set == null) {
      return undefined;
    }
    var list = set.related(listName);
    if (list == null) {
      return undefined;
    }
    return list.toJSON({
      ignorePivot: true,
      visible: visibleField
    });
  }

  // Only Works carry an extra indexed property
  var extraProperties = entity.get('type') === 'Work' ? ['authors'] : [];

  var aliases = relatedListToJSON('aliasSet', 'aliases', 'name');
  if (!aliases) {
    // Some models don't have the same aliasSet structure, i.e. Collection, Editor, Area, …
    aliases = {
      name: entity.get('name')
    };
  }

  var identifiers = relatedListToJSON('identifierSet', 'identifiers', 'value');

  var documentToIndex = _objectSpread({}, entity.toJSON({
    ignorePivot: true,
    visible: commonProperties.concat(extraProperties)
  }));
  documentToIndex.aliases = aliases;
  documentToIndex.identifiers = identifiers == null ? null : identifiers;
  return documentToIndex;
}
/**
 * Loads the database record behind every ElasticSearch hit and returns them as JSON.
 *
 * @param {Object} orm - ORM object providing the Area, Editor and UserCollection models; it is
 *   also handed to commonUtils to resolve the models of regular entities
 * @param {Object} results - the `hits` section of a search response; its own `hits` array is
 *   what gets processed
 * @returns {Promise<Array|null|undefined>} null when `results` or `results.hits` is missing.
 *   Otherwise one entry per hit, in order; an entry is undefined when a regular entity no
 *   longer exists in the database. If any lookup rejects, the error is logged and the promise
 *   resolves to the logger's return value instead of an array.
 *
 * Areas, Editors and Collections are special-cased: they are fetched by id and given a
 * `defaultAlias` and a `type` so they look like regular entities. Regular entities are fetched
 * by BBID with their relations, and the source/target of each relationship is replaced by the
 * result of commonUtils.getEntity.
 */
function _fetchEntityModelsForESResults(_x2, _x3) {
return _fetchEntityModelsForESResults2.apply(this, arguments);
}
function _fetchEntityModelsForESResults2() {
_fetchEntityModelsForESResults2 = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee4(orm, results) {
var Area, Editor, UserCollection, processedResults;
return _regenerator.default.wrap(function _callee4$(_context4) {
while (1) {
switch (_context4.prev = _context4.next) {
case 0:
Area = orm.Area, Editor = orm.Editor, UserCollection = orm.UserCollection;
if (results !== null && results !== void 0 && results.hits) {
_context4.next = 3;
break;
}
return _context4.abrupt("return", null);
case 3:
_context4.next = 5;
return Promise.all(results.hits.map( /*#__PURE__*/function () {
var _ref = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee3(hit) {
var entityStub, areaTypeName, parentNames, area, areaJSON, areaParents, editor, editorJSON, collection, collectionJSON, Model, entity, entityJSON;
return _regenerator.default.wrap(function _callee3$(_context3) {
while (1) {
switch (_context3.prev = _context3.next) {
case 0:
entityStub = hit._source; // Special cases first
if (!(entityStub.type === 'Area')) {
_context3.next = 13;
break;
}
_context3.next = 4;
return new Area({
gid: entityStub.id
}).fetch({
withRelated: ['areaType']
});
case 4:
area = _context3.sent;
areaJSON = area.toJSON({
omitPivot: true
});
_context3.next = 8;
return area.parents();
case 8:
areaParents = _context3.sent;
areaJSON.defaultAlias = {
name: areaJSON.name
};
areaJSON.type = 'Area';
// Build "<area type> - <parent, parent, …>", leaving out whichever part is unavailable
// instead of printing the text "undefined"
areaTypeName = areaJSON.areaType && areaJSON.areaType.name ? areaJSON.areaType.name : '';
parentNames = areaParents && areaParents.length ? areaParents.map(function (parent) {
return parent.name;
}).join(', ') : '';
areaJSON.disambiguation = {
comment: "".concat(areaTypeName).concat(areaTypeName && parentNames ? ' - ' : '').concat(parentNames)
};
return _context3.abrupt("return", areaJSON);
case 13:
if (!(entityStub.type === 'Editor')) {
_context3.next = 22;
break;
}
_context3.next = 16;
return new Editor({
id: entityStub.id
}).fetch();
case 16:
editor = _context3.sent;
editorJSON = editor.toJSON({
omitPivot: true
});
editorJSON.defaultAlias = {
name: editorJSON.name
};
editorJSON.type = 'Editor';
editorJSON.id = entityStub.id;
return _context3.abrupt("return", editorJSON);
case 22:
if (!(entityStub.type === 'Collection')) {
_context3.next = 31;
break;
}
_context3.next = 25;
return new UserCollection({
id: entityStub.id
}).fetch();
case 25:
collection = _context3.sent;
collectionJSON = collection.toJSON({
omitPivot: true
});
collectionJSON.defaultAlias = {
name: collectionJSON.name
};
collectionJSON.type = 'Collection';
collectionJSON.id = entityStub.id;
return _context3.abrupt("return", collectionJSON);
case 31:
// Regular entity
Model = commonUtils.getEntityModelByType(orm, entityStub.type);
_context3.next = 34;
return new Model({
bbid: entityStub.bbid
}).fetch({
require: false,
withRelated: ['defaultAlias.language', 'disambiguation', 'aliasSet.aliases', 'identifierSet.identifiers', 'relationshipSet.relationships.source', 'relationshipSet.relationships.target', 'relationshipSet.relationships.type', 'annotation']
});
case 34:
entity = _context3.sent;
// With `require: false` the fetch may yield nothing, in which case entityJSON stays undefined
entityJSON = entity === null || entity === void 0 ? void 0 : entity.toJSON({
omitPivot: true
});
if (!(entityJSON && entityJSON.relationshipSet)) {
_context3.next = 40;
break;
}
_context3.next = 39;
return Promise.all(entityJSON.relationshipSet.relationships.map( /*#__PURE__*/function () {
var _ref2 = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee2(rel) {
return _regenerator.default.wrap(function _callee2$(_context2) {
while (1) {
switch (_context2.prev = _context2.next) {
case 0:
_context2.next = 2;
return commonUtils.getEntity(orm, rel.source.bbid, rel.source.type);
case 2:
rel.source = _context2.sent;
_context2.next = 5;
return commonUtils.getEntity(orm, rel.target.bbid, rel.target.type);
case 5:
rel.target = _context2.sent;
return _context2.abrupt("return", rel);
case 7:
case "end":
return _context2.stop();
}
}
}, _callee2);
}));
return function (_x18) {
return _ref2.apply(this, arguments);
};
}()));
case 39:
entityJSON.relationshipSet.relationships = _context3.sent;
case 40:
// Only attach author names when the entity was actually found: a stale document whose
// entity is gone from the database must not throw here and reject the whole result set
if (entityJSON && entityStub.authors) {
entityJSON.authors = entityStub.authors;
}
return _context3.abrupt("return", entityJSON);
case 42:
case "end":
return _context3.stop();
}
}
}, _callee3);
}));
return function (_x17) {
return _ref.apply(this, arguments);
};
}())).catch(function (err) {
return _log.default.error(err);
});
case 5:
processedResults = _context4.sent;
return _context4.abrupt("return", processedResults);
case 7:
case "end":
return _context4.stop();
}
}
}, _callee4);
}));
return _fetchEntityModelsForESResults2.apply(this, arguments);
}
// Returns the results of a search translated to entity objects
/**
 * Runs a DSL query against ElasticSearch and translates the hits to entity objects.
 *
 * @param {Object} orm - ORM object forwarded to _fetchEntityModelsForESResults
 * @param {Object} dslQuery - search request handed to the client as-is
 * @returns {Promise<{results: *, total: number}>} `results` is whatever
 *   _fetchEntityModelsForESResults resolves to; `total` is `hits.total` when it is a number,
 *   otherwise `hits.total.value`, defaulting to 0. Any failure along the way is logged and
 *   turned into `{ results: [], total: 0 }`.
 */
function _searchForEntities(orm, dslQuery) {
  return new Promise(function (resolve) {
    // Calling the client inside the executor turns a synchronous throw into a rejection
    resolve(_client.search(dslQuery));
  }).then(function (searchResponse) {
    var hits = searchResponse.hits;
    return Promise.resolve(_fetchEntityModelsForESResults(orm, hits)).then(function (results) {
      var total = hits.total;
      if (typeof total !== 'number') {
        // Object form of the total: read its `value`, defaulting to 0
        total = total == null || total.value == null ? 0 : total.value;
      }
      return {
        results: results,
        total: total || 0
      };
    });
  }).catch(function (error) {
    _log.default.error(error);
    return {
      results: [],
      total: 0
    };
  });
}
/**
 * Bulk-indexes a list of plain documents, re-sending the ones rejected with HTTP 429.
 *
 * - Resolves immediately when `entities` is empty.
 * - Every document is sent as an `index` action whose _id is its `bbid`, or its `id` when
 *   `bbid` is null/undefined.
 * - When the response reports errors, only the items whose status is TOO_MANY_REQUESTS are
 *   queued again; other per-item failures are neither retried nor reported here.
 * - Before re-sending, waits _retryDelay plus a random jitter below _maxJitter (milliseconds).
 *
 * NOTE(review): when the bulk call rejects (or resolves to nothing) the error is logged and the
 * same batch is sent again right away, with no delay and no attempt limit — confirm this is the
 * intended behaviour when the cluster is unreachable.
 *
 * @param {Array<Object>} entities - documents to index
 * @returns {Promise<void>}
 */
function _bulkIndexEntities(_x6) {
return _bulkIndexEntities2.apply(this, arguments);
}
function _bulkIndexEntities2() {
_bulkIndexEntities2 = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee7(entities) {
var entitiesToIndex, operationSucceeded, bulkOperations, bulkResponse;
return _regenerator.default.wrap(function _callee7$(_context7) {
while (1) {
switch (_context7.prev = _context7.next) {
case 0:
if (entities.length) {
_context7.next = 2;
break;
}
return _context7.abrupt("return");
case 2:
// Proxy the list of entities to index in case we need to retry
entitiesToIndex = entities;
operationSucceeded = false;
case 4:
// Loop head: keep going until an attempt ends without anything left to re-send
if (operationSucceeded) {
_context7.next = 25;
break;
}
// The bulk body alternates an action line and the document it applies to
bulkOperations = entitiesToIndex.reduce(function (accumulator, entity) {
var _entity$bbid2;
accumulator.push({
index: {
_id: (_entity$bbid2 = entity.bbid) !== null && _entity$bbid2 !== void 0 ? _entity$bbid2 : entity.id,
_index: _index
}
});
accumulator.push(entity);
return accumulator;
}, []);
// Assume success; flipped back below when items must be re-sent or the call fails
operationSucceeded = true;
_context7.prev = 7;
_context7.next = 10;
return _client.bulk({
body: bulkOperations
}).catch(function (error) {
_log.default.error('error bulk indexing entities for search:', error);
});
case 10:
bulkResponse = _context7.sent;
if (bulkResponse) {
_context7.next = 13;
break;
}
// The catch above swallowed a rejection (or the client returned nothing): force a retry
throw new Error('No response from bulk indexing operation');
case 13:
if (!((bulkResponse === null || bulkResponse === void 0 ? void 0 : bulkResponse.errors) === true)) {
_context7.next = 17;
break;
}
entitiesToIndex = bulkResponse.items.reduce(function (accumulator, item) {
// We currently only handle queue overrun
if (item.index.status === _httpStatus.default.TOO_MANY_REQUESTS) {
// Look the document up in the original list by the id it was sent with
var failedEntity = entities.find(function (element) {
var _element$bbid;
return ((_element$bbid = element.bbid) !== null && _element$bbid !== void 0 ? _element$bbid : element.id) === item.index._id;
});
accumulator.push(failedEntity);
}
return accumulator;
}, []);
if (!entitiesToIndex.length) {
_context7.next = 17;
break;
}
return _context7.delegateYield( /*#__PURE__*/_regenerator.default.mark(function _callee6() {
var jitter;
return _regenerator.default.wrap(function _callee6$(_context6) {
while (1) {
switch (_context6.prev = _context6.next) {
case 0:
operationSucceeded = false;
jitter = Math.random() * _maxJitter; // eslint-disable-next-line no-await-in-loop
_context6.next = 4;
return new Promise(function (resolve) {
return setTimeout(resolve, _retryDelay + jitter);
});
case 4:
case "end":
return _context6.stop();
}
}
}, _callee6);
})(), "t0", 17);
case 17:
_context7.next = 23;
break;
case 19:
// Catch handler for the attempt above: log and go round again with the current batch
_context7.prev = 19;
_context7.t1 = _context7["catch"](7);
_log.default.error('error bulk indexing entities for search:', _context7.t1);
operationSucceeded = false;
case 23:
_context7.next = 4;
break;
case 25:
case "end":
return _context7.stop();
}
}
}, _callee7, null, [[7, 19]]);
}));
return _bulkIndexEntities2.apply(this, arguments);
}
/**
 * Splits a list of documents into batches of at most _bulkIndexSize and bulk-indexes them.
 *
 * Every batch is handed to _bulkIndexEntities as soon as it is full, so all batches are in
 * flight before any of them is awaited. The remainder is dispatched too, even when it is empty.
 *
 * @param {Iterable<Object>} entityList - documents to index
 * @returns {Promise<void>} settles once every batch has settled; rejects if one of them rejects
 */
function _processEntityListForBulk(entityList) {
  return new Promise(function (resolve) {
    var pendingBatches = [];
    var currentBatch = [];
    var iterator = _createForOfIteratorHelper(entityList);
    var step;
    try {
      iterator.s();
      while (!(step = iterator.n()).done) {
        currentBatch.push(step.value);
        if (currentBatch.length >= _bulkIndexSize) {
          pendingBatches.push(_bulkIndexEntities(currentBatch));
          currentBatch = [];
        }
      }
    } catch (err) {
      iterator.e(err);
    } finally {
      iterator.f();
    }
    pendingBatches.push(_bulkIndexEntities(currentBatch));
    resolve(Promise.all(pendingBatches).then(function () {
      return undefined;
    }));
  });
}
// eslint-disable-next-line consistent-return
/**
 * Indexes (or re-indexes) a single entity model in the search index.
 *
 * The document id is the entity's `bbid`, falling back to its `id`. The document `type` is
 * stored inside the document itself; there are no ES mapping types in 7+.
 *
 * @param {Object} entity - model exposing `get()`, as expected by getDocumentToIndex
 * @returns {Promise|undefined} undefined when no entity is given; otherwise a promise for the
 *   client response. Indexing errors are logged, not rethrown, so the promise then resolves
 *   to undefined.
 */
function indexEntity(entity) {
  // Guard first: getDocumentToIndex dereferences the entity, so it must not run without one
  if (!entity) {
    return undefined;
  }
  var document = getDocumentToIndex(entity);
  return _client.index({
    body: document,
    id: entity.get('bbid') || entity.get('id'),
    index: _index
  }).catch(function (error) {
    _log.default.error('error indexing entity for search:', error);
  });
}
/**
 * Removes an entity's document from the search index.
 *
 * @param {Object} entity - plain object; its `bbid` is the document id, or its `id` when
 *   `bbid` is null/undefined (there are no document types in ES 7+)
 * @returns {Promise} the client response; deletion errors are logged, not rethrown, so the
 *   promise then resolves to undefined
 */
function deleteEntity(entity) {
  var documentId = entity.bbid;
  if (documentId === null || documentId === undefined) {
    documentId = entity.id;
  }
  var request = {
    id: documentId,
    index: _index
  };
  return _client.delete(request).catch(function (error) {
    _log.default.error('error deleting entity from index:', error);
  });
}
/**
 * Asks ElasticSearch to refresh the search index.
 *
 * @returns {Promise} the client response; errors are logged, not rethrown, so the promise then
 *   resolves to undefined
 */
function refreshIndex() {
  function reportFailure(error) {
    _log.default.error('Error refreshing search index:', error);
  }
  var request = {
    index: _index
  };
  return _client.indices.refresh(request).catch(reportFailure);
}
/**
 * Exported entry point for building the search index. Forwards `this` and every argument to
 * _generateIndex, which reads them as (orm, entityType = 'allEntities', recreateIndex = false).
 * Only the first parameter is declared here; the other two are picked up from `arguments`.
 * Returns whatever _generateIndex returns (it is wrapped with the asyncToGenerator helper).
 */
function generateIndex(_x8) {
return _generateIndex.apply(this, arguments);
}
function _generateIndex() {
_generateIndex = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee10(orm) {
var entityType,
recreateIndex,
Area,
Author,
Edition,
EditionGroup,
Editor,
Publisher,
Series,
UserCollection,
Work,
allEntities,
mainIndexExists,
shouldRecreateIndex,
entityBehaviors,
baseRelations,
entityLists,
_entityLists$find,
_entityLists$find2,
authorCollection,
workCollection,
listIndexes,
areaCollection,
areas,
processedAreas,
editorCollection,
editors,
processedEditors,
userCollections,
userCollectionsJSON,
processedCollections,
_args10 = arguments;
return _regenerator.default.wrap(function _callee10$(_context10) {
while (1) {
switch (_context10.prev = _context10.next) {
case 0:
entityType = _args10.length > 1 && _args10[1] !== undefined ? _args10[1] : 'allEntities';
recreateIndex = _args10.length > 2 && _args10[2] !== undefined ? _args10[2] : false;
Area = orm.Area, Author = orm.Author, Edition = orm.Edition, EditionGroup = orm.EditionGroup, Editor = orm.Editor, Publisher = orm.Publisher, Series = orm.Series, UserCollection = orm.UserCollection, Work = orm.Work;
allEntities = entityType === 'allEntities';
_context10.next = 6;
return _indexExists(_index);
case 6:
mainIndexExists = _context10.sent;
shouldRecreateIndex = !mainIndexExists || recreateIndex || allEntities;
if (!shouldRecreateIndex) {
_context10.next = 16;
break;
}
if (!mainIndexExists) {
_context10.next = 13;
break;
}
_log.default.notice('Deleting search index');
_context10.next = 13;
return _client.indices.delete({
index: _index
});
case 13:
_log.default.notice('Creating new search index');
_context10.next = 16;
return _client.indices.create(indexSettings);
case 16:
_log.default.notice("Starting indexing of ".concat(entityType));
entityBehaviors = [];
baseRelations = ['annotation', 'defaultAlias', 'aliasSet.aliases', 'identifierSet.identifiers'];
if (allEntities || entityType === 'Author' || entityType === 'Work') {
entityBehaviors.push({
model: Author,
relations: ['gender', 'beginArea', 'endArea'],
type: 'Author'
});
}
if (allEntities || entityType === 'Edition') {
entityBehaviors.push({
model: Edition,
relations: ['editionGroup', 'editionFormat', 'editionStatus'],
type: 'Edition'
});
}
if (allEntities || entityType === 'EditionGroup') {
entityBehaviors.push({
model: EditionGroup,
relations: [],
type: 'EditionGroup'
});
}
if (allEntities || entityType === 'Publisher') {
entityBehaviors.push({
model: Publisher,
relations: ['area'],
type: 'Publisher'
});
}
if (allEntities || entityType === 'Series') {
entityBehaviors.push({
model: Series,
relations: ['seriesOrderingType'],
type: 'Series'
});
}
if (allEntities || entityType === 'Work') {
_log.default.info('Also indexing Author entities');
entityBehaviors.push({
model: Work,
relations: ['relationshipSet.relationships.type'],
type: 'Work'
});
}
// Update the indexed entries for each entity type
_context10.next = 27;
return Promise.all(entityBehaviors.map( /*#__PURE__*/function () {
var _ref3 = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee9(behavior) {
var totalCount, maxChunk, collectionsPromises, _loop, i, collections, allModels;
return _regenerator.default.wrap(function _callee9$(_context9) {
while (1) {
switch (_context9.prev = _context9.next) {
case 0:
_log.default.info("Fetching ".concat(behavior.type, " models from the database"));
_context9.next = 3;
return behavior.model.query(function (qb) {
qb.where('master', true);
qb.whereNotNull('data_id');
}).count();
case 3:
totalCount = _context9.sent;
_log.default.info("".concat(totalCount, " ").concat(behavior.type, " models in total"));
maxChunk = 50000;
collectionsPromises = []; // Fetch by chunks of 50.000 entities
_loop = function _loop(i) {
var collection = behavior.model.forge().query(function (qb) {
qb.where('master', true);
qb.whereNotNull('data_id');
qb.limit(maxChunk);
qb.offset(i);
_log.default.info("Fetching ".concat(maxChunk, " ").concat(behavior.type, " models with offset ").concat(i));
}).fetchAll({
withRelated: baseRelations.concat(behavior.relations)
}).catch(function (err) {
_log.default.error(err);
throw err;
});
collectionsPromises.push(collection);
};
for (i = 0; i < totalCount; i += maxChunk) {
_loop(i);
}
_context9.next = 11;
return Promise.all(collectionsPromises);
case 11:
collections = _context9.sent;
// Put all models back into a single collection
allModels = collections.map(function (col) {
return col.models;
}).flat();
return _context9.abrupt("return", {
collection: behavior.model.collection(allModels),
type: behavior.type
});
case 14:
case "end":
return _context9.stop();
}
}
}, _callee9);
}));
return function (_x19) {
return _ref3.apply(this, arguments);
};
}()));
case 27:
entityLists = _context10.sent;
_log.default.info("Finished fetching entities from database for types ".concat(entityBehaviors.map(function (_ref4) {
var type = _ref4.type;
return type;
}).join(', ')));
if (allEntities || entityType === 'Work') {
_log.default.info('Attaching author names to Work entities');
authorCollection = (_entityLists$find = entityLists.find(function (result) {
return result.type === 'Author';
})) === null || _entityLists$find === void 0 ? void 0 : _entityLists$find.collection;
workCollection = (_entityLists$find2 = entityLists.find(function (result) {
return result.type === 'Work';
})) === null || _entityLists$find2 === void 0 ? void 0 : _entityLists$find2.collection;
workCollection === null || workCollection === void 0 ? void 0 : workCollection.forEach(function (workEntity) {
var relationshipSet = workEntity.related('relationshipSet');
if (relationshipSet) {
var _relationshipSet$rela;
var authorWroteWorkRels = (_relationshipSet$rela = relationshipSet.related('relationships')) === null || _relationshipSet$rela === void 0 ? void 0 : _relationshipSet$rela.filter(function (relationshipModel) {
return relationshipModel.get('typeId') === 8;
});
var authorNames = [];
authorWroteWorkRels.forEach(function (relationshipModel) {
var _source$related;
// Search for the Author in the already fetched BookshelfJS Collection
var sourceBBID = relationshipModel.get('sourceBbid');
var source = authorCollection.get(sourceBBID);
var name = source === null || source === void 0 ? void 0 : (_source$related = source.related('defaultAlias')) === null || _source$related === void 0 ? void 0 : _source$related.get('name');
if (name) {
authorNames.push(name);
}
});
workEntity.set('authors', authorNames);
}
});
}
listIndexes = []; // Index all the entities
entityLists.forEach(function (entityList) {
var listArray = entityList.collection.map(getDocumentToIndex);
listIndexes.push(_processEntityListForBulk(listArray));
});
if (!listIndexes.length) {
_context10.next = 37;
break;
}
_log.default.info("Indexing documents for entity type ".concat(entityType));
_context10.next = 36;
return Promise.all(listIndexes);
case 36:
_log.default.info("Finished indexing entity documents for entity type ".concat(entityType));
case 37:
if (!(allEntities || entityType === 'Area')) {
_context10.next = 47;
break;
}
_log.default.info('Indexing Areas');
_context10.next = 41;
return new Area().fetchAll();
case 41:
areaCollection = _context10.sent;
areas = areaCollection.toJSON({
omitPivot: true
});
/** To index names, we use aliases.name and type, which Areas don't have.
* We massage the area to return a similar format as BB entities
*/
processedAreas = areas.map(function (area) {
return {
aliases: [{
name: area.name
}],
id: area.gid,
type: 'Area'
};
});
_context10.next = 46;
return _processEntityListForBulk(processedAreas);
case 46:
_log.default.info('Finished indexing Areas');
case 47:
if (!(allEntities || entityType === 'Editor')) {
_context10.next = 57;
break;
}
_log.default.info('Indexing Editors');
_context10.next = 51;
return new Editor()
// no bots
.where('type_id', 1).fetchAll();
case 51:
editorCollection = _context10.sent;
editors = editorCollection.toJSON({
omitPivot: true
});
/** To index names, we use aliases.name and type, which Editors don't have.
* We massage the editor to return a similar format as BB entities
*/
processedEditors = editors.map(function (editor) {
return {
aliases: [{
name: editor.name
}],
id: editor.id,
type: 'Editor'
};
});
_context10.next = 56;
return _processEntityListForBulk(processedEditors);
case 56:
_log.default.info('Finished indexing Editors');
case 57:
if (!(allEntities || entityType === 'Collection')) {
_context10.next = 67;
break;
}
_log.default.info('Indexing Collections');
_context10.next = 61;
return new UserCollection().where({
public: true
}).fetchAll();
case 61:
userCollections = _context10.sent;
userCollectionsJSON = userCollections.toJSON({
omitPivot: true
});
/** To index names, we use aliases.name and type, which UserCollections don't have.
* We massage the collection to return a similar format as BB entities
*/
processedCollections = userCollectionsJSON.map(function (collection) {
return {
aliases: [{
name: collection.name
}],
id: collection.id,
type: 'Collection'
};
});
_context10.next = 66;
return _processEntityListForBulk(processedCollections);
case 66:
_log.default.info('Finished indexing Collections');
case 67:
_log.default.info('Refreshing search index');
_context10.next = 70;
return refreshIndex();
case 70:
_log.default.notice('Search indexing finished succesfully');
case 71:
case "end":
return _context10.stop();
}
}
}, _callee10);
}));
return _generateIndex.apply(this, arguments);
}
/**
 * Find the existing entities of a given type that share an alias name.
 *
 * The type is normalised with camelCase + upperFirst, the matching BBIDs are
 * requested from `orm.func.alias.getBBIDsWithMatchingAlias` inside a bookshelf
 * transaction, and every BBID is then loaded with `orm.func.entity.getEntity`.
 *
 * The transaction promise is used directly instead of being bridged through a
 * hand-made Promise: a transaction that fails before its handler is invoked
 * now rejects the result instead of leaving it pending forever.
 * NOTE(review): this relies on `bookshelf.transaction` resolving with the value
 * of the promise returned by the handler (Knex transaction behaviour) - confirm
 * against the ORM version in use. A failing lookup now rejects the handler, so
 * the (read-only) transaction is rolled back rather than committed.
 *
 * @param {object} orm - ORM exposing `bookshelf.transaction`,
 *   `func.alias.getBBIDsWithMatchingAlias` and `func.entity.getEntity`
 * @param {string} name - alias name handed unchanged to the BBID lookup
 * @param {string} type - entity type in any casing accepted by camelCase
 * @returns {Promise<Array>} resolves to the `getEntity` results, in the same
 *   order as the BBIDs returned by the lookup; rejects with the first error
 */
function checkIfExists(orm, name, type) {
  // Relations requested for every entity that is loaded
  var baseRelations = ['aliasSet.aliases.language', 'defaultAlias', 'disambiguation', 'identifierSet.identifiers.type', 'relationshipSet.relationships.type', 'revision.revision'];
  var formattedType;
  var pendingBBIDs;
  try {
    formattedType = (0, _upperFirst2.default)((0, _camelCase2.default)(type));
    pendingBBIDs = orm.bookshelf.transaction(function (transacting) {
      return orm.func.alias.getBBIDsWithMatchingAlias(transacting, formattedType, name);
    });
  } catch (error) {
    // Keep the async contract: synchronous failures surface as rejections
    return Promise.reject(error);
  }
  return Promise.resolve(pendingBBIDs).then(function (bbids) {
    // Follow-up: Fetch all entities in a single transaction from the postgres server
    return Promise.all(bbids.map(function (bbid) {
      return orm.func.entity.getEntity(orm, formattedType, bbid, baseRelations);
    }));
  });
}
/**
 * Run a search-as-you-type query and return only the matching results.
 *
 * When `commonUtils.isValidBBID` accepts the query, the lookup is done by id
 * instead of by text. The BBID is lowercased first, exactly like
 * `searchByName` does, so both entry points build the same `ids` query for
 * the same input.
 *
 * @param {object} orm - passed through to `_searchForEntities`
 * @param {string} query - text typed by the user, or a BBID
 * @param {string|string[]} type - handed to `_applyTypeFilterToDSL`
 * @param {number} [size=42] - optional fourth argument, maximum number of hits
 * @returns {Promise<Array>} resolves to the `results` property of the search
 *   response; rejects if building or running the query fails
 */
function autocomplete(orm, query, type) {
  var size = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 42;
  try {
    var queryBody;
    if (commonUtils.isValidBBID(query)) {
      queryBody = {
        ids: {
          /* Find by BBID directly instead of performing a text search */
          values: [String(query).toLowerCase()]
        }
      };
    } else {
      queryBody = {
        multi_match: {
          query: query.toLowerCase(),
          type: 'bool_prefix',
          /* Uses the built-in search_as_you_type mapping
          https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/search-as-you-type*/
          fields: ['aliases.name.suggest', 'aliases.name.suggest._2gram', 'aliases.name.suggest._3gram']
        }
      };
    }
    var dslQuery = {
      size: size,
      query: queryBody,
      index: _index
    };
    _applyTypeFilterToDSL(dslQuery, type);
    return Promise.resolve(_searchForEntities(orm, dslQuery)).then(function (searchResponse) {
      return searchResponse.results;
    });
  } catch (error) {
    // Keep the async contract: synchronous failures surface as rejections
    return Promise.reject(error);
  }
}
/**
 * Build and run the main name search.
 *
 * A name accepted by `commonUtils.isValidBBID` is looked up by id (lowercased).
 * Any other name becomes a `bool.should` query with three clauses: name and
 * disambiguation match, trigram partial match, and exact identifier match.
 *
 * @param {object} orm - passed through to `_searchForEntities`
 * @param {string} name - text to search for, or a BBID
 * @param {string|string[]} type - sanitized locally to decide on author
 *   matching, and handed as-is to `_applyTypeFilterToDSL`
 * @param {number} [size=20] - optional fourth argument, page size
 * @param {number} [from=0] - optional fifth argument, result offset
 * @returns {*} whatever `_searchForEntities` returns for the built query
 */
function searchByName(orm, name, type) {
  var size = arguments[3] === undefined ? 20 : arguments[3];
  var from = arguments[4] === undefined ? 0 : arguments[4];
  var sanitizedEntityType = sanitizeEntityType(type);
  var query = name.toLowerCase();
  var queryBody;
  if (commonUtils.isValidBBID(name)) {
    queryBody = {
      ids: {
        values: [query]
      }
    };
  } else {
    /* The name is matched along with the disambiguation text, the latter ranked far lower.
       Boost values were picked by trial and error and can be tuned. */
    var exactMatchFields = ['aliases.name^15', 'disambiguation'];
    /* Partial (trigram) matches rank above the disambiguation but below the name. */
    var partialMatchFields = ['aliases.name.trigrams^2'];
    var isWorkTypeOnly = sanitizedEntityType === 'work';
    var containsWorkType = Array.isArray(sanitizedEntityType) && sanitizedEntityType.includes('work');
    if (isWorkTypeOnly || containsWorkType) {
      /* Works are also searchable by author name. A work-only search gets a
         stronger boost because an author match is more likely to be what the
         user wants; a type list that merely contains works gets a neutral boost. */
      var authorBoost;
      if (isWorkTypeOnly) {
        authorBoost = 5;
      } else {
        authorBoost = 1;
      }
      exactMatchFields.push('authors^' + authorBoost);
      partialMatchFields.push('authors.trigrams^' + authorBoost);
    }
    var exactMatchClause = {
      multi_match: {
        query: query,
        fields: exactMatchFields,
        type: 'best_fields'
      }
    };
    /* Requires at least 75% of the terms to match, to discard overly lax hits. */
    var partialMatchClause = {
      multi_match: {
        query: query,
        fields: partialMatchFields,
        type: 'most_fields',
        minimum_should_match: '75%'
      }
    };
    /* Identifier search (for example ISBN): very high boost since hits should
       be exact matches; everything but ASCII letters and digits is stripped. */
    var identifierClause = {
      term: {
        'identifiers.value': {
          value: query.replace(/[^a-zA-Z0-9]/g, ''),
          boost: 30
        }
      }
    };
    queryBody = {
      bool: {
        should: [exactMatchClause, partialMatchClause, identifierClause]
      }
    };
  }
  var dslQuery = {
    from: from,
    size: size,
    query: queryBody,
    index: _index
  };
  _applyTypeFilterToDSL(dslQuery, type);
  return _searchForEntities(orm, dslQuery);
}
/**
 * Search init
 * @description Sets up the search server connection with defaults,
 * and returns a connection status boolean.
 * The client is stored in the module-level `_client`. When the ping succeeds
 * and the main index does not exist yet, index generation is started in the
 * background (not awaited); its failure is only handed to the error logger.
 * @param {ORM} orm the BookBrainz ORM
 * @param {ClientOptions} [options] Optional (but recommended) connection settings, will provide defaults if missing
 * or if `options.node` is not a string
 * @returns {Promise<boolean>} A Promise which resolves to the connection status boolean:
 * false when the ping fails, true otherwise. Rejects if the client cannot be
 * constructed or if the index existence check fails.
 */
function init(orm, options) {
  var pinged;
  try {
    // `options` is documented as optional: guard before reading `options.node`
    if (options && (0, _isString2.default)(options.node)) {
      _client = new _elasticsearch.default.Client(options);
    } else {
      var defaultOptions = {
        node: 'http://localhost:9200',
        requestTimeout: 60000
      };
      _log.default.warning('ElasticSearch configuration not provided. Using default settings.');
      _client = new _elasticsearch.default.Client(defaultOptions);
    }
  } catch (error) {
    // Keep the async contract: synchronous failures surface as rejections
    return Promise.reject(error);
  }
  try {
    pinged = Promise.resolve(_client.ping());
  } catch (pingError) {
    pinged = Promise.reject(pingError);
  }
  return pinged.then(function () {
    return true;
  }, function (pingError) {
    // An unreachable server is an expected situation: report it, do not reject
    _log.default.warning('Could not connect to ElasticSearch:', pingError.toString());
    return false;
  }).then(function (isConnected) {
    if (!isConnected) {
      return false;
    }
    return Promise.resolve(_indexExists(_index)).then(function (mainIndexExists) {
      if (!mainIndexExists) {
        // Automatically index on app startup if we haven't already, but don't block app setup
        generateIndex(orm).catch(_log.default.error);
      } else {
        _log.default.notice('Search index already exists, skipping generation');
      }
      return true;
    });
  });
}
//# sourceMappingURL=search.js.map