// server/helpers/wikimedia.js

"use strict";

require("core-js/modules/es.array.slice.js");
require("core-js/modules/es.function.name.js");
require("core-js/modules/es.array.from.js");
require("core-js/modules/es.symbol.js");
require("core-js/modules/es.symbol.description.js");
require("core-js/modules/es.symbol.iterator.js");
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.getAvailableWikipediaArticles = getAvailableWikipediaArticles;
exports.getWikipediaExtract = getWikipediaExtract;
exports.selectWikipediaPage = selectWikipediaPage;
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
require("core-js/modules/es.array.iterator.js");
require("core-js/modules/es.object.to-string.js");
require("core-js/modules/es.string.iterator.js");
require("core-js/modules/web.dom-collections.iterator.js");
require("core-js/modules/web.url.js");
require("core-js/modules/web.url-search-params.js");
require("core-js/modules/es.regexp.exec.js");
require("core-js/modules/es.string.search.js");
require("core-js/modules/es.regexp.to-string.js");
require("core-js/modules/es.array.map.js");
require("core-js/modules/es.array.filter.js");
require("core-js/modules/es.object.values.js");
require("core-js/modules/es.string.ends-with.js");
require("core-js/modules/es.string.replace.js");
require("core-js/modules/es.array.find.js");
require("core-js/modules/es.array.concat.js");
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
var _uniq2 = _interopRequireDefault(require("lodash/uniq"));
var _toLower2 = _interopRequireDefault(require("lodash/toLower"));
var _cache = require("../../common/helpers/cache");
var _dateFns = require("date-fns");
var _superagent = _interopRequireDefault(require("superagent"));
var _info = require("../info");
/**
 * Helper used by the transpiled `for...of` loops in this file.
 * Returns a small cursor object with four methods:
 *   s() - start iterating, n() - fetch the next `{ done, value }` step,
 *   e(err) - record/raise an error from the loop body, f() - finish (clean up, rethrow).
 *
 * @param {*} o - Value to iterate over.
 * @param {boolean} [allowArrayLike] - Also accept objects that merely have a numeric `length`.
 * @throws {TypeError} When `o` cannot be iterated in any supported way.
 */
function _createForOfIteratorHelper(o, allowArrayLike) {
  var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"];

  if (it) {
    // Iterator-protocol path: drive the value's own iterator.
    var finishedNormally = true;
    var sawError = false;
    var pendingError;
    return {
      s: function s() {
        it = it.call(o);
      },
      n: function n() {
        var step = it.next();
        finishedNormally = step.done;
        return step;
      },
      e: function e(_e2) {
        // remembered here, rethrown by f() after the iterator had a chance to close
        sawError = true;
        pendingError = _e2;
      },
      f: function f() {
        try {
          // an early exit from the loop must close the iterator
          if (!finishedNormally && it.return != null) it.return();
        } finally {
          if (sawError) throw pendingError;
        }
      }
    };
  }

  // Fallback path: index-based walk over arrays, converted iterables or array-likes.
  var indexable = Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number";
  if (!indexable) {
    throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  }
  if (it) o = it;
  var i = 0;
  var F = function F() {};
  return {
    s: F,
    n: function n() {
      if (i >= o.length) return {
        done: true
      };
      return {
        done: false,
        value: o[i++]
      };
    },
    e: function e(_e) {
      throw _e;
    },
    f: F
  };
}
/**
 * Turns a few well-known non-array containers into a plain array.
 * Strings, `arguments` objects and integer typed arrays are copied through
 * `_arrayLikeToArray`; `Map` and `Set` go through `Array.from`.
 *
 * @param {*} o - Candidate value.
 * @param {number} [minLen] - Forwarded to `_arrayLikeToArray` as the requested length.
 * @returns {Array|undefined} The array copy, or `undefined` for falsy or unrecognised values.
 */
function _unsupportedIterableToArray(o, minLen) {
  if (!o) {
    return undefined;
  }
  if (typeof o === "string") {
    return _arrayLikeToArray(o, minLen);
  }

  // "[object Xyz]" -> "Xyz"; plain objects fall back to their constructor's name
  var tag = Object.prototype.toString.call(o).slice(8, -1);
  if (tag === "Object" && o.constructor) {
    tag = o.constructor.name;
  }

  switch (tag) {
    case "Map":
    case "Set":
      return Array.from(o);
    case "Arguments":
      return _arrayLikeToArray(o, minLen);
    default:
      // Int8/16/32, Uint8/16/32 and Uint8Clamped arrays
      return /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag) ? _arrayLikeToArray(o, minLen) : undefined;
  }
}
/**
 * Copies the leading elements of an indexable value into a new array.
 *
 * @param {ArrayLike} arr - Source with a `length` and numeric indices.
 * @param {number} [len] - Number of elements to copy; missing or too large means "all of them".
 * @returns {Array} A fresh array holding the copied elements.
 */
function _arrayLikeToArray(arr, len) {
  var count = len == null || len > arr.length ? arr.length : len;
  var copy = new Array(count);
  var index = 0;
  while (index < count) {
    copy[index] = arr[index];
    index += 1;
  }
  return copy;
}

/*
 * Copyright (C) 2023  David Kellner
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
// incomplete, only the parts we need

/**
 * How long cached results stay valid, in seconds.
 * The values are passed as `expireTime` when results are written to the cache.
 */
var cacheMaxAge = {
  // lists of available articles: one week
  articles: (0, _dateFns.hoursToSeconds)(7 * 24),
  // page extracts: three days
  extract: (0, _dateFns.hoursToSeconds)(3 * 24)
};

/**
 * Looks up the Wikipedia articles, in every available language, that are linked to a Wikidata item.
 * @param {string} wikidataId - Wikidata item ID.
 * @param {object} [options] - Optional settings (second positional argument).
 * @param {boolean} [options.forceCache=false] - Only consult the cache; a cache miss then yields an empty list.
 * @returns {Promise<Array<{language: string, title: string}>>} The linked articles.
 */
function getAvailableWikipediaArticles(_x) {
  // `_x` only keeps the function's declared arity at 1; all arguments are forwarded untouched
  var forwarded = arguments;
  return _getAvailableWikipediaArticles.apply(this, forwarded);
}
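/**
 * Documentation for `selectWikipediaPage` (in this compiled file the comment sits above a different function).
 * Tries to find a Wikipedia article for the given Wikidata item in the first preferred language which is available.
 * @function selectWikipediaPage
 * @param {string} wikidataId - Wikidata item ID.
 * @param {object} [options] - Optional settings.
 * @param {boolean} [options.forceCache=false] - Passed on to `getAvailableWikipediaArticles`.
 * @param {string[]} [options.preferredLanguages=['en']] - List of language codes, preference in descending order.
 */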
/*
 * Async implementation behind `getAvailableWikipediaArticles(wikidataId, options)`.
 * On its first call this function replaces itself with the generator-driven async function
 * and forwards the call, so the async wrapper is only built once.
 *
 * Flow:
 *  1. Read the cache entry `wiki:articles:<wikidataId>`. A hit - or `options.forceCache` -
 *     ends the call without any request (a forced miss resolves to `[]`).
 *  2. Otherwise request the item's sitelinks from the Wikidata `wbgetentities` API.
 *  3. Reduce the sitelinks to `{ language, title }` pairs for Wikipedia editions only,
 *     hand them to the cache (lifetime `cacheMaxAge.articles`) and resolve with them.
 *
 * Rejects with an Error when the response holds no entity for `wikidataId` or the entity
 * has no `sitelinks` (e.g. the API reports the item as missing).
 */
function _getAvailableWikipediaArticles() {
  _getAvailableWikipediaArticles = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee(wikidataId) {
    var _result$entities;
    var _ref,
      _ref$forceCache,
      forceCache,
      cacheKey,
      cachedArticles,
      apiUrl,
      response,
      result,
      item,
      articles,
      _args = arguments;
    // Wikimedia projects whose site ID ends in "wiki" although they are not Wikipedia editions.
    var nonWikipediaSites = ['commonswiki', 'specieswiki', 'wikidatawiki', 'metawiki', 'mediawikiwiki', 'sourceswiki', 'incubatorwiki', 'foundationwiki', 'outreachwiki', 'wikimaniawiki', 'wikifunctionswiki'];
    return _regenerator.default.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            // second argument: optional `{ forceCache = false }`
            _ref = _args.length > 1 && _args[1] !== undefined ? _args[1] : {}, _ref$forceCache = _ref.forceCache, forceCache = _ref$forceCache === void 0 ? false : _ref$forceCache;
            cacheKey = "wiki:articles:".concat(wikidataId);
            _context.next = 4;
            return (0, _cache.getCachedJSON)(cacheKey);
          case 4:
            cachedArticles = _context.sent;
            if (!(cachedArticles || forceCache)) {
              _context.next = 7;
              break;
            }
            // cache hit, or caller asked for cache-only: never touch the network
            return _context.abrupt("return", cachedArticles || []);
          case 7:
            apiUrl = new URL('https://www.wikidata.org/w/api.php');
            apiUrl.search = new URLSearchParams({
              action: 'wbgetentities',
              format: 'json',
              ids: wikidataId,
              props: 'sitelinks'
            }).toString();
            _context.next = 11;
            return _superagent.default.get(apiUrl.href).set('User-Agent', _info.userAgent);
          case 11:
            response = _context.sent;
            result = response.body;
            item = (_result$entities = result.entities) === null || _result$entities === void 0 ? void 0 : _result$entities[wikidataId];
            // An entity entry without `sitelinks` (such as a "missing" placeholder) is as unusable
            // as no entry at all; report it with the descriptive error instead of letting
            // `Object.values(undefined)` fail with an opaque TypeError below.
            if (item && item.sitelinks) {
              _context.next = 16;
              break;
            }
            throw new Error("Failed to fetch Wikidata item ".concat(wikidataId));
          case 16:
            articles = Object.values(item.sitelinks)
            // only keep Wikipedia pages: "<language>wiki", minus the other projects named "...wiki"
            .filter(function (link) {
              return link.site.endsWith('wiki') && nonWikipediaSites.indexOf(link.site) === -1;
            }).map(function (page) {
              return {
                // drop project suffix; site IDs spell multi-part codes with underscores
                // ("zh_min_nanwiki") while language codes and subdomains use hyphens
                language: page.site.replace(/wiki$/, '').replace(/_/g, '-'),
                title: page.title
              };
            });
            // NOTE(review): the result of cacheJSON is not awaited here - presumably a deliberate
            // fire-and-forget write; confirm against the cache helper.
            (0, _cache.cacheJSON)(cacheKey, articles, {
              expireTime: cacheMaxAge.articles
            });
            return _context.abrupt("return", articles);
          case 19:
          case "end":
            return _context.stop();
        }
      }
    }, _callee);
  }));
  return _getAvailableWikipediaArticles.apply(this, arguments);
}
// Public entry point for picking the article of a Wikidata item in the best available language.
// Called as (wikidataId, { forceCache, preferredLanguages }); the work happens in
// `_selectWikipediaPage`, which receives every argument unchanged.
// `_x2` only keeps the function's declared arity at 1.
function selectWikipediaPage(_x2) {
  var forwarded = arguments;
  return _selectWikipediaPage.apply(this, forwarded);
}
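/**
 * Documentation for `getWikipediaExtract` (in this compiled file the comment sits above a different function).
 * Fetches the page extract of the given Wikipedia article.
 * @function getWikipediaExtract
 * @param {object} article - Title and language of the article.
 * @param {string} article.language - Language code, used as the Wikipedia subdomain.
 * @param {string} article.title - Title of the article.
 * @param {object} [options] - Optional settings.
 * @param {boolean} [options.forceCache=false] - Only consult the cache, never request the extract.
 */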
/*
 * Async implementation behind `selectWikipediaPage(wikidataId, options)`.
 * On its first call this function replaces itself with the generator-driven async function
 * and forwards the call.
 *
 * Options (second argument, all optional):
 *   forceCache         - handed to `getAvailableWikipediaArticles` (default false)
 *   preferredLanguages - language codes in descending preference (default ['en'])
 *
 * Resolves with the article of the first preferred language that has one, or `undefined`
 * when none of the preferred languages is available.
 */
function _selectWikipediaPage() {
  _selectWikipediaPage = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee2(wikidataId) {
    var options,
      forceCache,
      preferredLanguages,
      articles,
      candidates,
      position,
      match,
      _args2 = arguments;

    // Finds the article whose language equals the lower-cased `language`, if any.
    function articleFor(language) {
      return articles.find(function (page) {
        return page.language === (0, _toLower2.default)(language);
      });
    }
    return _regenerator.default.wrap(function _callee2$(_context2) {
      while (1) {
        switch (_context2.prev = _context2.next) {
          case 0:
            options = _args2.length > 1 && _args2[1] !== undefined ? _args2[1] : {};
            forceCache = options.forceCache;
            if (forceCache === void 0) {
              forceCache = false;
            }
            preferredLanguages = options.preferredLanguages;
            if (preferredLanguages === void 0) {
              preferredLanguages = ['en'];
            }
            _context2.next = 3;
            return getAvailableWikipediaArticles(wikidataId, {
              forceCache: forceCache
            });
          case 3:
            articles = _context2.sent;
            // duplicates are dropped first, so each language is checked at most once
            candidates = (0, _uniq2.default)(preferredLanguages);
            for (position = 0; position < candidates.length; position += 1) {
              match = articleFor(candidates[position]);
              if (match) {
                // earlier entries have higher preference: stop at the first hit
                break;
              }
            }
            return _context2.abrupt("return", match);
          case 5:
          case "end":
            return _context2.stop();
        }
      }
    }, _callee2);
  }));
  return _selectWikipediaPage.apply(this, arguments);
}
// Public entry point for fetching the extract of a Wikipedia article.
// Called as (article, { forceCache }) where `article` carries `language` and `title`;
// the work happens in `_getWikipediaExtract`, which receives every argument unchanged.
// `_x3` only keeps the function's declared arity at 1.
function getWikipediaExtract(_x3) {
  var forwarded = arguments;
  return _getWikipediaExtract.apply(this, forwarded);
}
/*
 * Async implementation behind `getWikipediaExtract(article, options)`.
 * On its first call this function replaces itself with the generator-driven async function
 * and forwards the call.
 *
 * Flow:
 *  1. Read the cache entry `wiki:extract:<language>:<title>`. A hit - or `options.forceCache` -
 *     ends the call without any request (a forced miss resolves with the empty cache result).
 *  2. Otherwise query `https://<language>.wikipedia.org/w/api.php` for the intro extract of
 *     the page, following redirects.
 *  3. Resolve with the first page object of the response and hand it to the cache
 *     (lifetime `cacheMaxAge.extract`). When the response contains no page, resolve with
 *     `undefined` and leave the cache untouched.
 *
 * NOTE(review): `article.language` becomes part of the request host name without validation;
 * confirm that callers only pass language codes from a trusted source.
 */
function _getWikipediaExtract() {
  _getWikipediaExtract = (0, _asyncToGenerator2.default)( /*#__PURE__*/_regenerator.default.mark(function _callee3(article) {
    var _result$query, _result$query$pages;
    var _ref3,
      _ref3$forceCache,
      forceCache,
      cacheKey,
      cachedExtract,
      apiUrl,
      response,
      result,
      pageExtract,
      _args3 = arguments;
    return _regenerator.default.wrap(function _callee3$(_context3) {
      while (1) {
        switch (_context3.prev = _context3.next) {
          case 0:
            // second argument: optional `{ forceCache = false }`
            _ref3 = _args3.length > 1 && _args3[1] !== undefined ? _args3[1] : {}, _ref3$forceCache = _ref3.forceCache, forceCache = _ref3$forceCache === void 0 ? false : _ref3$forceCache;
            cacheKey = "wiki:extract:".concat(article.language, ":").concat(article.title);
            _context3.next = 4;
            return (0, _cache.getCachedJSON)(cacheKey);
          case 4:
            cachedExtract = _context3.sent;
            if (!(cachedExtract || forceCache)) {
              _context3.next = 7;
              break;
            }
            // cache hit, or caller asked for cache-only: never touch the network
            return _context3.abrupt("return", cachedExtract);
          case 7:
            apiUrl = new URL("https://".concat(article.language, ".wikipedia.org/w/api.php"));
            apiUrl.search = new URLSearchParams({
              action: 'query',
              format: 'json',
              formatversion: '2',
              prop: 'extracts',
              // eslint-disable-next-line sort-keys -- `exintro` only allowed with `prop: 'extracts'`
              exintro: '1',
              redirects: '1',
              titles: article.title
            }).toString();
            _context3.next = 11;
            return _superagent.default.get(apiUrl.href).set('User-Agent', _info.userAgent);
          case 11:
            response = _context3.sent;
            result = response.body;
            pageExtract = (_result$query = result.query) === null || _result$query === void 0 ? void 0 : (_result$query$pages = _result$query.pages) === null || _result$query$pages === void 0 ? void 0 : _result$query$pages[0];
            // A response without any page (e.g. an API error payload) leaves nothing worth
            // storing: skip the cache write instead of passing `undefined` to the cache helper.
            if (pageExtract != null) {
              (0, _cache.cacheJSON)(cacheKey, pageExtract, {
                expireTime: cacheMaxAge.extract
              });
            }
            return _context3.abrupt("return", pageExtract);
          case 16:
          case "end":
            return _context3.stop();
        }
      }
    }, _callee3);
  }));
  return _getWikipediaExtract.apply(this, arguments);
}
//# sourceMappingURL=wikimedia.js.map