Source code for tibiawikisql.api

#  Copyright 2021 Allan Galarza
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""API to fetch information from `TibiaWiki <https://tibia.fandom.com>`_ through MediaWiki's API."""

import datetime
import json
import urllib.parse

import requests

from tibiawikisql import __version__
from tibiawikisql.utils import parse_templatates_data

BASE_URL = "https://tibia.fandom.com"


class WikiEntry:
    """A TibiaWiki entry.

    This is a partial object that is obtained when fetching category members.

    The following classes implement this:

    - :class:`Article`
    - :class:`Image`

    Attributes
    ----------
    article_id: :class:`int`
        The entry's id.
    title: :class:`str`
        The entry's title.
    timestamp : :class:`int`
        The date of the entry's last edit, represented as a unix timestamp.
        ``None`` if no timestamp was provided.
    """

    def __init__(self, article_id, title, timestamp=None):
        """Create a new entry.

        Parameters
        ----------
        article_id: :class:`int`
            The entry's id.
        title: :class:`str`
            The entry's title.
        timestamp: :class:`str`, :class:`datetime.datetime`, :class:`int`, optional
            The date of the last edit. Strings must use the format
            ``%Y-%m-%dT%H:%M:%SZ`` and are interpreted as UTC. Datetime objects
            are converted with :meth:`datetime.datetime.timestamp`. Any other
            value (e.g. an integer or ``None``) is stored unchanged.
        """
        self.article_id = article_id
        self.title = title
        if isinstance(timestamp, str):
            parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ")
            # The trailing "Z" denotes UTC. strptime() returns a naive datetime, which
            # timestamp() would otherwise interpret in the machine's local timezone.
            parsed = parsed.replace(tzinfo=datetime.timezone.utc)
            self.timestamp = int(parsed.timestamp())
        elif isinstance(timestamp, datetime.datetime):
            self.timestamp = int(timestamp.timestamp())
        else:
            # Always define the attribute so reading it never raises AttributeError.
            self.timestamp = timestamp

    def __repr__(self):
        return f"{self.__class__.__name__}(article_id={self.article_id},title={self.title!r})"

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.article_id == other.article_id
        return False

    def __hash__(self):
        # Equality is based on the article id only, so the hash must be as well.
        return hash(self.article_id)

    @property
    def url(self):
        """:class:`str`: The URL to the article's display page."""
        return f"{BASE_URL}/wiki/{urllib.parse.quote(self.title)}"
class Article(WikiEntry):
    """A wiki entry that also carries the source text of the article.

    Attributes
    ----------
    article_id: :class:`int`
        The article's internal id.
    title : :class:`str`
        The article's title.
    timestamp : :class:`int`
        The date of the entry's last edit, represented as a unix timestamp.
    content: :class:`str`
        The article's source content.
    """

    def __init__(self, article_id, title, *, timestamp=None, content=None):
        # Store the article-specific data, then let the base class handle the rest.
        self.content = content
        super().__init__(article_id, title, timestamp=timestamp)

    @property
    def infobox_attributes(self):
        """:class:`dict`: The template attributes parsed from the article's content."""
        source_text = self.content
        return parse_templatates_data(source_text)
class Image(WikiEntry):
    """Represents an image info.

    Attributes
    ----------
    article_id: int
        The image's internal id.
    title : str
        The image's title.
    timestamp : :class:`int`
        The date of the entry's last edit, represented as a unix timestamp.
    file_url: str
        The image's url.
    """

    def __init__(self, article_id, title, *, timestamp=None, file_url=None):
        super().__init__(article_id, title, timestamp)
        self.file_url = file_url

    @property
    def extension(self):
        """:class:`str`: The image's file extension, including the leading dot.

        ``None`` if the title contains no dot.
        """
        parts = self.title.split(".")
        if len(parts) == 1:
            return None
        return f".{parts[-1]}"

    @property
    def file_name(self):
        """:class:`str`: The image's file name, i.e. the title without the ``File:`` prefix."""
        prefix = "File:"
        # Only strip the leading namespace prefix; a later "File:" is part of the name.
        if self.title.startswith(prefix):
            return self.title[len(prefix):]
        return self.title

    @property
    def clean_name(self):
        """:class:`str`: The image's name without extension and prefix."""
        name = self.file_name
        extension = self.extension
        # Titles without an extension are returned as they are, and only the trailing
        # extension is removed, so names such as "a.gifted.gif" are not mangled.
        if extension and name.endswith(extension):
            return name[:-len(extension)]
        return name
class WikiClient:
    """Contains methods to communicate with TibiaWiki's API."""

    ENDPOINT = f"{BASE_URL}/api.php"

    headers = {
        'User-Agent': f'tibiawikisql {__version__}',
    }

    # Maximum number of times a failing image info request is attempted before giving up.
    _MAX_ATTEMPTS = 3

    @classmethod
    def get_category_members(cls, name, skip_index=True):
        """Create a generator that obtains entries in a certain category.

        Parameters
        ----------
        name: :class:`str`
            The category's name. ``Category:`` prefix is not necessary.
        skip_index: :class:`bool`
            Whether to skip index articles or not.

        Yields
        -------
        :class:`WikiEntry`
            Articles in this category.
        """
        params = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": f"Category:{name}",
            "cmlimit": 500,
            "cmtype": "page",
            "cmprop": "ids|title|sortkeyprefix|timestamp",
            "format": "json",
        }
        cmcontinue = None
        # The session is closed once the generator is exhausted or closed.
        with requests.Session() as s:
            s.headers.update(cls.headers)
            while True:
                params["cmcontinue"] = cmcontinue
                r = s.get(cls.ENDPOINT, params=params)
                data = json.loads(r.text)
                for member in data["query"]["categorymembers"]:
                    # Entries with a "*" sort key prefix are treated as index articles.
                    if member["sortkeyprefix"] == "*" and skip_index:
                        continue
                    yield WikiEntry(member["pageid"], member["title"], timestamp=member["timestamp"])
                try:
                    cmcontinue = data["continue"]["cmcontinue"]
                except KeyError:
                    # If there's no "cmcontinue", means we reached the end of the list.
                    break

    @classmethod
    def get_category_members_titles(cls, name, skip_index=True):
        """Create a generator that obtains a list of article titles in a category.

        Parameters
        ----------
        name: :class:`str`
            The category's name. ``Category:`` prefix is not necessary.
        skip_index: :class:`bool`
            Whether to skip index articles or not.

        Yields
        -------
        :class:`str`
            Titles of articles in this category.
        """
        for member in cls.get_category_members(name, skip_index):
            yield member.title

    @classmethod
    def get_image_info(cls, name):
        """Get an image's info.

        It is not required to prefix the name with ``File:``, but the extension is required.

        Parameters
        ----------
        name: :class:`str`
            The name of the image.

        Returns
        -------
        :class:`Image`
            The image's information, or ``None`` if no information was found.
        """
        gen = cls.get_images_info([name])
        try:
            # A default avoids leaking StopIteration when nothing is yielded.
            return next(gen, None)
        finally:
            # Closing the generator releases its HTTP session right away.
            gen.close()

    @classmethod
    def get_images_info(cls, names):
        """Get the information of a list of image names.

        It is not required to prefix the name with ``File:``, but the extension is required.

        .. warning ::

            The order of the returned images might not match the order of the provided names due to an API limitation.

        Parameters
        ----------
        names: :class:`list` of :class:`str`
            A list of names of images to get the info of.

        Yields
        -------
        :class:`Image`
            An image's information. ``None`` is yielded for images reported as missing.

        Raises
        ------
        requests.HTTPError
            If a request keeps returning an error status after several attempts.
        """
        params = {
            "action": "query",
            "prop": "imageinfo",
            "iiprop": "url|timestamp",
            "format": "json",
        }
        # The session is closed once the generator is exhausted or closed.
        with requests.Session() as s:
            s.headers.update(cls.headers)
            # Titles are requested in batches of 50.
            for start in range(0, len(names), 50):
                params["titles"] = "|".join(f"File:{n}" for n in names[start:start + 50])
                r = s.get(cls.ENDPOINT, params=params)
                attempts = 1
                # Retry failed requests a bounded number of times instead of looping forever.
                while r.status_code >= 400 and attempts < cls._MAX_ATTEMPTS:
                    r = s.get(cls.ENDPOINT, params=params)
                    attempts += 1
                r.raise_for_status()
                data = json.loads(r.text)
                for image in data["query"]["pages"].values():
                    if "missing" in image:
                        yield None
                        continue
                    try:
                        info = image["imageinfo"][0]
                        entry = Image(image["pageid"], image["title"], timestamp=info["timestamp"],
                                      file_url=info["url"])
                    except KeyError:
                        # Pages lacking the expected keys are skipped.
                        continue
                    yield entry

    @classmethod
    def get_articles(cls, names):
        """Create a generator that obtains a list of articles given their titles.

        .. warning ::

            The order of the returned articles might not match the order of the provided names due to an API limitation.

        Parameters
        ----------
        names: :class:`list` of :class:`str`
            A list of names of articles to get the info of.

        Yields
        -------
        :class:`Article`
            An article in the list of names. ``None`` is yielded for articles reported as missing.
        """
        params = {
            "action": "query",
            "prop": "revisions",
            "rvprop": "content|timestamp",
            "format": "json",
        }
        # The session is closed once the generator is exhausted or closed.
        with requests.Session() as s:
            s.headers.update(cls.headers)
            # Titles are requested in batches of 50.
            for start in range(0, len(names), 50):
                params["titles"] = "|".join(names[start:start + 50])
                r = s.get(cls.ENDPOINT, params=params)
                data = json.loads(r.text)
                for article in data["query"]["pages"].values():
                    if "missing" in article:
                        yield None
                        continue
                    revision = article["revisions"][0]
                    yield Article(article["pageid"], article["title"], timestamp=revision["timestamp"],
                                  content=revision["*"])

    @classmethod
    def get_article(cls, name):
        """Get an article's info.

        Parameters
        ----------
        name: str
            The name of the Article.

        Returns
        -------
        :class:`Article`
            The article matching the title, or ``None`` if it was not found.
        """
        gen = cls.get_articles([name])
        try:
            # A default avoids leaking StopIteration when nothing is yielded.
            return next(gen, None)
        finally:
            # Closing the generator releases its HTTP session right away.
            gen.close()