Source code for libacbf.libacbf

import re
import warnings
import magic
import distutils.util
import dateutil.parser
import langcodes
from io import UnsupportedOperation
from pathlib import Path
from datetime import date
from typing import List, Dict, Optional, Set, Union, Literal, IO
from base64 import b64encode
from lxml import etree
from zipfile import ZipFile
from py7zr import SevenZipFile
import tarfile as tar

import libacbf.helpers as helpers
import libacbf.constants as consts
import libacbf.metadata as metadata
import libacbf.body
from libacbf.bookdata import BookData
from libacbf.archivereader import ArchiveReader, get_archive_type
from libacbf.exceptions import InvalidBook, EditRARArchiveError, UnsupportedArchive


def _validate_acbf(tree, ns: str):
    """Validate XML tree with XSD.
    """
    version = re.split(r'/', ns)[-1]
    xsd_path = Path(__file__).parent / f"schema/acbf-{version}.xsd"

    with open(xsd_path, encoding="utf-8") as file:
        acbf_root = etree.fromstring(bytes(file.read(), encoding="utf-8"))

    acbf_tree = acbf_root.getroottree()
    acbf_schema = etree.XMLSchema(acbf_tree)

    if version == "1.0":
        try:
            acbf_schema.assertValid(tree)
        except etree.DocumentInvalid as err:
            warnings.warn("Validation failed. Books with 1.0 schema are not fully supported.\n"
                          "Change the ACBF tag at the top of the `.acbf` XML file to "
                          '`<ACBF xmlns="http://www.acbf.info/xml/acbf/1.1">` to use the 1.1 schema.', UserWarning)
            warnings.warn(str(err), UserWarning)
    else:
        acbf_schema.assertValid(tree)


def _update_authors(author_items, nsmap) -> List[metadata.Author]:
    """Takes a list of etree elements and returns a list of Author objects.
    """
    authors = []

    for au in author_items:
        first_name = None
        last_name = None
        nickname = None
        if au.find("first-name", namespaces=nsmap) is not None:
            first_name = au.find("first-name", namespaces=nsmap).text
        if au.find("last-name", namespaces=nsmap) is not None:
            last_name = au.find("last-name", namespaces=nsmap).text
        if au.find("nickname", namespaces=nsmap) is not None:
            nickname = au.find("nickname", namespaces=nsmap).text

        author: metadata.Author = metadata.Author(first_name, last_name, nickname)

        if "activity" in au.keys():
            author.activity = au.attrib["activity"]
        if "lang" in au.keys():
            author.lang = au.attrib["lang"]

        # Optional
        if au.find("middle-name", namespaces=nsmap) is not None:
            author.middle_name = au.find("middle-name", namespaces=nsmap).text
        if au.find("home-page", namespaces=nsmap) is not None:
            author.home_page = au.find("home-page", namespaces=nsmap).text
        if au.find("email", namespaces=nsmap) is not None:
            author.email = au.find("email", namespaces=nsmap).text

        authors.append(author)

    return authors


def _edit_date(section, attr_s: str, attr_d: str, dt: Union[str, date], include_date: bool = True):
    """Common function to edit a date property.
    """
    if isinstance(dt, str):
        date_text = dt
    else:
        date_text = dt.isoformat()
    setattr(section, attr_s, date_text)

    date_val = None
    if include_date:
        date_val = dt
        if isinstance(dt, str):
            date_val = dateutil.parser.parse(dt, fuzzy=True).date()
    setattr(section, attr_d, date_val)


def _fill_page(pg, page, nsmap, book):
    """Fill Page data from XML tree.
    """
    for fr in pg.findall("frame", namespaces=nsmap):
        frame = libacbf.body.Frame(helpers.pts_to_vec(fr.attrib["points"]))
        if "bgcolor" in fr.keys():
            frame.bgcolor = fr.attrib["bgcolor"]
        page.frames.append(frame)

    for jp in pg.findall("jump", namespaces=nsmap):
        jump = libacbf.body.Jump(helpers.pts_to_vec(jp.attrib["points"]), int(jp.attrib["page"]), book)
        page.jumps.append(jump)

    # Text Layers
    for tl in pg.findall("text-layer", namespaces=nsmap):
        lang = langcodes.standardize_tag(tl.attrib["lang"])
        layer = libacbf.body.TextLayer()
        page.text_layers[lang] = layer

        if "bgcolor" in tl.keys():
            layer.bgcolor = tl.attrib["bgcolor"]

        # Text Areas
        for ta in tl.findall("text-area", namespaces=nsmap):
            text = helpers.tree_to_para(ta, nsmap)
            pts = helpers.pts_to_vec(ta.attrib["points"])
            area = libacbf.body.TextArea(text, pts)
            layer.text_areas.append(area)

            if "bgcolor" in ta.keys():
                area.bgcolor = ta.attrib["bgcolor"]

            if "text-rotation" in ta.keys():
                rot = int(ta.attrib["text-rotation"])
                if 0 <= rot <= 360:
                    area.rotation = rot
                else:
                    raise ValueError("Rotation must be an integer from 0 to 360.")

            if "type" in ta.keys():
                area.type = consts.TextAreas[ta.attrib["type"]]

            if "inverted" in ta.keys():
                area.inverted = bool(distutils.util.strtobool(ta.attrib["inverted"]))

            if "transparent" in ta.keys():
                area.transparent = bool(distutils.util.strtobool(ta.attrib["transparent"]))


def _get_root_template(nsmap: Dict):
    """Get the lxml root tree for a basic ACBF book.

    Parameters
    ----------
    nsmap : dict
        Namespaces
    """
    ns = f"{{{nsmap[None]}}}"

    root = etree.Element(f"{ns}ACBF", nsmap=nsmap)
    meta = etree.SubElement(root, f"{ns}meta-data", nsmap=nsmap)
    etree.SubElement(root, f"{ns}body", nsmap=nsmap)

    etree.SubElement(meta, f"{ns}book-info", nsmap=nsmap)

    publish_info = etree.SubElement(meta, f"{ns}publish-info", nsmap=nsmap)
    etree.SubElement(publish_info, f"{ns}publisher", nsmap=nsmap)
    etree.SubElement(publish_info, f"{ns}publish-date", nsmap=nsmap)

    document_info = etree.SubElement(meta, f"{ns}document-info", nsmap=nsmap)
    etree.SubElement(document_info, f"{ns}creation-date", nsmap=nsmap)

    return root


[docs]def get_book_template(ns: str = None) -> str: """Get the bare minimum XML required to create an ACBF book. Returns ------- str XML string template. """ if ns is None: ns = helpers.namespaces["1.1"] return etree.tostring(_get_root_template({None: ns}).getroottree(), encoding="utf-8", xml_declaration=True, pretty_print=True ).decode("utf-8")
[docs]class ACBFBook: """Base class for reading ACBF ebooks. Parameters ---------- file : str | Path | IO Path or file object to write ACBF book to. May be absolute or relative. mode : 'r' | 'w' | 'a' | 'x', default='r' The mode to open the file in. Defaults to read-only mode. r Read only mode. No editing is possible. Can read ACBF, Zip, 7Zip, Tar and Rar formatted books. w Overwrite file with new file. Raises exception for Rar archive types. a Edit the book without truncating. Raises exception for Rar archive types. x Exclusive write to file. Raises ``FileExists`` exception if file already exists. Only works for file paths. Raises exception for Rar archive types. archive_type : str | None, default="Zip" The type of ACBF book that the file is. If ``None`` Then creates a plain XML book. Otherwise creates archive of format. Accepted string values are listed at :class:`ArchiveTypes <libacbf.constants.ArchiveTypes>`. Warning ------- You do not have to specify the type of archive unless you are creating a new one. The correct type will be determined regardless of this parameter's value. Use this when you want to create a new book. Raises ------ EditRARArchiveError Raised if ``mode`` parameter is not ``'r'`` but file is a Rar archive. InvalidBook Raised if the XML does not match ACBF schema or if archive does not contain ACBF file. See Also -------- `ACBF Specifications <https://acbf.fandom.com/wiki/Advanced_Comic_Book_Format_Wiki>`_. Notes ----- Archive formats use the defaults of each type like compression level etc. Manage the archives yourself if you want to change this. Image refs that are relative paths check within the archive if the book is an archive. Otherwise it checks relative to the '.acbf' file. So you can simply use a directory to manage the book and archive it with your own settings when you are done. Examples -------- A book object can be opened, read and then closed. :: from libacbf import ACBFBook book = ACBFBook("path/to/file.cbz") # Read data from book book.close() ``ACBFBook`` is also a context manager and can be used in with statements. :: from libacbf import ACBFBook with ACBFBook("path/to/file.cbz") as book: # Read data from book You can pass a ``BytesIO`` object. Keep in mind that you cannot use ``mode='x'`` in this case. :: import io from libacbf import ACBFBook file = io.BytesIO() with ACBFBook(file, 'w') as book: # Write data to book Attributes ---------- book_info : BookInfo See :class:`BookInfo` for more information. publisher_info : PublishInfo See :class:`PublishInfo` for more information. document_info : DocumentInfo See :class:`DocumentInfo` for more information. body : ACBFBody See :class:`ACBFBody` for more information. data : ACBFData See :class:`ACBFData` for more information. references : dict A dictionary that contains a list of particular references that occur inside the main document body. Keys are unique reference ids and values are dictionaries that contain a ``'_'`` key with text. :: { "ref_id_001": { "_": "This is a reference." } "ref_id_002": { "_": "This is another reference." } } ``'_'`` can contain special tags for formatting. For more information and a full list, see :attr:`TextArea.text <libacbf.body.TextArea.text>`. styles : Styles See :class:`Styles` for more information. archive : ArchiveReader | None Can be used to read archive directly if file is not plain ACBF. Use this if you want to read exactly what files the book contains but try to avoid directly writing files through ``ArchiveReader``. """ def __init__(self, file: Union[str, Path, IO], mode: Literal['r', 'w', 'a', 'x'] = 'r', archive_type: Optional[str] = "Zip"): self._source = file self.book_path: Path = None self.archive: Optional[ArchiveReader] = None self.mode: Literal['r', 'w', 'a', 'x'] = mode self.is_open: bool = True if isinstance(file, str): self.book_path = Path(file).resolve() if isinstance(file, Path): self.book_path = file.resolve() archive_type = consts.ArchiveTypes[archive_type] if archive_type is not None else archive_type is_text = archive_type is None if mode in ('r', 'a'): try: archive_type = get_archive_type(file) is_text = False except UnsupportedArchive: archive_type = None is_text = True if archive_type == consts.ArchiveTypes.Rar and mode != 'r': raise EditRARArchiveError def create_file(): if not is_text: if archive_type == consts.ArchiveTypes.Zip: with ZipFile(file, 'w') as _: pass elif archive_type == consts.ArchiveTypes.SevenZip: with SevenZipFile(file, 'w') as _: pass elif archive_type == consts.ArchiveTypes.Tar: with tar.open(file, 'w') as _: pass self.archive = ArchiveReader(file, 'w') name = self.book_path.stem + ".acbf" if self.book_path is not None else "book.acbf" self.archive.write(get_book_template().encode("utf-8"), name) else: if self.book_path is not None: with open(str(self.book_path), 'w') as book: book.write(get_book_template()) else: file.write(get_book_template().encode("utf-8")) if mode in ('r', 'a'): if self.book_path is not None and not self.book_path.is_file(): raise FileNotFoundError if mode == 'a' and not is_text: self.archive = ArchiveReader(file, 'w') if self.archive._get_acbf_file() is None: name = Path(self.archive.filename).stem + ".acbf" \ if self.archive.filename is not None \ else "book.acbf" self.archive.write(get_book_template().encode("utf-8"), name) elif mode == 'x': if self.book_path is not None: if self.book_path.is_file(): raise FileExistsError else: create_file() else: raise FileExistsError elif mode == 'w': create_file() arc_mode = 'w' if mode in ('w', 'a', 'x') else 'r' if not is_text: if self.archive is None: self.archive = ArchiveReader(file, arc_mode) acbf_file = self.archive._get_acbf_file() if acbf_file is None: raise InvalidBook contents = self.archive.read(acbf_file) else: if self.book_path is None: contents = file.read() else: with open(file, 'r') as book: contents = book.read() if isinstance(contents, bytes): contents = contents.decode("utf-8") self._root = etree.fromstring(bytes(contents, "utf-8")) self._nsmap: str = self._root.nsmap if mode in ('r', 'a'): _validate_acbf(self._root.getroottree(), self._nsmap[None]) self.styles: Styles = Styles(self) self.book_info: BookInfo = BookInfo(self) self.publisher_info: PublishInfo = PublishInfo(self) self.document_info: DocumentInfo = DocumentInfo(self) self.body: ACBFBody = ACBFBody(self) self.data: ACBFData = ACBFData(self) self.references: Dict[str, Dict[str, str]] = {} # References if self._root.find("references", namespaces=self._nsmap) is not None: for ref in self._root.findall("references/reference", namespaces=self._nsmap): pa = [] for p in ref.findall("p", namespaces=self._nsmap): text = re.sub(r'</?p[^>]*>', '', etree.tostring(p, encoding="utf-8").decode("utf-8").strip()) pa.append(text) self.references[ref.attrib["id"]] = {'_': '\n'.join(pa)} def _get_acbf_tree(self): """Converts the XML tree to a string with any modifications. Returns ------- str ACBF book's XML data. """ if self.mode == 'r': raise UnsupportedOperation("Book is not writeable.") ns = f"{{{self._nsmap[None]}}}" root = _get_root_template(self._nsmap) meta = root.find("meta-data", namespaces=self._nsmap) bd = root.find("body", namespaces=self._nsmap) def add_authors(section, au_list): for author in au_list: au = etree.SubElement(section, f"{ns}author", nsmap=self._nsmap) props = {x.replace('_', '-'): getattr(author, x) for x in ("first_name", "last_name", "nickname") if getattr(author, x) is not None} props.update({x.replace('_', '-'): getattr(author, x) for x in ("middle_name", "home_page", "email") if getattr(author, x) is not None} ) if author.activity is not None: au.set("activity", author.activity.name) if author.lang is not None: au.set("lang", author.lang) for k, v in props.items(): pr = etree.SubElement(au, ns + k, nsmap=self._nsmap) pr.text = v #region Styles for st in self.styles.list_styles(): if st == '_': style = etree.Element(f"{ns}style", nsmap=self._nsmap) meta.addprevious(style) style.text = self.styles['_'].decode("utf-8") if self.styles.types['_'] is not None: style.set("type", self.styles.types['_']) else: sub = f'type="{self.styles.types[st]}" ' if self.styles.types[st] is not None else '' style = etree.ProcessingInstruction("xml-stylesheet", f'{sub}href="{st}"') root.addprevious(style) #endregion #region Book Info b_info = meta.find("book-info", namespaces=self._nsmap) # Authors add_authors(b_info, self.book_info.authors) # Titles for lang, title in self.book_info.book_title.items(): ti = etree.SubElement(b_info, f"{ns}book-title", nsmap=self._nsmap) if lang != '_': ti.set("lang", lang) ti.text = title # Genres for genre, match in self.book_info.genres.items(): gn = etree.SubElement(b_info, f"{ns}genre", nsmap=self._nsmap) gn.text = genre.name if match is not None: if 0 <= match <= 100: gn.set("match", str(match)) else: raise ValueError(f"book_info.genre `match={match}`. Value must be from 0 to 100.") # Annotations for lang, annotation in self.book_info.annotations.items(): an = etree.SubElement(b_info, f"{ns}annotation", nsmap=self._nsmap) if lang != '_': an.set("lang", lang) for para in annotation.splitlines(): p = etree.SubElement(an, f"{ns}p", nsmap=self._nsmap) p.text = para # Cover Page (Filled in body section) etree.SubElement(b_info, f"{ns}coverpage", nsmap=self._nsmap) # --- Optional --- # Language Layers if len(self.book_info.languages) > 0: ll = etree.SubElement(b_info, f"{ns}languages", nsmap=self._nsmap) for layer in self.book_info.languages: etree.SubElement(ll, f"{ns}text-layer", lang=layer.lang, show=str(layer.show).lower(), nsmap=self._nsmap) # Characters if len(self.book_info.characters) > 0: ch = etree.SubElement(b_info, f"{ns}characters", nsmap=self._nsmap) for name in self.book_info.characters: nm = etree.SubElement(ch, f"{ns}name", nsmap=self._nsmap) nm.text = name # Keywords for lang, kwords in self.book_info.keywords.items(): kw = etree.SubElement(b_info, f"{ns}keywords", nsmap=self._nsmap) if lang != '_': kw.set("lang", lang) kw.text = ", ".join(kwords) # Series for title, series in self.book_info.series.items(): seq = etree.SubElement(b_info, f"{ns}sequence", title=title, nsmap=self._nsmap) seq.text = str(series.sequence) if series.volume is not None: seq.set("volume", str(series.volume)) # Content Rating for type, rating in self.book_info.content_rating.items(): cr = etree.SubElement(b_info, f"{ns}content-rating", type=type, nsmap=self._nsmap) cr.text = rating # Database Reference for dbref in self.book_info.database_ref: db = etree.SubElement(b_info, f"{ns}databaseref", dbname=dbref.dbname, nsmap=self._nsmap) db.text = dbref.reference if dbref.type is not None: db.set("type", dbref.type) #endregion #region Publisher Info p_info = meta.find("publish-info", namespaces=self._nsmap) p_info.find("publisher", namespaces=self._nsmap).text = self.publisher_info.publisher p_info.find("publish-date", namespaces=self._nsmap).text = self.publisher_info.publish_date if self.publisher_info.publish_date_value is not None: p_info.find("publish-date", namespaces=self._nsmap).set("value", self.publisher_info.publish_date_value.isoformat()) if self.publisher_info.publish_city is not None: city = etree.SubElement(p_info, f"{ns}city", nsmap=self._nsmap) city.text = self.publisher_info.publish_city if self.publisher_info.isbn is not None: isbn = etree.SubElement(p_info, f"{ns}isbn", nsmap=self._nsmap) isbn.text = self.publisher_info.isbn if self.publisher_info.license is not None: license = etree.SubElement(p_info, f"{ns}license", nsmap=self._nsmap) license.text = self.publisher_info.license #endregion #region Document Info d_info = meta.find("document-info", namespaces=self._nsmap) add_authors(d_info, self.document_info.authors) d_info.find("creation-date", namespaces=self._nsmap).text = self.document_info.creation_date if self.document_info.creation_date_value is not None: d_info.find("creation-date", namespaces=self._nsmap).set("value", self.document_info.creation_date_value.isoformat()) if self.document_info.source is not None: source = etree.SubElement(d_info, f"{ns}source", nsmap=self._nsmap) for para in self.document_info.source.splitlines(): p = etree.SubElement(source, f"{ns}p", nsmap=self._nsmap) p.text = para if self.document_info.document_id is not None: id = etree.SubElement(d_info, f"{ns}id", nsmap=self._nsmap) id.text = self.document_info.document_id if self.document_info.document_version is not None: version = etree.SubElement(d_info, f"{ns}version", nsmap=self._nsmap) version.text = self.document_info.document_version if len(self.document_info.document_history) > 0: hst = etree.SubElement(d_info, f"{ns}history", nsmap=self._nsmap) for entry in self.document_info.document_history: p = etree.SubElement(hst, f"{ns}p", nsmap=self._nsmap) p.text = entry #endregion #region Body if self.body.bgcolor is not None: bd.set("bgcolor", self.body.bgcolor) pages = self.body.pages.copy() pages.insert(0, self.book_info.coverpage) for page in pages: if page.is_coverpage: pg = b_info.find("coverpage", namespaces=self._nsmap) else: pg = etree.SubElement(bd, f"{ns}page", nsmap=self._nsmap) if page.bgcolor is not None: pg.set("bgcolor", page.bgcolor) if page.transition is not None: pg.set("transition", page.transition.name) for lang, title in page.title.items(): ti = etree.SubElement(pg, f"{ns}title", nsmap=self._nsmap) if lang != '_': ti.set("lang", lang) ti.text = title etree.SubElement(pg, f"{ns}image", href=page.image_ref, nsmap=self._nsmap) for lang, tx_layer in page.text_layers.items(): tl = etree.SubElement(pg, f"{ns}text-layer", lang=lang, nsmap=self._nsmap) if tx_layer.bgcolor is not None: tl.set("bgcolor", tx_layer.bgcolor) for tx_area in tx_layer.text_areas: ta = etree.SubElement(tl, f"{ns}text-area", points=helpers.vec_to_pts(tx_area.points), nsmap=self._nsmap) ta.extend(helpers.para_to_tree(tx_area.text, self._nsmap)) for i in ("bgcolor", "inverted", "transparent"): if getattr(tx_area, i) is not None: ta.set(i, str(getattr(tx_area, i)).lower()) if tx_area.rotation is not None: ta.set("text-rotation", str(tx_area.rotation)) if tx_area.type is not None: ta.set("type", tx_area.type.name) for frame in page.frames: fr = etree.SubElement(pg, f"{ns}frame", points=helpers.vec_to_pts(frame.points), nsmap=self._nsmap) if frame.bgcolor is not None: fr.set("bgcolor", frame.bgcolor) for jump in page.jumps: etree.SubElement(pg, f"{ns}jump", page=str(jump.target), points=helpers.vec_to_pts(jump.points), nsmap=self._nsmap) #endregion #region Data if len(self.data) > 0: dt = etree.SubElement(root, f"{ns}data", nsmap=self._nsmap) for file in self.data.list_files(): data = self.data[file] bn = etree.SubElement(dt, f"{ns}binary", attrib={"id": data.id, "content-type": data.type}, nsmap=self._nsmap) bn.text = data._base64data #endregion #region References if len(self.references) > 0: refs = etree.SubElement(root, f"{ns}references", nsmap=self._nsmap) for id, reference in self.references.items(): reference = reference['_'] ref = etree.SubElement(refs, f"{ns}reference", id=id, nsmap=self._nsmap) for r in reference.splitlines(): p = f"<p>{r}</p>" p_element = etree.fromstring(bytes(p, encoding="utf-8")) for i in list(p_element.iter()): i.tag = '{' + self._nsmap[None] + '}' + i.tag ref.append(p_element) #endregion return root.getroottree() def _create_placeholders(self): """Creates the minimum required values for the book to follow the schema. This means creating an empty page if there are no pages. """ if len(self.body.pages) == 0: self.body.append_page('')
[docs] def get_acbf_xml(self) -> str: """Get the XML tree of the ACBF book. Returns ------- str The XML content of the ACBF book. """ return etree.tostring(self._get_acbf_tree(), encoding="utf-8", xml_declaration=True, pretty_print=True ).decode("utf-8")
[docs] def make_archive(self, archive_type: str = "Zip"): """Convert a plain ACBF XML book to an archive format. Parameters ---------- archive_type : str, default="Zip" The type of archive to create. Allowed values are listed at :class:`ArchiveTypes <libacbf.constants.ArchiveTypes>`. Raises ------ AttributeError (Book is already an archive of type ``{archive.type}``.) Raised when book is already an archive. """ archive_type = consts.ArchiveTypes[archive_type] if self.archive is not None: raise AttributeError(f"Book is already an archive of type `{self.archive.type.name}`.") helpers.check_write(self) if archive_type == consts.ArchiveTypes.Rar: raise EditRARArchiveError if archive_type == consts.ArchiveTypes.Zip: with ZipFile(self._source, 'w') as _: pass elif archive_type == consts.ArchiveTypes.SevenZip: with SevenZipFile(self._source, 'w') as _: pass elif archive_type == consts.ArchiveTypes.Tar: with tar.open(self._source, 'w') as _: pass self.archive = ArchiveReader(self._source, 'w') name = self.book_path.stem + ".acbf" if self.book_path is not None else "book.acbf" self.archive.write(self.get_acbf_xml().encode("utf-8"), name)
[docs] def close(self): """Saves and closes the book and closes the archive if it exists. Metadata and embedded data can still be read. Use ``ACBFBook.is_open`` to check if file is open. """ if self.mode != 'r': _validate_acbf(self._get_acbf_tree(), self._nsmap[None]) if self.archive is None: if self.book_path is not None: with open(self._source, 'w') as book: book.write(self.get_acbf_xml()) else: self._source.write(self.get_acbf_xml()) else: self.archive.write(self.get_acbf_xml().encode("utf-8"), self.archive._get_acbf_file()) self.mode = 'r' self.is_open = False if self.archive is not None: self.archive.close()
def __repr__(self): if self.is_open: return object.__repr__(self).replace("libacbf.libacbf.ACBFBook", "libacbf.ACBFBook") else: return "<libacbf.ACBFBook [Closed]>" def __enter__(self): return self def __exit__(self, exception_type, exception_value, traceback): if exception_type is not None: self.mode = 'r' self.is_open = False if self.archive is not None: self.archive.close() else: self.close()
[docs]class BookInfo: """Metadata about the book itself. See Also -------- `Book-Info section <https://acbf.fandom.com/wiki/Meta-data_Section_Definition#Book-info_section>`_. Attributes ---------- authors : List[Author] A list of :class:`Author <libacbf.metadata.Author>` objects. book_title : Dict[str, str] A dictionary with standard language codes as keys and titles as values. Key is ``'_'`` if no language is defined. :: { "_": "book title without language", "en": "English title", "en_GB": "English (UK) title", "en_US": "English (US) title" } genres : Dict[Genres, int | None] A dictionary with keys being a value from :class:`constants.Genres <libacbf.constants.Genres>` Enum and values being integers with the match value or ``None``. See :meth:`get_match()`. annotations : Dict[str, str] A short summary describing the book. It is a dictionary with keys being standard language codes or ``'_'`` if no language is defined and values being multiline strings. coverpage : Page It is the same as :class:`body.Page <libacbf.body.Page>` except it does not have :attr:`body.Page.title <libacbf.body.Page.title>`, :attr:`body.Page.bgcolor <libacbf.body.Page.bgcolor>` and :attr:`body.Page.transition <libacbf.body.Page.transition>`. languages : List[LanguageLayer], optional It represents all :class:`body.TextLayer <libacbf.body.TextLayer>` objects of the book. A list of :class:`LanguageLayer <libacbf.metadata.LanguageLayer>` objects. characters : List[str], optional List of (main) characters that appear in the book. keywords: Dict[str, Set[str]], optional For use by search engines. A dictionary with keys as standard language codes or ``'_'`` if no language is defined. Values are a set of lowercase keywords. series: Dict[str, Series], optional Contains the sequence and number if particular comic book is part of a series. A dictionary with keys as the title of the series and values as :class:`Series <libacbf.metadata.Series>` objects. content_rating: Dict[str, str], optional Content rating of the book based on age appropriateness and trigger warnings. It is a dictionary with the keys being the rating system and values being the rating. :: { "Age Rating": "16+", "DC Comics rating system": "T+", "Marvel Comics rating system": "PARENTAL ADVISORY" } database_ref : List[DBRef], optional References to a record in a comic book database (eg: GCD, MAL). A list of :class:`DBRef <libacbf.metadata.DBRef>` objects. """ def __init__(self, book: ACBFBook): self._book = book nsmap = book._nsmap info = book._root.find("meta-data/book-info", namespaces=nsmap) self.authors: List[metadata.Author] = [] self.book_title: Dict[str, str] = {} self.genres: Dict[consts.Genres, Optional[int]] = {} self.annotations: Dict[str, str] = {} self.coverpage: libacbf.body.Page = None # --- Optional --- self.languages: List[metadata.LanguageLayer] = [] self.characters: List[str] = [] self.keywords: Dict[str, Set[str]] = {} self.series: Dict[str, metadata.Series] = {} self.content_rating: Dict[str, str] = {} self.database_ref: List[metadata.DBRef] = [] #region Fill values # Author self.authors.extend( _update_authors( info.findall("author", namespaces=nsmap), nsmap ) ) # Titles for title in info.findall("book-title", namespaces=nsmap): lang = '_' if "lang" in title.keys(): lang = langcodes.standardize_tag(title.attrib["lang"]) self.book_title[lang] = title.text # Genres for genre in info.findall("genre", namespaces=nsmap): gn = consts.Genres[genre.text] self.genres[gn] = None if "match" in genre.keys(): self.genres[gn] = int(genre.attrib["match"]) # Annotations for an in info.findall("annotation", namespaces=nsmap): p = [] for i in an.findall('p', namespaces=nsmap): p.append(i.text) p = '\n'.join(p) lang = '_' if "lang" in an.keys(): lang = langcodes.standardize_tag(an.attrib["lang"]) self.annotations[lang] = p # Cover Page cpage = info.find("coverpage", namespaces=nsmap) image_ref = '' if cpage is not None: image_ref = cpage.find("image", namespaces=nsmap).attrib["href"] self.coverpage = libacbf.body.Page(image_ref, book, coverpage=True) if cpage is not None: _fill_page(cpage, self.coverpage, nsmap, self._book) # --- Optional --- # Languages if info.find("languages", namespaces=nsmap) is not None: text_layers = info.findall("languages/text-layer", namespaces=nsmap) for layer in text_layers: lang = langcodes.standardize_tag(layer.attrib["lang"]) show = bool(distutils.util.strtobool(layer.attrib["show"])) self.languages.append(metadata.LanguageLayer(lang, show)) # Characters if info.find("characters", namespaces=nsmap) is not None: for c in info.findall("characters/name", namespaces=nsmap): self.characters.append(c.text) # Keywords for k in info.findall("keywords", namespaces=nsmap): if k.text is not None: lang = '_' if "lang" in k.keys(): lang = langcodes.standardize_tag(k.attrib["lang"]) self.keywords[lang] = {x.lower() for x in re.split(", |,", k.text)} # Series for se in info.findall("sequence", namespaces=nsmap): ser = metadata.Series(se.text) if "volume" in se.keys(): ser.volume = se.attrib["volume"] self.series[se.attrib["title"]] = ser # Content Rating for rt in info.findall("content-rating", namespaces=nsmap): self.content_rating[rt.attrib["type"]] = rt.text # Database Reference for db in info.findall("databaseref", namespaces=nsmap): dbref = metadata.DBRef(db.attrib["dbname"], db.text) if "type" in db.keys(): dbref.type = db.attrib["type"] self.database_ref.append(dbref) #endregion
[docs] @helpers.check_book def add_author(self, *names: str, first_name=None, last_name=None, nickname=None) -> metadata.Author: """Add an Author to the book info. Usage is the same as :class:`Author <libacbf.metadata.Author>`. Returns ------- Author The created Author object. """ author = metadata.Author(*names, first_name=first_name, last_name=last_name, nickname=nickname) self.authors.append(author) return author
[docs] def get_genre_match(self, genre: str) -> int: """Get match value of genre by string. """ return self.genres[consts.Genres[genre]]
[docs] @helpers.check_book def edit_genre(self, genre: str, match: Optional[int] = '_'): """Edit a genre by string. Add it if it doesn't exist. Parameters ---------- genre : str See :class:`constants.Genres <libacbf.constants.Genres>` enum for a list of possible values. match : int | None, optional Set the match percentage of the genre. If ``None``, removes the match value. """ if match != '_' and (match < 0 or match > 100): raise ValueError("`match` must be an integer from 0 to 100.") genre = consts.Genres[genre] if match == '_': if genre in self.genres: match = self.genres[genre] else: match = None self.genres[genre] = match
[docs] @helpers.check_book def pop_genre(self, genre: str) -> Optional[int]: """Pop a genre by string. Returns ------- int | None The match value of the genre. """ return self.genres.pop(consts.Genres[genre])
[docs] @helpers.check_book def add_language(self, lang: str, show: bool): """Add a language layer to the book. Usage is the same as :class:`LanguageLayer <libacbf.metadata.LanguageLayer>`. """ self.languages.append(metadata.LanguageLayer(lang, show))
[docs] @helpers.check_book def add_series(self, title: str, sequence: str, volume: Optional[str] = None): """Add a series that the book belongs to. ``title`` is the key and usage for value is the same as :class:`Series <libacbf.metadata.Series>`. """ self.series[title] = metadata.Series(sequence, volume)
[docs] @helpers.check_book def add_dbref(self, dbname: str, ref: str, type: Optional[str] = None): """Add a database reference to the book. Usage is the same as :class:`DBRef <libacbf.metadata.DBRef>`. """ self.database_ref.append(metadata.DBRef(dbname, ref, type))
[docs]class PublishInfo: """Metadata about the book's publisher. See Also -------- `Publish-Info section <https://acbf.fandom.com/wiki/Meta-data_Section_Definition#Publish-Info_Section>`_. Attributes ---------- publisher : str Name of the publisher. publish_date : str Date when the book was published as a human readable string. publish_date_value : datetime.date, optional Date when the book was published. publish_city : str, optional City where the book was published. isbn : str, optional International Standard Book Number. license : str, optional The license that the book is under. """ def __init__(self, book: ACBFBook): self._book = book nsmap = book._nsmap info = book._root.find("meta-data/publish-info", namespaces=nsmap) self.publisher: str = info.find("publisher", namespaces=nsmap).text self.publish_date: str = info.find("publish-date", namespaces=nsmap).text # --- Optional --- self.publish_date_value: Optional[date] = None self.publish_city: Optional[str] = None self.isbn: Optional[str] = None self.license: Optional[str] = None #region Fill values # Date if "value" in info.find("publish-date", namespaces=nsmap).keys(): self.publish_date_value = date.fromisoformat( info.find("publish-date", namespaces=nsmap).attrib["value"]) # City if info.find("city", namespaces=nsmap) is not None: self.publish_city = info.find("city", namespaces=nsmap).text # ISBN if info.find("isbn", namespaces=nsmap) is not None: self.isbn = info.find("isbn", namespaces=nsmap).text # License if info.find("license", namespaces=nsmap) is not None: self.license = info.find("license", namespaces=nsmap).text #endregion
[docs] @helpers.check_book def set_date(self, date: Union[str, date], include_date: bool = True): """Edit the date the book was published. Parameters ---------- date : str | datetime.date Date to set to. include_date : bool, default=True Whether to also set :attr:`publish_date_value`. Passing ``False`` will set it to ``None``. """ _edit_date(self, "publish_date", "publish_date_value", date, include_date)
[docs]class DocumentInfo: """Metadata about the ACBF file itself. See Also -------- `Document-Info section <https://acbf.fandom.com/wiki/Meta-data_Section_Definition#Document-Info_Section>`_. Attributes ---------- authors : List[Author] Authors of the ACBF file as a list of :class:`Author <libacbf.metadata.Author>` objects. creation_date : str Date when the ACBF file was created as a human readable string. creation_date_value : datetime.date, optional Date when the ACBF file was created. source : str, optional A multiline string with information if this book is a derivative of another work. May contain URL and other source descriptions. document_id : str, optional Unique Document ID. Used to distinctly define ACBF files for cataloguing. document_version : str, optional Version of ACBF file. document_history : List[str], optional Change history of the ACBF file with change information in a list of strings. """ def __init__(self, book: ACBFBook): self._book = book nsmap = book._nsmap info = book._root.find("meta-data/document-info", namespaces=nsmap) self.authors: List[metadata.Author] = [] self.creation_date: str = info.find("creation-date", namespaces=nsmap).text # --- Optional --- self.creation_date_value: Optional[date] = None self.source: Optional[str] = None self.document_id: Optional[str] = None self.document_version: Optional[str] = None self.document_history: List[str] = [] #region Fill values # Author self.authors.extend( _update_authors( info.findall("author", namespaces=nsmap), nsmap ) ) # Date if "value" in info.find("creation-date", namespaces=nsmap).keys(): self.creation_date_value = date.fromisoformat( info.find("creation-date", namespaces=nsmap).attrib["value"]) # Source if info.find("source", namespaces=nsmap) is not None: p = [] for line in info.findall("source/p", namespaces=nsmap): p.append(line.text) self.source = '\n'.join(p) # ID if info.find("id", namespaces=nsmap) is not None: self.document_id = info.find("id", namespaces=nsmap).text # Version if info.find("version", namespaces=nsmap) is not None: self.document_version = info.find("version", namespaces=nsmap).text # History for item in info.findall("history/p", namespaces=nsmap): self.document_history.append(item.text) #endregion
[docs] @helpers.check_book def add_author(self, *names: str, first_name=None, last_name=None, nickname=None) -> metadata.Author: """Add an Author to the document info. Usage is the same as :class:`Author <libacbf.metadata.Author>`. Returns ------- Author The created Author object. """ author = metadata.Author(*names, first_name=first_name, last_name=last_name, nickname=nickname) self.authors.append(author) return author
[docs] @helpers.check_book def set_date(self, date: Union[str, date], include_date: bool = True): """Edit the date the ACBF file was created. Parameters ---------- date : str | datetime.date Date to set to. include_date : bool, default=True Whether to also set :attr:`creation_date_value`. Passing ``False`` will set it to ``None``. """ _edit_date(self, "creation_date", "creation_date_value", date, include_date)
[docs]class ACBFBody: """Body section contains the definition of individual book pages and text layers, frames and jumps inside those pages. See Also -------- `Body Section Definition <https://acbf.fandom.com/wiki/Body_Section_Definition>`_. Attributes ---------- pages : List[Page] A list of :class:`Page <libacbf.body.Page>` objects in the order they should be displayed in. bgcolor : str, optional Defines a background colour for the whole book. Can be overridden by ``bgcolor`` in pages, text layers, text areas and frames. """ def __init__(self, book: ACBFBook): self._book = book nsmap = book._nsmap body = book._root.find("body", namespaces=nsmap) self.pages: List[libacbf.body.Page] = [] # --- Optional --- self.bgcolor: Optional[str] = None #region Fill values # Background Colour if "bgcolor" in body.keys(): self.bgcolor = body.attrib["bgcolor"] # Pages for pg in body.findall("page", namespaces=nsmap): img_ref = pg.find("image", namespaces=nsmap).attrib["href"] page = libacbf.body.Page(img_ref, book) if "bgcolor" in pg.keys(): page.bgcolor = pg.attrib["bgcolor"] if "transition" in pg.keys(): page.transition = consts.PageTransitions[pg.attrib["transition"]] for title in pg.findall("title", namespaces=nsmap): lang = '_' if "lang" in title.keys(): lang = langcodes.standardize_tag(title.attrib["lang"]) page.title[lang] = title.text _fill_page(pg, page, nsmap, self._book) self.pages.append(page) #endregion
[docs] @helpers.check_book def insert_page(self, index: int, image_ref: str) -> libacbf.body.Page: """Insert a new Page object at the index. Parameters ---------- index : int Index of new page. image_ref : str Value to set for the image reference. See :attr:`Page.image_ref <libacbf.body.Page.image_ref>` for information on how to format it. Returns ------- Page The created Page object. """ self.pages.insert(index, libacbf.body.Page(image_ref, self._book)) return self.pages[index]
[docs] @helpers.check_book def append_page(self, image_ref: str) -> libacbf.body.Page: """Append a new Page object to the body. Parameters ---------- image_ref : str Value to set for the image reference. See :attr:`Page.image_ref <libacbf.body.Page.image_ref>` for information on how to format it. Returns ------- Page The created Page object. """ page = libacbf.body.Page(image_ref, self._book) self.pages.append(page) return page
[docs]class ACBFData: """Get any binary data embedded in the ACBF file or write data to archive or embed data in ACBF. See Also -------- `Data Section Definition <https://acbf.fandom.com/wiki/Data_Section_Definition>`_. Returns ------- BookData A file as a :class:`BookData <libacbf.bookdata.BookData>` object. Raises ------ FileNotFoundError Raised if file is not found embedded in the ACBF file. Examples -------- To get a file embedded in the ACBF file:: from libacbf import ACBFBook with ACBFBook("path/to/book.cbz") as book: image = book.data["image.png"] font = book.data["font.ttf"] """ def __init__(self, book: ACBFBook): self._book = book self._files: Dict[str, BookData] = {} nsmap = book._nsmap for i in book._root.findall("data/binary", namespaces=nsmap): new_data = BookData(i.attrib["id"], i.attrib["content-type"], i.text) self._files[i.attrib["id"]] = new_data
[docs] def list_files(self) -> Set[str]: """Returns a list of all the names of the files embedded in the ACBF file. May be images, fonts etc. Returns ------- Set[str] A set of file names. """ return set(self._files.keys())
[docs] @helpers.check_book def add_data(self, target: Union[str, Path, bytes], name: str = None, embed: bool = False): """Add or embed data into the book. Parameters ---------- target : str | Path | bytes Path to file to be added or data as bytes. name : str, optional Name to assign to file after writing. Defaults to name part of target. Required if ``target`` is bytes. embed : bool, default=False Whether to embed the file in the ACBF XML. Cannot be ``False`` if book is not an archive type. Use :meth:`ACBFBook.make_archive(...) <libacbf.ACBFBook.make_archive()>` to convert the book to an archive. """ if self._book.archive is None and not embed: raise AttributeError("Book is not an archive type. Write data with `embed = True` or use " "`ACBFBook.make_archive(...)` to convert the book to an archive.") if isinstance(target, str): target = Path(target).resolve(True) if isinstance(target, bytes) and name is None: raise ValueError("`name` is required if `target` is bytes.") name = target.name if name is None else name if embed: if isinstance(target, bytes): contents = target else: with open(target, 'rb') as file: contents = file.read() type = magic.from_buffer(contents, True) data = b64encode(contents).decode("utf-8") self._files[name] = BookData(name, type, data) else: self._book.archive.write(target, name)
[docs] @helpers.check_book def remove_data(self, target: Union[str, Path], embed: bool = False): """Remove file at target in the archive. If ``embed`` is true, removes from embedded files. Parameters ---------- target : str | Path Path to file in archive or id of embedded file. embed : bool, default=False Whether to check for file in archive or embedded in ACBF XML. Must be true if book is plain ACBF XML. """ if self._book.archive is None and not embed: raise AttributeError("Book is not an archive type. Write data with `embed = True` or use " "`ACBFBook.make_archive(...)` to convert the book to an archive.") if embed: if not isinstance(target, str): target = str(target) self._files.pop(target) else: if isinstance(target, str): target = Path(target) self._book.archive.delete(target)
def __len__(self): return len(self._files.keys()) def __getitem__(self, key: str): if key not in self.list_files(): raise FileNotFoundError(f"`{key}` not found embedded in book.") return self._files[key]
[docs]class Styles: """Stylesheets to be used in the book. See Also -------- `Stylesheet Declaration <https://acbf.fandom.com/wiki/Stylesheet_Declaration>`_. Returns ------- bytes Stylesheet data. Examples -------- To get stylesheets :: from libacbf import ACBFBook with ACBFBook("path/to/book.cbz") as book: style1 = book.styles["style1.css"] # Style referenced at the top of the ACBF XML. embedded_style = book.styles['_'] # Returns the stylesheet embedded in ACBF XML. Attributes ---------- types : Dict[str, str | None] A dictionary with keys being the style name (or ``'_'``) and values being the type or ``None`` if not specified. """ def __init__(self, book: ACBFBook): self._book = book nsmap = book._nsmap self._styles: Dict[str, Optional[bytes]] = {} self.types: Dict[str, Optional[str]] = {} for i in book._root.xpath("//processing-instruction('xml-stylesheet')"): self.types[i.attrib["href"]] = i.attrib["type"] if "type" in i.attrib.keys() else None self._styles[i.attrib["href"]] = None embedded = book._root.find("style", namespaces=nsmap) if embedded is not None: self._styles['_'] = book._root.find("style", namespaces=nsmap).text.strip().encode("utf-8") self.types['_'] = embedded.attrib["type"] if "type" in embedded.keys() else None
[docs] def list_styles(self) -> Set[str]: """All the stylesheets referenced by the ACBF XML. Returns ------- Set[str] Referenced stylesheets. """ return set(self.types.keys())
[docs] @helpers.check_book def edit_style(self, stylesheet: Union[str, Path, bytes], style_name: str = None, type: str = "text/css", embed: bool = False): """Writes or overwrites file in book with referenced stylesheet. Parameters ---------- stylesheet : str | Path | bytes Path to stylesheet or stylesheet as bytes. style_name : str, optional Name of stylesheet after being written. Defaults to name part of ``stylesheet_ref``. If it is ``'_'``, writes stylesheet to style tag of ACBF XML. Required if ``stylesheet`` is bytes. type : str, default="text/css" Mime Type of stylesheet. Defaults to CSS but can be others (like SASS). embed : bool, default=False Whether to embed stylesheet in the data section of the book. This is ignored if ``style_name`` is ``'_'``. Must be True if book is plain ACBF XML. Use :meth:`ACBFBook.make_archive(...) <libacbf.ACBFBook.make_archive()>` to convert the book to an archive. """ if isinstance(stylesheet, str): stylesheet = Path(stylesheet) if isinstance(stylesheet, bytes) and style_name is None: raise ValueError("`style_name` is required if `stylesheet` is bytes.") if style_name is None: style_name = stylesheet.name if style_name == '_': if isinstance(stylesheet, bytes): self._styles['_'] = stylesheet else: with open(stylesheet, "rb") as css: self._styles['_'] = css.read() self.types['_'] = type else: self._book.data.add_data(stylesheet, style_name, embed) self._styles[style_name] = None self.types[style_name] = type
[docs] @helpers.check_book def remove_style(self, style_name: str, embedded: bool = False): """Remove stylesheet from book. Parameters ---------- style_name : str Stylesheet to remove. If it is ``'_'``, remove embedded stylesheet. embedded : bool, default=False Remove style from embedded data of book. Ignored if style_name is ``'_'``. Must be False if book is plain ACBF XML. """ self._styles.pop(style_name) self.types.pop(style_name) if style_name != '_': self._book.data.remove_data(style_name, embedded)
def __len__(self): len(self._styles.keys()) def __getitem__(self, key: str): if key not in self.list_styles(): raise FileNotFoundError(f"`{key}` style could not be found.") if self._styles[key] is None: if key in self._book.data.list_files(): self._styles[key] = self._book.data[key].data elif self._book.archive is not None: self._styles[key] = self._book.archive.read(key) else: st_path = self._book.book_path.parent / Path(key) with open(str(st_path), "rb") as st: self._styles[key] = st.read() return self._styles[key]