Source code for inscriptis.model.canvas.block

"""Representation of a text block within the HTML canvas."""
from __future__ import annotations

from html import unescape
from typing import TYPE_CHECKING

from inscriptis.html_properties import WhiteSpace

if TYPE_CHECKING:
    from inscriptis.model.canvas import Prefix


class Block:
    """A block of text that is being assembled on the canvas.

    Most of the time a block corresponds to a single line of output text.

    .. note::
        Once pre-formatted content has been merged into a block, the block
        may span multiple lines.

    Args:
        idx: the start index of this block.
        prefix: the prefix that applies to text within this block.
    """

    __slots__ = ("idx", "prefix", "_content", "collapsable_whitespace")

    def __init__(self, idx: int, prefix: Prefix):
        """Create an empty block starting at ``idx`` that uses ``prefix``."""
        self.idx = idx
        self.prefix = prefix
        self._content = ""
        # A fresh block behaves as if it ended with whitespace, so leading
        # whitespace of the first merged text is dropped.
        self.collapsable_whitespace = True

    def merge(self, text: str, whitespace: WhiteSpace) -> None:
        """Append ``text`` to the block, honoring the whitespace mode.

        Args:
            text: the text to append.
            whitespace: ``WhiteSpace.pre`` keeps the text verbatim; any other
                value collapses runs of whitespace.
        """
        handler = (
            self.merge_pre_text
            if whitespace == WhiteSpace.pre
            else self.merge_normal_text
        )
        handler(text)

    def merge_normal_text(self, text: str) -> None:
        """Append ``text`` to the block while collapsing whitespace.

        Args:
            text: the text to append.

        Note:
            Every run of whitespace is reduced to a single space. If the
            block already ends with collapsable whitespace, whitespace at the
            start of ``text`` is dropped entirely. Text that contributes
            nothing after normalization leaves the block untouched.
        """
        pieces = []
        for char in text:
            if char.isspace():
                if self.collapsable_whitespace:
                    # Already inside a whitespace run: swallow the character.
                    continue
                pieces.append(" ")
                self.collapsable_whitespace = True
            else:
                pieces.append(char)
                self.collapsable_whitespace = False

        if not pieces:
            return

        merged = "".join(pieces)
        if not self._content:
            # Only the very first text of a block receives the prefix.
            merged = self.prefix.first + merged
        merged = unescape(merged)

        self._content += merged
        self.idx += len(merged)

    def merge_pre_text(self, text: str) -> None:
        """Append pre-formatted ``text`` to the block without collapsing.

        Args:
            text: the pre-formatted text to append.
        """
        # Read ``first`` before ``rest``; the evaluation order is kept on
        # purpose because the prefix object may be stateful.
        leading = self.prefix.first
        continuation = "\n" + self.prefix.rest
        merged = unescape(leading + text.replace("\n", continuation))

        self._content += merged
        self.idx += len(merged)
        self.collapsable_whitespace = False

    def is_empty(self) -> bool:
        """Return True if the block's (trimmed) content is empty."""
        return not self.content

    @property
    def content(self):
        """Return the block's text.

        While whitespace is still collapsable, a single trailing space is
        removed from the stored text (and ``idx`` is decremented accordingly)
        before the text is returned.
        """
        if self.collapsable_whitespace and self._content.endswith(" "):
            self._content = self._content[:-1]
            self.idx -= 1
        return self._content

    def new_block(self) -> "Block":
        """Return a new, empty Block that follows the current one."""
        self.prefix.consumed = False
        successor = Block(idx=self.idx + 1, prefix=self.prefix)
        return successor