Source code for inscriptis.model.canvas.block

"""Representation of a text block within the HTML canvas."""
from html import unescape
from inscriptis.html_properties import WhiteSpace


class Block:
    """The current block of text.

    A block usually refers to one line of output text.

    .. note::
        If pre-formatted content is merged with a block, it may also contain
        multiple lines.

    Args:
        idx: the current block's start index.
        prefix: prefix used within the current block. This class reads its
            ``first`` and ``rest`` attributes and resets its ``consumed``
            attribute.
    """

    __slots__ = ('idx', 'prefix', '_content', 'collapsable_whitespace')

    # NOTE(review): ``prefix`` is annotated as ``str`` but is used below as an
    # object exposing ``first``, ``rest`` and ``consumed`` - confirm the
    # intended type and tighten the annotation.
    def __init__(self, idx: int, prefix: str):
        self.idx = idx
        self.prefix = prefix
        self._content = ''
        # True while a whitespace character may be dropped, i.e. at the start
        # of the block or directly after an already emitted space.
        self.collapsable_whitespace = True

    def merge(self, text: str, whitespace: 'WhiteSpace') -> None:
        """Merge the given text with the current block.

        Args:
            text: the text to merge.
            whitespace: whitespace handling; ``WhiteSpace.pre`` keeps the
                text as pre-formatted, any other value collapses whitespace.
        """
        if whitespace == WhiteSpace.pre:
            self.merge_pre_text(text)
        else:
            self.merge_normal_text(text)

    def merge_normal_text(self, text: str) -> None:
        """Merge the given text with the current block.

        Runs of whitespace are collapsed into a single space and whitespace
        is dropped while ``collapsable_whitespace`` is set. If nothing
        remains after collapsing, the block is left untouched.

        Args:
            text: the text to merge
        """
        normalized_text = []
        for ch in text:
            if not ch.isspace():
                normalized_text.append(ch)
                self.collapsable_whitespace = False
            elif not self.collapsable_whitespace:
                # first whitespace after visible text: keep exactly one space
                normalized_text.append(' ')
                self.collapsable_whitespace = True

        if not normalized_text:
            return

        merged = ''.join(normalized_text)
        if not self._content:
            # the prefix is only read for the first text added to the block
            merged = self.prefix.first + merged
        merged = unescape(merged)
        self._content += merged
        self.idx += len(merged)

    def merge_pre_text(self, text: str) -> None:
        """Merge the given pre-formatted text with the current block.

        The text is preceded by ``prefix.first`` and every newline in it is
        followed by ``prefix.rest``. Whitespace is kept as is.

        Args:
            text: the text to merge
        """
        merged = ''.join((self.prefix.first,
                          text.replace('\n', '\n' + self.prefix.rest)))
        merged = unescape(merged)
        self._content += merged
        self.idx += len(merged)
        self.collapsable_whitespace = False

    def is_empty(self) -> bool:
        """Return True if the block's content is empty.

        Goes through :attr:`content` and therefore may strip a trailing
        collapsable space as a side effect.
        """
        return not self.content

    @property
    def content(self) -> str:
        """Return the block's text.

        While ``collapsable_whitespace`` is set, a single trailing space is
        removed from the stored text and ``idx`` is decremented to match.
        """
        if self.collapsable_whitespace and self._content.endswith(' '):
            self._content = self._content[:-1]
            self.idx -= 1
        return self._content

    def new_block(self) -> 'Block':
        """Return a new Block based on the current one.

        The new block shares this block's prefix (whose ``consumed``
        attribute is reset to False) and starts one index past this block's
        current ``idx``.
        """
        self.prefix.consumed = False
        return Block(idx=self.idx + 1, prefix=self.prefix)