Source code for inscriptis.annotation

"""The model used for saving annotations."""

from typing import NamedTuple, Tuple
from typing import List

from inscriptis.html_properties import HorizontalAlignment

class Annotation(NamedTuple):
    """An Inscriptis annotation which provides metadata on the extracted text.

    The :attr:`start` and :attr:`end` indices indicate the span of the text
    to which the metadata refers, and the attribute :attr:`metadata` contains
    the tuple of tags describing this span. The tuple may hold any number of
    tags.

    Example::

        Annotation(0, 10, ('heading', ))

    The annotation above indicates that the text span between the 1st
    (index 0) and 11th (index 10) character of the extracted text contains a
    *heading*.
    """

    start: int
    """the annotation's start index within the text output."""
    end: int
    """the annotation's end index within the text output."""
    # ``Tuple[str, ...]`` is the variable-length form; a plain ``Tuple[str]``
    # would restrict the field to exactly one tag for static type checkers.
    metadata: Tuple[str, ...]
    """a tuple of tags to be attached to the annotation."""
def horizontal_shift(annotations: List[Annotation], content_width: int,
                     line_width: int, align: HorizontalAlignment,
                     shift: int = 0) -> List[Annotation]:
    r"""Move annotations horizontally to match a formatted line.

    The content occupies ``content_width`` characters inside a line of
    ``line_width`` characters. Depending on the alignment, the content (and
    therefore every annotation referring to it) is displaced by a certain
    number of characters, which is added to each annotation's start and end
    index.

    Args:
        annotations: the list of Annotations to adjust.
        content_width: the width of the actual content.
        line_width: the width of the line in which the content is placed.
        align: the horizontal alignment to assume for the adjustment; any
            value other than left or right is handled as centered.
        shift: an optional additional shift which is always applied.

    Returns:
        A new list of :class:`Annotation`\s with the adjusted start and end
        positions; the metadata is carried over unchanged.
    """
    if align == HorizontalAlignment.left:
        # Left-aligned content starts at the line's beginning; only the
        # additional shift applies.
        offset = shift
    else:
        free_space = line_width - content_width
        if align == HorizontalAlignment.right:
            # All free space is located left of the content.
            offset = shift + free_space
        else:
            # Centered: half of the free space (rounded down) precedes the
            # content.
            offset = shift + free_space // 2

    shifted = []
    for annotation in annotations:
        shifted.append(Annotation(annotation.start + offset,
                                  annotation.end + offset,
                                  annotation.metadata))
    return shifted