Source code for inscriptis.model.table

#!/usr/bin/env python3
# encoding: utf-8
"""Classes used for representing Tables, TableRows and TableCells."""

from itertools import chain, accumulate
from typing import List

from inscriptis.annotation import Annotation, horizontal_shift
from inscriptis.html_properties import HorizontalAlignment, VerticalAlignment
from inscriptis.model.canvas import Canvas


class TableCell(Canvas):
    """A table cell.

    Attributes:
        align: the cell's horizontal alignment.
        valign: the cell's vertical alignment.
        line_width: the original line widths per line (required to adjust
            annotations after a reformatting). ``None`` until the cell's
            width has been set.
        vertical_padding: vertical padding that has been introduced due to
            vertical formatting rules.
    """

    __slots__ = (
        "annotations",
        "block_annotations",
        "blocks",
        "current_block",
        "margin",
        "annotation_counter",
        "align",
        "valign",
        "_width",
        "line_width",
        "vertical_padding",
    )

    def __init__(self, align: HorizontalAlignment, valign: VerticalAlignment):
        """Create an empty cell with the given alignment rules.

        Args:
            align: horizontal alignment applied when the width is set.
            valign: vertical alignment applied when the height is set.
        """
        super().__init__()
        self.align = align
        self.valign = valign
        # both values are only known once the width setter has been called
        self._width = None
        self.line_width = None
        self.vertical_padding = 0

    def normalize_blocks(self) -> int:
        """Split multi-line blocks into multiple one-line blocks.

        Returns:
            The height of the normalized cell.
        """
        self.flush_inline()
        self.blocks = list(chain(*(line.split("\n") for line in self.blocks)))
        # a cell always consists of at least one (possibly empty) line
        if not self.blocks:
            self.blocks = [""]
        return len(self.blocks)

    @property
    def height(self) -> int:
        """Compute the table cell's height.

        Returns:
            The cell's current height (at least 1).
        """
        return max(1, len(self.blocks))

    @property
    def width(self) -> int:
        """Compute the table cell's width.

        Returns:
            The width assigned through the setter if a non-zero width has
            been set, otherwise the length of the longest line within the
            cell's blocks (0 for a cell without any blocks).
        """
        if self._width:
            return self._width
        # ``default`` guards against cells that do not contain any block yet
        return max(
            (len(line) for line in chain(*(block.split("\n") for block in self.blocks))),
            default=0,
        )

    @width.setter
    def width(self, width):
        """Set the cell's width and apply the cell's horizontal formatting.

        Args:
            width: The cell's expected width.
        """
        # save the original line widths before reformatting
        self.line_width = [len(block) for block in self.blocks]

        # record new width and start reformatting
        self._width = width
        format_spec = "{{:{align}{width}}}".format(align=self.align.value, width=width)
        self.blocks = [format_spec.format(b) for b in self.blocks]

    @height.setter
    def height(self, height: int):
        """Set the cell's height to the given value.

        Notes:
            Depending on the height and the cell's vertical formatting this
            might require the introduction of empty lines. Cells that are
            already at least ``height`` lines tall are left untouched.

        Args:
            height: The cell's expected height.
        """
        rows = len(self.blocks)
        if rows < height:
            empty_line = [""]
            if self.valign == VerticalAlignment.bottom:
                # all padding goes above the content
                self.vertical_padding = height - rows
                self.blocks = self.vertical_padding * empty_line + self.blocks
            elif self.valign == VerticalAlignment.middle:
                # floor(half) of the padding above, ceil(half) below
                self.vertical_padding = (height - rows) // 2
                self.blocks = (
                    self.vertical_padding * empty_line
                    + self.blocks
                    + ((height - rows + 1) // 2 * empty_line)
                )
            else:
                # any other alignment: all padding goes below the content
                self.blocks = self.blocks + ((height - rows) * empty_line)

    def get_annotations(self, idx: int, row_width: int) -> List[Annotation]:
        """Return a list of all annotations within the TableCell.

        Notes:
            ``line_width`` is only populated by the width setter, so the
            cell's width must have been set before this method is called
            on a cell that contains annotations.

        Args:
            idx: the cell's start index; stored in ``current_block.idx`` and
                forwarded to ``horizontal_shift``.
            row_width: width of the row that contains the cell; used to
                advance ``idx`` from one cell line to the next.

        Returns:
            A list of annotations that have been adjusted to the cell's
            position.
        """
        self.current_block.idx = idx
        if not self.annotations:
            return []

        # the easy case - the cell has only one line :)
        if len(self.blocks) == 1:
            # shift first: horizontal_shift requires the line's *original*
            # width, so line_width must not be overwritten beforehand
            # (same order as in the multi-line case below).
            annotations = horizontal_shift(
                self.annotations, self.line_width[0], self.width, self.align, idx
            )
            self.line_width[0] = self.width
            return annotations

        # the more challenging one - multiple cell lines
        line_break_pos = list(accumulate(self.line_width))
        annotation_lines = [[] for _ in self.blocks]

        # assign annotations to the corresponding line
        for a in self.annotations:
            for no, line_break in enumerate(line_break_pos):
                if a.start <= (line_break + no):  # consider newline
                    annotation_lines[no + self.vertical_padding].append(a)
                    break

        # compute the annotation index based on its line and delta :)
        result = []
        idx += self.vertical_padding  # newlines introduced by the padding
        for line_annotations, line_len in zip(annotation_lines, self.line_width):
            result.extend(
                horizontal_shift(
                    line_annotations, line_len, self.width, self.align, idx
                )
            )
            idx += row_width - line_len
        # the lines have been processed; record the reformatted widths
        self.line_width = [self.width for _ in self.line_width]
        return result
class TableRow:
    """One row of a table, i.e. an ordered collection of table cells.

    Attributes:
        columns: the cells that make up the row (one per column).
        cell_separator: string placed between neighbouring cells when the
            row is rendered.
    """

    __slots__ = ("columns", "cell_separator")

    def __init__(self, cell_separator: str):
        """Create an empty row that uses the given cell separator."""
        self.cell_separator = cell_separator
        self.columns: List[TableCell] = []

    def __len__(self):
        """Return the number of cells in the row."""
        return len(self.columns)

    def get_text(self) -> str:
        """Render the row as text.

        The n-th line of every cell is joined with the cell separator; the
        resulting lines are joined with newlines.
        """
        lines_per_cell = (cell.blocks for cell in self.columns)
        return "\n".join(map(self.cell_separator.join, zip(*lines_per_cell)))

    @property
    def width(self) -> int:
        """Return the row's width: all cell widths plus the separators."""
        if not self.columns:
            return 0

        separator_total = (len(self.columns) - 1) * len(self.cell_separator)
        cell_total = sum(cell.width for cell in self.columns)
        return cell_total + separator_total
class Table:
    """Representation of an HTML table.

    Attributes:
        rows: the rows that have been added to the table.
        left_margin_len: length of the left margin before the table.
        cell_separator: string placed between the cells of a row.
    """

    __slots__ = ("rows", "left_margin_len", "cell_separator")

    def __init__(self, left_margin_len: int, cell_separator: str):
        """Create an empty table with the given margin and cell separator."""
        self.cell_separator = cell_separator
        self.left_margin_len = left_margin_len
        self.rows = []

    def add_row(self):
        """Append a new, empty :class:`TableRow` to the table."""
        new_row = TableRow(self.cell_separator)
        self.rows.append(new_row)

    def add_cell(self, table_cell: TableCell):
        """Append a :class:`TableCell` to the last row of the table.

        .. note::
            A first row is created on demand if the table has no rows yet.
        """
        if not self.rows:
            self.add_row()
        last_row = self.rows[-1]
        last_row.columns.append(table_cell)

    def _set_row_height(self):
        """Give all :class:`TableCell`s of a row the height of its tallest cell."""
        for row in self.rows:
            # rows without cells have nothing to adjust
            if not row.columns:
                continue
            tallest = max(cell.normalize_blocks() for cell in row.columns)
            for cell in row.columns:
                cell.height = tallest

    def _set_column_width(self):
        """Give all :class:`TableCell`s of a column the width of its widest cell."""
        # rows may differ in length; the longest one defines the column count
        column_count = max(len(row.columns) for row in self.rows)

        for column_no in range(column_count):
            # only rows that are long enough contribute a cell to this column
            column_cells = [
                row.columns[column_no] for row in self.rows if len(row) > column_no
            ]
            # read every width before the first one gets modified
            widest = max(cell.width for cell in column_cells)
            for cell in column_cells:
                cell.width = widest

    def get_text(self) -> str:
        """Render the table and return its text representation."""
        if not self.rows:
            return "\n"

        # heights must be fixed before the widths are computed
        self._set_row_height()
        self._set_column_width()
        rendered_rows = [row.get_text() for row in self.rows]
        return "\n".join(rendered_rows) + "\n"

    def get_annotations(self, idx: int, left_margin_len: int) -> List[Annotation]:
        r"""Collect the annotations of all cells within the table.

        Args:
            idx: the table's start index.
            left_margin_len: len of the left margin (required for adapting
                the position of annotations).

        Returns:
            A list of all :class:`~inscriptis.annotation.Annotation`\s
            present in the table.
        """
        collected = []
        if not self.rows:
            return collected

        row_start = idx + left_margin_len
        for row in self.rows:
            if not row.columns:
                continue

            row_width = row.width + left_margin_len
            row_height = row.columns[0].height
            separator_len = len(row.cell_separator)

            cell_start = row_start
            for cell in row.columns:
                collected.extend(cell.get_annotations(cell_start, row_width))
                cell_start += cell.width + separator_len

            # every line of the row is followed by a linebreak
            row_start += row_height * (row_width + 1)
        return collected