PK!^oojinja2xlsx/__init__.py# flake8: noqa from jinja2xlsx.api import render from jinja2xlsx.style import Style render_xlsx = render PK! Kjinja2xlsx/adjust.pyfrom dataclasses import dataclass from typing import Iterable from openpyxl.utils import get_column_letter from openpyxl.worksheet.dimensions import ColumnDimension from openpyxl.worksheet.worksheet import Worksheet from requests_html import Element from jinja2xlsx.style import parse_style_attr from jinja2xlsx.utils import ( try_extract_pixels, width_pixels_to_xlsx_units, height_pixels_to_xlsx_units, ) @dataclass class Adjuster: sheet: Worksheet def adjust_columns(self, columns: Iterable[Element]) -> None: for index, column in enumerate(columns): col_width_in_pixels = int(column.attrs.get("width", 0)) if not col_width_in_pixels: continue column_dimension: ColumnDimension = self.sheet.column_dimensions[ get_column_letter(index + 1) ] column_dimension.width = width_pixels_to_xlsx_units(col_width_in_pixels) def adjust_rows(self, rows: Iterable[Element]) -> None: for index, row in enumerate(rows): # todo there must be a better way style_dict = parse_style_attr(row.attrs.get("style")) height_str = style_dict.get("line-height") or style_dict.get("height") or "" row_height = try_extract_pixels(height_str) if not row_height: continue self.sheet.row_dimensions[index + 1].height = height_pixels_to_xlsx_units(row_height) PK!Ƥjinja2xlsx/api.pyfrom typing import Optional from openpyxl import Workbook from jinja2xlsx.parse import Parser from jinja2xlsx.render import Renderer from jinja2xlsx.style import Style, Stylist def render(html_str: str, default_style: Optional[Style] = None) -> Workbook: parser = Parser(html_str) stylist = Stylist(default_style or Style()) renderer = Renderer(parser, stylist) return renderer() PK!҅yyjinja2xlsx/config.pyimport os BASE_DIR = os.path.dirname(os.path.dirname(__file__)) TEST_DATA_DIR = os.path.join(BASE_DIR, "test_data") PK!im ''jinja2xlsx/image.pyimport base64 import io import re from openpyxl.drawing.image import Image from requests_html import Element def parse_img(image_tag: Element) -> Image: image_src = image_tag.attrs["src"] image_base64 = parse_src(image_src) image_stream = base64_to_stream(image_base64) return Image(image_stream) def parse_src(src: str) -> str: """ >>> parse_src("data:image/png;base64, iVBORw0KGgoAAAANSUhEUgAAAAUA\\nAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO\\n9TXL0Y4OHwAAAABJRU5ErkJggg==") 'iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==' >>> parse_src("smiley.gif") Traceback (most recent call last): ... ValueError: Only base64 images supported. >>> parse_src("http://www.example.com/image.gif") Traceback (most recent call last): ... ValueError: Only base64 images supported. """ try: base64_str = re.findall(r"data:.*?;base64,([\s\S]*)", src)[0] except IndexError: raise ValueError("Only base64 images supported.") base64_str = base64_str.strip().replace("\n", "") return base64_str def base64_to_stream(base64_str: str) -> io.BytesIO: return io.BytesIO(base64.b64decode(base64_str)) PK!Pjinja2xlsx/parse.pyfrom dataclasses import dataclass from typing import Sequence, Optional from requests_html import HTML, Element @dataclass() class Parser: html_str: str @property def html(self) -> HTML: return HTML(html=self.html_str) @property def table(self) -> Element: table = self.html.find("table", first=True) assert table return table @property def table_body(self) -> Element: tbody = self.table.find("tbody", first=True) assert tbody return tbody @property def rows(self) -> Sequence[Element]: return self.table_body.find("tr") @property def colgroup(self) -> Optional[Element]: return self.table.find("colgroup", first=True) @property def columns(self) -> Sequence[Element]: colgroup = self.colgroup if colgroup: return colgroup.find("col") else: return [] PK!N N jinja2xlsx/render.pyfrom dataclasses import dataclass from typing import Tuple from openpyxl import Workbook from openpyxl.cell import MergedCell, Cell from openpyxl.worksheet.worksheet import Worksheet from jinja2xlsx.adjust import Adjuster from jinja2xlsx.image import parse_img from jinja2xlsx.parse import Parser from jinja2xlsx.style import Stylist from jinja2xlsx.utils import CellGenerator, create_cell_range_str, parse_cell_value @dataclass class Renderer: parser: Parser stylist: Stylist workbook: Workbook = None sheet: Worksheet = None def __call__(self) -> Workbook: self.wb = Workbook() self.sheet = self.wb.active cells = list(self._generate_cells()) self._fill_cells(cells) self._style_cells(cells) adjuster = Adjuster(self.sheet) adjuster.adjust_columns(self.parser.columns) adjuster.adjust_rows(self.parser.rows) return self.wb def _generate_cells(self) -> CellGenerator: for row_index, row in enumerate(self.parser.rows): col_index = 0 for html_cell in row.find("td"): target_cell, col_index = self._find_free_cell(col_index, row_index) colspan = int(html_cell.attrs.get("colspan", 1)) rowspan = int(html_cell.attrs.get("rowspan", 1)) if colspan > 1 or rowspan > 1: cell_range_str = create_cell_range_str(col_index, colspan, row_index, rowspan) self.sheet.merge_cells(cell_range_str) yield html_cell, None, self.sheet[cell_range_str] else: yield html_cell, target_cell, None col_index += colspan def _find_free_cell(self, col_index: int, row_index: int) -> Tuple[Cell, int]: target_cell = self.sheet.cell(row_index + 1, col_index + 1) while True: if isinstance(target_cell, MergedCell): col_index += 1 target_cell = self.sheet.cell(row_index + 1, col_index + 1) else: break return target_cell, col_index def _fill_cells(self, cells: CellGenerator) -> None: for html_cell, cell, cell_range in cells: target_cell = None if cell: target_cell = cell if cell_range: target_cell = cell_range[0][0] assert target_cell image_tag = html_cell.find("img", first=True) if image_tag: image = parse_img(image_tag) self.sheet.add_image(image, target_cell.coordinate) else: target_cell.value = parse_cell_value(html_cell.text) def _style_cells(self, cells: CellGenerator) -> None: for html_cell, cell, cell_range in cells: style = self.stylist.build_style_from_html(html_cell) if cell: self.stylist.style_single_cell(cell, style) elif cell_range: self.stylist.style_merged_cells(cell_range, style) PK!Q_#'66jinja2xlsx/style.pyimport re from dataclasses import dataclass, field from typing import Optional, Dict, Iterator from openpyxl.cell import Cell from openpyxl.styles import Border, Side, Alignment, Font from requests_html import Element from jinja2xlsx.utils import union_dicts, CellRange REMOVE_SIDE = Side() @dataclass() class Style: border: Border = field(default_factory=Border) alignment: Alignment = field(default_factory=Alignment) font: Font = field(default_factory=Font) def union(self, style: 'Style') -> 'Style': """ >>> from openpyxl.styles import Side >>> default_style = Style(alignment=Alignment(wrap_text=True), font=Font("Times New Roman", 10)) >>> style = Style(border=Border(left=Side("medium")), font=Font(sz=11, bold=True)) >>> new_style = default_style.union(style) >>> new_style.border.left.style 'medium' >>> new_style.alignment.wrap_text True >>> new_style.font == Font("Times New Roman", 11, bold=True) True """ border_data = union_dicts(vars(self.border), vars(style.border)) alignment_data = union_dicts(vars(self.alignment), vars(style.alignment)) font_data = union_dicts(vars(self.font), vars(style.font)) return Style(Border(**border_data), Alignment(**alignment_data), Font(**font_data)) def extract_style(style_attr: str) -> Style: """ >>> style = extract_style("border: 1px solid black; text-align: center; font-weight: bold") >>> style.alignment.horizontal 'center' >>> style.border.left.style 'thin' >>> style.border.left.style == style.border.right.style == style.border.top.style == style.border.bottom.style True >>> style.font.bold True """ if not style_attr: return Style() style_dict = parse_style_attr(style_attr) border = _build_border(style_dict) alignment = _build_alignment(style_dict) font = _build_font(style_dict) return Style(border, alignment, font) @dataclass() class Stylist: default_style: 'Style' = field(default_factory=Style) def build_style_from_html(self, html_element: Element) -> Style: style_attr = html_element.attrs.get("style") style = extract_style(style_attr) style = self.default_style.union(style) return style def style_single_cell(self, cell: Cell, style: Style) -> None: cell.border = style.border cell.alignment = style.alignment cell.font = style.font def style_merged_cells(self, cell_range: CellRange, style: Style) -> None: """ Source: https://openpyxl.readthedocs.io/en/2.5/styles.html#styling-merged-cells """ first_cell = cell_range[0][0] first_cell.alignment = style.alignment first_cell.font = style.font top = Border(top=style.border.top) left = Border(left=style.border.left) right = Border(right=style.border.right) bottom = Border(bottom=style.border.bottom) for cell in cell_range[0]: cell.border = cell.border + top for cell in cell_range[-1]: cell.border = cell.border + bottom for row in cell_range: left_cell = row[0] right_cell = row[-1] left_cell.border = left_cell.border + left right_cell.border = right_cell.border + right def parse_style_attr(style_str: Optional[str]) -> Dict: """ >>> parse_style_attr("border: 1px solid black; text-align: center; font-weight: bold") {'border': '1px solid black', 'text-align': 'center', 'font-weight': 'bold'} >>> parse_style_attr("") {} >>> parse_style_attr(None) {} """ if not style_str: return {} return { style.strip(): value.strip() for style, value in (style.split(":") for style in filter(None, style_str.split(";"))) } def _build_border(style_dict: Dict[str, str]) -> Border: """ >>> border = _build_border({"border": "1px solid black"}) >>> border.left.style 'thin' >>> border.left.style == border.right.style == border.top.style == border.bottom.style True >>> border = _build_border({"border-right": "2px solid black"}) >>> border.right.style 'medium' >>> border = _build_border({"border": "1px solid black", "border-bottom": "0"}) >>> border == Border(left=Side("thin"), right=Side("thin"), top=Side("thin")) True >>> border = _build_border({"border": "1px solid black", "border-top": "none"}) >>> border == Border(left=Side("thin"), right=Side("thin"), bottom=Side("thin")) True """ def _from_border_attr(border_attr: str) -> Optional[Border]: border_rule = style_dict.get(border_attr) if not border_rule: return None if border_rule == "1px solid black": side = Side(style="thin") elif re.match(r"\d+px solid black", border_rule): side = Side(style="medium") elif border_rule.startswith("0") or border_rule.startswith("none"): side = REMOVE_SIDE else: side = Side() if border_attr == "border": return Border(left=side, right=side, top=side, bottom=side) if border_attr == "border-left": return Border(left=side) if border_attr == "border-right": return Border(right=side) if border_attr == "border-top": return Border(top=side) if border_attr == "border-bottom": return Border(bottom=side) return None borders: Iterator[Border] = filter( None, ( _from_border_attr("border"), _from_border_attr("border-left"), _from_border_attr("border-right"), _from_border_attr("border-top"), _from_border_attr("border-bottom"), ), ) final_border = Border() sides = ("left", "right", "top", "bottom") for border in borders: for side_name in sides: side = getattr(border, side_name) if side == Side() and side is not REMOVE_SIDE: continue setattr(final_border, side_name, side) return final_border def _build_alignment(style_dict: Dict) -> Alignment: word_wrap = style_dict.get("word-wrap") wrap_text: Optional[bool] if word_wrap == "break-word": wrap_text = True elif word_wrap == "normal": wrap_text = False else: wrap_text = None alignment = Alignment(horizontal=style_dict.get("text-align"), wrap_text=wrap_text) return alignment def _build_font(style_dict: Dict) -> Font: font = Font(bold=style_dict.get("font-weight") == "bold") return font PK!xD  jinja2xlsx/testing_utils.pyimport os from contextlib import contextmanager from typing import List, Tuple, Iterator, TextIO from openpyxl import Workbook from jinja2xlsx.config import TEST_DATA_DIR def get_test_file_path(file_: str) -> str: return os.path.join(TEST_DATA_DIR, file_) @contextmanager def read_from_test_dir(file_: str) -> Iterator[TextIO]: with open(get_test_file_path(file_), encoding="utf-8") as f: yield f def get_wb_values(wb: Workbook) -> List[Tuple]: return list(wb.active.values) PK!xjinja2xlsx/utils.pyimport re from typing import Dict, Optional, Any, Tuple, Iterable from openpyxl.cell import Cell from openpyxl.utils import get_column_letter from requests_html import Element CellRange = Tuple[Tuple[Cell]] CellGenerator = Iterable[Tuple[Element, Optional[Cell], Optional[CellRange]]] def union_dicts(dict_1: Dict, dict_2: Dict, with_none_drop: bool = True) -> Dict: """ >>> union_dicts({"a": 1}, {"a": None, "b": 2}) {'a': 1, 'b': 2} """ if with_none_drop: new_dict_2 = {key: value for key, value in dict_2.items() if value is not None} else: new_dict_2 = dict_2 return {**dict_1, **new_dict_2} def try_extract_pixels(pixel_str: Optional[str]) -> Optional[float]: """ >>> try_extract_pixels("100px") 100.0 >>> try_extract_pixels("") is None True """ if not pixel_str: return None return float(re.findall(r"(\d+)px", pixel_str)[0]) def parse_cell_value(cell_text: str) -> Any: """ >>> parse_cell_value("") is None True >>> parse_cell_value("ass") 'ass' >>> parse_cell_value("1") 1 >>> parse_cell_value("1.2") 1.2 """ if cell_text == "": return None if cell_text.isdigit(): return int(cell_text) # float(str) break python for very long non-float string ~_~ if cell_text.replace(".", "", 1).isdigit(): return float(cell_text) return cell_text def create_cell_range_str(col_index: int, colspan: int, row_index: int, rowspan: int) -> str: start_column = get_column_letter(col_index + 1) start_row = row_index + 1 end_column = get_column_letter(col_index + colspan) end_row = row_index + rowspan cell_range = f"{start_column}{start_row}:{end_column}{end_row}" return cell_range def width_pixels_to_xlsx_units(pixels: float) -> float: return pixels / 7.5 def height_pixels_to_xlsx_units(pixels: float) -> float: return pixels * 3 / 4 PK!HڽTU jinja2xlsx-0.3.1.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HFx/I#jinja2xlsx-0.3.1.dist-info/METADATATmo6_qs؀%9q mH5@b(JHʱ;Ib9^>w< L(>dS-?Je(~]7u CD ߍ* wl *0|MsMd-\^-*\}G=Kł\L 5DXp!w7k!S 6Ĥgq|2#v*@@ xQ;ܨE!5\r]ѵ]]pZYᔩڕڑ9:UEZS8,V}~O4;8-/D,  cs2,34-X4 ȹm=7\V>͍dAzB+B@yRKR;Ƽ7J 䗳!$5Ѵeoƀ~y|cA nxۃpg?GsIؚh.dzճwf7|c?e S${o1IW}[Tѽ>w0H=**HQM \C8 ?`.EiفЎL 1Kx -! Ҿҗ:uJ47+WmK3u!:/\U%-Z< (`D4aDxeף’`^-z~\mDn-!Y/PK!HM*X!jinja2xlsx-0.3.1.dist-info/RECORD}ǖ@@} ,@0$l8(RD~V}g?p{9K4avP4G7 yJ4hb l}Fxia_Y0X7]쯸3hP*,w_KwX>0@•Dpz:'j!^xĐ(-Lt,`гŕG4 ev:(;j4 >arAn#ԚkˋXQOn&6H1؆Z[8!W^1PRW{U)#yB;w1X:ȗ>6o5 s>+ưԤruyr !u{nq5{&t 'cxY[}zXdhlK f#WmLƻIyV[8%2FS jj~i;Nɼ7>vJ12{lR8A %`Ao`4ߎ*˧xr޼^FNYRUx ?_ٳl ` ?2J.(bXK&ZQrL$cc~ yw1$ PK!^oojinja2xlsx/__init__.pyPK! Kjinja2xlsx/adjust.pyPK!Ƥjinja2xlsx/api.pyPK!҅yy}jinja2xlsx/config.pyPK!im ''( jinja2xlsx/image.pyPK!Pjinja2xlsx/parse.pyPK!N N jinja2xlsx/render.pyPK!Q_#'66jinja2xlsx/style.pyPK!xD  f:jinja2xlsx/testing_utils.pyPK!x<jinja2xlsx/utils.pyPK!HڽTU Djinja2xlsx-0.3.1.dist-info/WHEELPK!HFx/I#WEjinja2xlsx-0.3.1.dist-info/METADATAPK!HM*X!Hjinja2xlsx-0.3.1.dist-info/RECORDPK ^K