, attributes dict
self.tr_index = None # row index
self.theader = None # table header cells
self.tfooter = None # table footer cells
self.theader_out = self.tfooter_out = False
self.table_row_height = 0
self.heading_level = None
self.heading_sizes = dict(**DEFAULT_HEADING_SIZES)
if heading_sizes:
self.heading_sizes.update(heading_sizes)
self._only_imgs_in_td = False
def width2unit(self, length):
"Handle conversion of % measures into the measurement unit used"
if length[-1] == "%":
total = self.pdf.w - self.pdf.r_margin - self.pdf.l_margin
if self.table["width"][-1] == "%":
total *= int(self.table["width"][:-1]) / 100
return int(length[:-1]) * total / 100
return int(length)
def handle_data(self, data):
if self.td is not None: # drawing a table?
self._insert_td(data)
elif self.table is not None:
# ignore anything else than td inside a table
pass
elif self.align:
LOGGER.debug("align '%s'", data.replace("\n", "\\n"))
self.pdf.multi_cell(
0,
self.h,
data,
border=0,
ln=1,
align=self.align[0].upper(),
link=self.href,
)
else:
data = data.replace("\n", " ")
if self.href:
self.put_link(data)
else:
if self.heading_level:
self.pdf.start_section(data, self.heading_level - 1)
LOGGER.debug("write '%s' h=%d", data.replace("\n", "\\n"), self.h)
self.pdf.write(self.h, data)
def _insert_td(self, data=""):
self._only_imgs_in_td = False
width = self._td_width()
height = int(self.td.get("height", 0)) // 4 or self.h * 1.30
if not self.table_row_height:
self.table_row_height = height
elif self.table_row_height > height:
height = self.table_row_height
border = int(self.table.get("border", 0))
if self.th:
self.set_style("B", True)
border = border or "B"
align = self.td.get("align", "C")[0].upper()
else:
align = self.td.get("align", "L")[0].upper()
border = border and "LR"
bgcolor = color_as_decimal(self.td.get("bgcolor", self.tr.get("bgcolor", "")))
# parsing table header/footer (drawn later):
if self.thead is not None:
self.theader.append(((width, height, data, border, 0, align), bgcolor))
if self.tfoot is not None:
self.tfooter.append(((width, height, data, border, 0, align), bgcolor))
# check if reached end of page, add table footer and header:
if self.tfooter:
height += self.tfooter[0][0][1]
if self.pdf.y + height > self.pdf.page_break_trigger and not self.th:
self.output_table_footer()
self.pdf.add_page(same=True)
self.theader_out = self.tfooter_out = False
if self.tfoot is None and self.thead is None:
if not self.theader_out:
self.output_table_header()
self.box_shadow(width, height, bgcolor)
# self.pdf.x may have shifted due to
inside :
self.pdf.set_x(self._td_x())
LOGGER.debug(
"td cell x=%d width=%d height=%d border=%s align=%s '%s'",
self.pdf.x,
width,
height,
border,
align,
data.replace("\n", "\\n"),
)
self.pdf.cell(width, height, data, border=border, ln=0, align=align)
def _td_x(self):
"Return the current table cell left side horizontal position"
prev_cells_total_width = sum(
self.width2unit(width)
for width in self.table_col_width[: self.table_col_index]
)
return self.table_offset + prev_cells_total_width
def _td_width(self):
"Return the current table cell width"
# pylint: disable=raise-missing-from
if "width" in self.td:
column_widths = [self.td["width"]]
elif "colspan" in self.td:
i = self.table_col_index
colspan = int(self.td["colspan"])
column_widths = self.table_col_width[i : i + colspan]
else:
try:
column_widths = [self.table_col_width[self.table_col_index]]
except IndexError:
raise ValueError(
f"Width not specified for table column {self.table_col_index},"
" unable to continue"
)
return sum(self.width2unit(width) for width in column_widths)
def box_shadow(self, w, h, bgcolor):
LOGGER.debug("box_shadow w=%d h=%d bgcolor=%s", w, h, bgcolor)
if bgcolor:
fill_color = self.pdf.fill_color
self.pdf.set_fill_color(*bgcolor)
self.pdf.rect(self.pdf.x, self.pdf.y, w, h, "F")
self.pdf.fill_color = fill_color
def output_table_header(self):
if self.theader:
b = self.style.get("b")
self.pdf.set_x(self.table_offset)
self.set_style("b", True)
for cell, bgcolor in self.theader:
self.box_shadow(cell[0], cell[1], bgcolor)
self.pdf.cell(*cell) # includes the border
self.set_style("b", b)
self.pdf.ln(self.theader[0][0][1])
self.pdf.set_x(self.table_offset)
# self.pdf.set_x(prev_x)
self.theader_out = True
def output_table_footer(self):
if self.tfooter:
x = self.pdf.x
self.pdf.set_x(self.table_offset)
for cell, bgcolor in self.tfooter:
self.box_shadow(cell[0], cell[1], bgcolor)
self.pdf.cell(*cell)
self.pdf.ln(self.tfooter[0][0][1])
self.pdf.set_x(x)
if self.table.get("border"):
self.output_table_sep()
self.tfooter_out = True
def output_table_sep(self):
x1 = self.pdf.x
y1 = self.pdf.y
width = sum(self.width2unit(length) for length in self.table_col_width)
self.pdf.line(x1, y1, x1 + width, y1)
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
LOGGER.debug("STARTTAG %s %s", tag, attrs)
if tag in ("b", "i", "u"):
self.set_style(tag, True)
if tag == "a":
self.href = attrs["href"]
if tag == "br":
self.pdf.ln(self.h)
if tag == "p":
self.pdf.ln(self.h)
if attrs:
self.align = attrs.get("align")
if tag in self.heading_sizes:
self.font_stack.append((self.font_face, self.font_size, self.font_color))
self.heading_level = int(tag[1:])
hsize = self.heading_sizes[tag]
self.pdf.set_text_color(150, 0, 0)
self.set_font(size=hsize)
self.pdf.ln(self.h)
if attrs:
self.align = attrs.get("align")
if tag == "hr":
self.pdf.add_page(same=True)
if tag == "pre":
self.font_stack.append((self.font_face, self.font_size, self.font_color))
self.set_font("courier", 11)
if tag == "blockquote":
self.pdf.set_text_color(100, 0, 45)
self.indent += 1
self.pdf.ln(3)
if tag == "ul":
self.indent += 1
self.bullet.append(self.ul_bullet_char)
if tag == "ol":
self.indent += 1
self.bullet.append(0)
if tag == "li":
self.pdf.ln(self.h + 2)
self.pdf.set_text_color(190, 0, 0)
bullet = self.bullet[self.indent - 1]
if not isinstance(bullet, str):
bullet += 1
self.bullet[self.indent - 1] = bullet
bullet = f"{bullet}. "
self.pdf.write(self.h, f"{' ' * self.li_tag_indent * self.indent}{bullet} ")
self.set_text_color(*self.font_color)
if tag == "font":
# save previous font state:
self.font_stack.append((self.font_face, self.font_size, self.font_color))
if "color" in attrs:
color = color_as_decimal(attrs["color"])
self.font_color = color
if "face" in attrs:
face = attrs.get("face").lower()
try:
self.pdf.set_font(face)
self.font_face = face
except RuntimeError:
pass # font not found, ignore
if "size" in attrs:
self.font_size = int(attrs.get("size"))
self.set_font()
self.set_text_color(*self.font_color)
if tag == "table":
self.table = {k.lower(): v for k, v in attrs.items()}
if "width" not in self.table:
self.table["width"] = "100%"
if self.table["width"][-1] == "%":
w = self.pdf.w - self.pdf.r_margin - self.pdf.l_margin
w *= int(self.table["width"][:-1]) / 100
self.table_offset = (self.pdf.w - w) / 2
self.table_col_width = []
self.theader_out = self.tfooter_out = False
self.theader = []
self.tfooter = []
self.thead = None
self.tfoot = None
self.pdf.ln()
if tag == "tr":
self.tr_index = 0 if self.tr_index is None else (self.tr_index + 1)
self.tr = {k.lower(): v for k, v in attrs.items()}
self.table_col_index = 0
self.table_row_height = 0
self.pdf.set_x(self.table_offset)
# Adding an horizontal line separator between rows:
if self.table_line_separators and self.tr_index > 0:
self.output_table_sep()
if tag == "td":
self.td = {k.lower(): v for k, v in attrs.items()}
if "width" in self.td and self.table_col_index >= len(self.table_col_width):
assert self.table_col_index == len(
self.table_col_width
), f"table_col_index={self.table_col_index} #table_col_width={len(self.table_col_width)}"
self.table_col_width.append(self.td["width"])
if attrs:
self.align = attrs.get("align")
self._only_imgs_in_td = False
if tag == "th":
self.td = {k.lower(): v for k, v in attrs.items()}
self.th = True
if "width" in self.td and self.table_col_index >= len(self.table_col_width):
assert self.table_col_index == len(
self.table_col_width
), f"table_col_index={self.table_col_index} #table_col_width={len(self.table_col_width)}"
self.table_col_width.append(self.td["width"])
if tag == "thead":
self.thead = {}
if tag == "tfoot":
self.tfoot = {}
if tag == "img" and "src" in attrs:
width = px2mm(attrs.get("width", 0))
height = px2mm(attrs.get("height", 0))
if self.pdf.y + height > self.pdf.page_break_trigger:
self.pdf.add_page(same=True)
y = self.pdf.get_y()
if self.table_col_index is not None:
self._only_imgs_in_td = True
# in a | : its width must not exceed the cell width:
td_width = self._td_width()
if not width or width > td_width:
if width: # Preserving image aspect ratio:
height *= td_width / width
width = td_width
x = self._td_x()
if self.align and self.align[0].upper() == "C":
x += (td_width - width) / 2
else:
x = self.pdf.get_x()
if self.align and self.align[0].upper() == "C":
x = self.pdf.w / 2 - width / 2
LOGGER.debug(
'image "%s" x=%d y=%d width=%d height=%d',
attrs["src"],
x,
y,
width,
height,
)
self.pdf.image(
self.image_map(attrs["src"]), x, y, width, height, link=self.href
)
self.pdf.set_x(x + width)
if self.table_col_index is not None:
# in a | : we grow the cell height according to the image height:
if height > self.table_row_height:
self.table_row_height = height
else:
self.pdf.set_y(y + height)
if tag in ("b", "i", "u"):
self.set_style(tag, True)
if tag == "center":
self.align = "Center"
if tag == "toc":
self.pdf.insert_toc_placeholder(
self.render_toc, pages=int(attrs.get("pages", 1))
)
def handle_endtag(self, tag):
# Closing tag
LOGGER.debug("ENDTAG %s", tag)
if tag in self.heading_sizes:
self.heading_level = None
face, size, color = self.font_stack.pop()
self.set_font(face, size)
self.set_text_color(*color)
self.pdf.ln(self.h)
self.align = None
if tag == "pre":
face, size, color = self.font_stack.pop()
self.set_font(face, size)
self.set_text_color(*color)
if tag == "blockquote":
self.set_text_color(*self.font_color)
self.indent -= 1
self.pdf.ln(3)
if tag == "strong":
tag = "b"
if tag == "em":
tag = "i"
if tag in ("b", "i", "u"):
self.set_style(tag, False)
if tag == "a":
self.href = ""
if tag == "p":
self.pdf.ln(self.h)
self.align = ""
if tag in ("ul", "ol"):
self.indent -= 1
self.bullet.pop()
if tag == "table":
if not self.tfooter_out:
self.output_table_footer()
self.table = None
self.th = False
self.theader = None
self.tfooter = None
self.pdf.ln(self.h)
self.tr_index = None
if tag == "thead":
self.thead = None
self.tr_index = None
if tag == "tfoot":
self.tfoot = None
self.tr_index = None
if tag == "tbody":
self.tbody = None
self.tr_index = None
if tag == "tr":
if self.tfoot is None:
self.pdf.ln(self.table_row_height)
self.table_col_index = None
self.tr = None
if tag in ("td", "th"):
if self.th:
LOGGER.debug("revert style")
self.set_style("b", False) # revert style
elif self._only_imgs_in_td:
self._insert_td()
self.table_col_index += int(self.td.get("colspan", "1"))
self.td = None
self.th = False
if tag == "font":
# recover last font state
face, size, color = self.font_stack.pop()
self.font_color = color
self.set_font(face, size)
self.set_text_color(*self.font_color)
if tag == "center":
self.align = None
def set_font(self, face=None, size=None):
if face:
self.font_face = face
if size:
self.font_size = size
self.h = size / 72 * 25.4
LOGGER.debug("H %s", self.h)
style = "".join(s for s in ("b", "i", "u") if self.style.get(s)).upper()
if (self.font_face, style) != (self.pdf.font_family, self.pdf.font_style):
self.pdf.set_font(self.font_face, style, self.font_size)
if self.font_size != self.pdf.font_size:
self.pdf.set_font_size(self.font_size)
def set_style(self, tag=None, enable=False):
# Modify style and select corresponding font
if tag:
self.style[tag.lower()] = enable
style = "".join(s for s in ("b", "i", "u") if self.style.get(s))
LOGGER.debug("SET_FONT_STYLE %s", style)
self.pdf.set_font(style=style)
def set_text_color(self, r=None, g=0, b=0):
self.pdf.set_text_color(r, g, b)
def put_link(self, txt):
# Put a hyperlink
self.set_text_color(0, 0, 255)
self.set_style("u", True)
self.pdf.write(self.h, txt, self.href)
self.set_style("u", False)
self.set_text_color(*self.font_color)
# pylint: disable=no-self-use
def render_toc(self, pdf, outline):
"This method can be overriden by subclasses to customize the Table of Contents style."
pdf.ln()
for section in outline:
link = pdf.add_link()
pdf.set_link(link, page=section.page_number)
text = f'{" " * section.level * 2} {section.name}'
text += f' {"." * (60 - section.level*2 - len(section.name))} {section.page_number}'
pdf.multi_cell(w=pdf.epw, h=pdf.font_size, txt=text, ln=1, link=link)
# Subclasses of _markupbase.ParserBase must implement this:
def error(self, message):
raise RuntimeError(message)
class HTMLMixin:
HTML2FPDF_CLASS = HTML2FPDF
def write_html(self, text, *args, **kwargs):
"""Parse HTML and convert it to PDF"""
kwargs2 = vars(self)
# Method arguments must override class & instance attributes:
kwargs2.update(kwargs)
h2p = self.HTML2FPDF_CLASS(self, *args, **kwargs2)
text = html.unescape(text) # To deal with HTML entities
h2p.feed(text)
|