#!/usr/bin/env python # -*- coding: utf-8 -*- # Time-stamp: <2008-06-23 22:21:26 ah> """ Provide an encoder for a font specification configuration: the encoder is fed with Unicode characters one by one and determines the needed font switches between the preceding and the current character. """ import re import xml.dom.minidom import logging def _indent(string, width=2): """Indent the lines by blank characters.""" istr = ' ' * width s = istr + istr.join(string.splitlines(1)) return s class UnicodeInterval: """Unicode codepoint interval, including all codepoints between its minimum and maximum boundary. For any Unicode codepoint it can be queried if it belongs to the interval. """ # Internal data attributes: # _min_boundary: Minimum boundary of the codepoint interval (ordinal) # _max_boundary: Maximum boundary of the codepoint interval (ordinal) _re_codepoint = re.compile(r'^[Uu]\+?([0-9A-Fa-f]+)$') def __init__(self): self._min_boundary = 0 self._max_boundary = 0 def __str__(self): """Dump the instance's data attributes.""" string = '[' + str(self._min_boundary) if self._max_boundary != self._min_boundary: string += ',' + str(self._max_boundary) string += ']' return string def _unicode_to_ordinal(self, codepoint): """Return the ordinal of the specified codepoint.""" m = self._re_codepoint.match(codepoint) if m: return int(m.group(1), 16) else: raise RuntimeError, 'Not a unicode codepoint: ' + codepoint def from_char(self, char): """Interval for a single character""" self._min_boundary = ord(char) self._max_boundary = self._min_boundary return self def from_codepoint(self, codepoint): """Interval for a single character defined as unicode string.""" self._min_boundary = self._unicode_to_ordinal(codepoint) self._max_boundary = self._min_boundary return self def from_interval(self, codepoint1, codepoint2): """Interval from a unicode range.""" self._min_boundary = self._unicode_to_ordinal(codepoint1) self._max_boundary = self._unicode_to_ordinal(codepoint2) if self._min_boundary > self._max_boundary: self._min_boundary, self._max_boundary = \ self._max_boundary, self._min_boundary return self def contains(self, char): """ Determine whether the specified character is contained in this instance's interval. """ #print "%d in [%d - %d]?" % (ord(char), self._min_boundary,self._max_boundary) return (ord(char) >= self._min_boundary and ord(char) <= self._max_boundary) class FontSpec: """ Font specification, consisting of one or several unicode character intervals and of fonts to select for those characters. The object fully defines the fonts to switch to. """ # Internal data attributes: # _intervals: UnicodeInterval list transition_types = ['enter', 'inter', 'exit'] _re_interval = re.compile(r'^([Uu][0-9A-Fa-f]+)-([Uu][0-9A-Fa-f]+)$') _re_codepoint = re.compile(r'^([Uu][0-9A-Fa-f]+)$') def __init__(self, intervals=None, subfont_first=False): """Create a font specification from the specified codepoint intervals. The other data attributes will be set by the caller later. """ self.type = "" self.id = None self.refmode = None self.transitions = {} self.fontspecs = [self] self.subfont_first = subfont_first self._ignored = [] self.log = logging.getLogger("dblatex") for type in self.transition_types: self.transitions[type] = {} if not(intervals): self._intervals = [] return try: self._intervals = list(intervals) except TypeError: self._intervals = [intervals] def fromnode(self, node): range = node.getAttribute('range') charset = node.getAttribute('charset') id = node.getAttribute('id') refmode = node.getAttribute('refmode') self.type = node.getAttribute('type') if (range): self._intervals = self._parse_range(range) elif (charset): for char in charset: self.add_char(char) # Unique identifier if (id): self.id = id if (refmode): self.refmode = refmode for transition_type in self.transition_types: self._parse_transitions(node, transition_type) def mainfont(self): # Try to return the most representative font of this spec return (self.transitions["enter"].get("main") or self.transitions["enter"].get("sans")) def _parse_range(self, range): """Parse the specified /fonts/fontspec@range attribute to a UnicodeInterval list. """ #print range intervals = [] chunks = range.split() for chunk in chunks: m = self._re_interval.match(chunk) #print match if m: urange = UnicodeInterval().from_interval(m.group(1), m.group(2)) intervals.append(urange) else: m = self._re_codepoint.match(chunk) if m: intervals.append( UnicodeInterval().from_codepoint(m.group(1))) else: raise RuntimeError, 'Unable to parse range: "' + range + '"' return intervals def _parse_transitions(self, node, transition_type): """Evaluate the font elements of the specified fontspec element for the specified transition type (enter, inter or exit). """ fontlist = self.transitions[transition_type] for dom_transition in node.getElementsByTagName(transition_type): for dom_font in dom_transition.getElementsByTagName('font'): font = '' types = dom_font.getAttribute("type") types = types.split() for dom_child in dom_font.childNodes: if dom_child.nodeType == dom_child.TEXT_NODE: font += dom_child.nodeValue if (font): for type in types: fontlist[type] = font def _switch_to(self, fonts): """ Return a string with the XeTeX font switching commands for the specified font types. """ s = '' for type, font in fonts.items(): s += '\switch%sfont{%s}' % (type, font) if s: s = r"\savefamily" + s + r"\loadfamily{}" return s def enter(self): self.log.debug("enter in %s" % self.id) s = self._switch_to(self.transitions["enter"]) return s def exit(self): self.log.debug("exit from %s" % self.id) s = self._switch_to(self.transitions["exit"]) return s def interchar(self): s = self._switch_to(self.transitions["inter"]) return s def __str__(self): """Dump the instance's data attributes.""" string = 'FontSpec:' string += '\n Id: %s' % self.id string += '\n Refmode: %s' % self.refmode string += '\n subFirst: %s' % self.subfont_first for interval in self._intervals: string += '\n' + _indent(str(interval)) return string def add_subfont(self, fontspec): self.log.debug("%s -> %s" % (self.id, fontspec.id)) if self.subfont_first: self.fontspecs.insert(-1, fontspec) else: self.fontspecs.append(fontspec) def add_char(self, char): self._intervals.append(UnicodeInterval().from_char(char)) def add_uranges(self, ranges, depth=1): # Recursively extend the supported character range if depth: for f in self.fontspecs: if f != self: f.add_uranges(ranges) self._intervals.extend(ranges) def add_ignored(self, ranges, depth=1): if depth: for f in self.fontspecs: if f != self: f.add_ignored(ranges) self._ignored.extend(ranges) def get_uranges(self): return self._intervals def contains(self, char): #print "%s: %s" % (self.id, self._intervals) for interval in self._intervals: if interval.contains(char): return True else: return False def isignored(self, char): self.log.debug("%s: %s" % (self.id, [ str(a) for a in self._ignored ])) for interval in self._ignored: if interval.contains(char): return True else: return False def _loghas(self, id, char): try: self.log.debug("%s has '%s'" % (id, str(char))) except: self.log.debug("%s has '%s'" % (id, ord(char))) def match(self, char, excluded=None): """Determine whether the font specification matches the specified object, thereby considering refmode. """ fontspec = None self.log.debug( "Lookup in %s" % self.id) if self.isignored(char): self._loghas(self.id, char) return self for fontspec in self.fontspecs: # Don't waste time in scanning excluded nodes if fontspec == excluded: continue #print " Look in %s" % fontspec.id if fontspec.contains(char): self._loghas(fontspec.id, char) return fontspec return None