# This file is part of Rubber and thus covered by the GPL # (c) Emmanuel Beffara, 2002--2006 """ LaTeX document building system for Rubber. This module defines the class that parses the input LaTeX to load the expected modules. """ import re class TexParser: re_input = re.compile("\\\\input +(?P[^{} \n\\\\]+)") re_comment = re.compile(r"(?P([^\\%]|\\%|\\)*)(%.*)?") def __init__(self, doc): self.doc = doc self.comment_mark = "%" self.exclude_mods = [] self.hooks = { "usepackage" : self.h_usepackage, "begin{btSect}": self.h_bibtopic, } self.update_rehooks() def update_rehooks(self): """ Update the regular expression used to match macro calls using the keys in the `hook' dictionary. We don't match all control sequences for obvious efficiency reasons. """ # Make a "foo|bar\*stub" list hooklist = [x.replace("*", "\\*") for x in self.hooks] pattern = "\\\\(?P%s)\*?"\ " *(\\[(?P[^\\]]*)\\])?"\ " *({(?P[^{}]*)}|(?=[^A-Za-z]))" self.rehooks = re.compile(pattern % "|".join(hooklist)) def add_hook(self, name, fun): """ Register a given function to be called (with no arguments) when a given macro is found. """ self.hooks[name] = fun self.update_rehooks() def parse(self, fd, exclude_mods=None): """ Process a LaTeX source. The file must be open, it is read to the end calling the handlers for the macro calls. This recursively processes the included sources. If the optional argument 'dump' is not None, then it is considered as a stream on which all text not matched as a macro is written. """ self.exclude_mods = exclude_mods or [] self.lineno = 0 for line in fd: self.parse_line(line) def parse_line(self, line, dump=None): self.lineno += 1 # Remove comments line = self.re_comment.match(line).group("line") match = self.rehooks.search(line) while match: dict = match.groupdict() name = dict["name"] # The case of \input is handled specifically, because of the # TeX syntax with no braces if name == "input" and not dict["arg"]: match2 = self.re_input.search(line) if match2: match = match2 dict = match.groupdict() if dump: dump.write(line[:match.start()]) dict["match"] = line[match.start():match.end()] dict["line"] = line[match.end():] #dict["pos"] = { 'file': self.vars["file"], 'line': self.lineno } dict["pos"] = { 'file': "file", 'line': self.lineno } dict["dump"] = dump # if self.env.caching: # self.cache_list.append(("hook", name, dict)) self.hooks[name](dict) line = dict["line"] match = self.rehooks.search(line) if dump: dump.write(line) def h_usepackage(self, dict): """ Called when a \\usepackage macro is found. If there is a package in the directory of the source file, then it is treated as an include file unless there is a supporting module in the current directory, otherwise it is treated as a package. """ if not dict["arg"]: return for name in dict["arg"].split(","): name = name.strip() # file = self.env.find_file(name + ".sty") # if file and not exists(name + ".py"): # self.process(file) # else: if (name in self.exclude_mods): continue self.doc.modules.register(name, dict) def h_bibtopic(self, dict): """ Called when a \\btSect macro is found. It can also be loaded by a usepackage of bibtopic. Note that once loaded the btSect hook will be preempted by the bibtopic module hook. """ if ("bibtopic" in self.exclude_mods): return self.doc.modules.register("bibtopic", dict)