#! /usr/bin/env python

# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)
#
# Reads a text file given on standard input or named as first argument, and
# generates HTML 2.0 on standard output.  Recognizes these constructions:
#
#     HTML element             pattern at the beginning of a line
#
#     section heading          (<number><period>)+<space>
#     numbered list element    <1-2 spaces>(<number><period>)+<space>
#     unnumbered list element  <0-2 spaces><hyphen or asterisk><space>
#     preformatted section     <three or more spaces>
#
# Heading level is determined by the number of (<number><period>) segments.
# Blank lines force a separation of elements; if none of the above four
# types is indicated, a new paragraph begins.  A line beginning with many
# spaces is interpreted as a continuation (instead of preformatted) after
# a list element.  Headings are anchored; paragraphs starting with "Q." are
# emphasized, and those marked with "A." get their first sentence emphasized.
#
# Hyperlinks are created from references to:
#     URLs, explicitly marked using <URL:...>
#     e-mail addresses, written as <user@host>
#     other questions, of the form "question <number>(<period><number>)*"
#     sections, of the form "section <number>".
#
# NOTE(review): this copy of the script had been damaged (markup removed from
# string literals and comments, runs of spaces collapsed, line structure
# lost).  Literals marked "reconstructed" below were restored from context
# and should be confirmed against a pristine copy.  The script was also
# ported from the long-removed `regex`/`regsub` modules to `re`.

import re
import sys

# --------------------------------------------------------- regular expressions
# One or two leading spaces: with zero spaces allowed this pattern would
# swallow every heading, because it is tested before headingprog.
orditemprog = re.compile(r'  ?([1-9][0-9]*\.)+ +')
itemprog = re.compile(r' ? ?[-*] +')
headingprog = re.compile(r'([1-9][0-9]*\.)+ +')
prefmtprog = re.compile(r'   ')         # reconstructed: three spaces
blankprog = re.compile(r'^[ \t\r\n]$')
questionprog = re.compile(r' *Q\. +')
answerprog = re.compile(r' *A\. +')
sentprog = re.compile(r'(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')

mailhdrprog = re.compile(r'^(Subject|Newsgroups|Followup-To|From|Reply-To'
                         r'|Approved|Archive-Name|Version|Last-Modified): +',
                         re.IGNORECASE)
versionprog = re.compile(r'Version: ')
# These run on the already entity-escaped body, hence &lt; / &gt;.
urlprog = re.compile(r'&lt;URL:([^&]+)&gt;')
addrprog = re.compile(r'&lt;([^>@:]+@[^&@:]+)&gt;')
qrefprog = re.compile(r'question +([1-9](\.[0-9]+)*)')
srefprog = re.compile(r'section +([1-9][0-9]*)')
entityprog = re.compile(r'[&<>]')
fragtrimprog = re.compile(r'^ +|\.? +$')

# ------------------------------------------------------------ global variables
body = []                               # output fragments, joined at the end
ollev = ullev = 0                       # ordered-list depth / inside-<ul> flag
element = content = secnum = version = ''


def _matchlen(prog, text):
    """Return the length of the match of prog at the start of text, or -1.

    Mirrors the return convention of the old `regex` module's match(), which
    the rest of the script relies on (callers test `n > 0`).
    """
    found = prog.match(text)
    if found is None:
        return -1
    return found.end()


def _reset():
    """Put all converter state back to its initial values."""
    global body, ollev, ullev, element, content, secnum, version
    body = []
    ollev = ullev = 0
    element = content = secnum = version = ''


# ----------------------------------------------------- for making nested lists
def dnol():
    """Open one more level of ordered list."""
    global body, ollev
    ollev = ollev + 1
    # Re-open the previous item so the nested list sits inside it.  `body`
    # may be empty when the very first input line is a numbered item.
    if body and body[-1] == '</li>':    # reconstructed literal
        del body[-1]
    body.append('<ol>')                 # reconstructed literal


def upol():
    """Close one level of ordered list."""
    global body, ollev
    ollev = ollev - 1
    # A nested list also closes the item that contains it.
    if ollev:
        body.append('</ol></li>')       # reconstructed literal
    else:
        body.append('</ol>')            # reconstructed literal


# --------------------------------- output one element and convert its contents
def spew(clearol=0, clearul=0):
    """Flush the pending element to `body`, then optionally close lists.

    clearol -- if true, close every open ordered list.
    clearul -- if true, close the open unordered list, if any.
    """
    global content, body, ollev, ullev

    if content:
        if entityprog.search(content):
            # '&' must be escaped first so the other entities stay intact.
            content = content.replace('&', '&amp;')
            content = content.replace('<', '&lt;')
            content = content.replace('>', '&gt;')

        n = _matchlen(questionprog, content)
        if n > 0:
            content = '<em>' + content[n:] + '</em>'
            # NOTE(review): nesting of this test under the question branch
            # is inferred from its comment; confirm against upstream.
            if ollev:                   # question reference in index
                fragid = fragtrimprog.sub('', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)

        if element[0] == 'h':           # heading in the main text
            fragid = fragtrimprog.sub('', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)

        n = _matchlen(answerprog, content)
        if n > 0:                       # answer paragraph
            content = sentprog.sub(r'<strong>\1</strong>', content[n:], 1)

        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''

    while clearol and ollev:
        upol()
    if clearul and ullev:
        body.append('</ul>')            # reconstructed literal
        ullev = 0


# ------------------------------------------------------------------ conversion
def convert(lines):
    """Convert FAQ text lines (each ending in a newline) to an HTML document.

    Returns the complete document as one string, identical to what the
    script writes to standard output.  Each heading line is echoed to
    standard error as a progress indication.
    """
    global body, ollev, ullev, element, content, secnum, version
    _reset()

    for line in lines:
        if line[2:9] == '=======':      # <hr> will appear *before*
            body.append('<hr>')         # the underlined heading
            continue

        n = _matchlen(orditemprog, line)
        if n > 0:                       # make ordered list item
            spew(0, 'clear ul')
            secnum = line[:n]
            level = secnum.count('.')
            while level > ollev:
                dnol()
            while level < ollev:
                upol()
            element, content = 'li', line[n:]
            continue

        n = _matchlen(itemprog, line)
        if n > 0:                       # make unordered list item
            spew('clear ol', 0)
            if ullev == 0:
                body.append('<ul>')     # reconstructed literal
                ullev = 1
            element, content = 'li', line[n:]
            continue

        n = _matchlen(headingprog, line)
        if n > 0:                       # make heading element
            spew('clear ol', 'clear ul')
            secnum = line[:n]
            sys.stderr.write(line)
            element, content = 'h%d' % secnum.count('.'), line[n:]
            continue

        n = 0
        if not secnum:                  # haven't hit body yet
            n = _matchlen(mailhdrprog, line)
            if not version:
                v = _matchlen(versionprog, line)
                if v > 0:
                    version = line[v:]  # keeps the trailing newline
        if n <= 0 and element != 'li':  # not pre if after a list item
            n = _matchlen(prefmtprog, line)
        if n > 0:                       # make preformatted element
            if element == 'pre':
                content = content + line
            else:
                spew('clear ol', 'clear ul')
                element, content = 'pre', line
            continue

        if _matchlen(blankprog, line) > 0:  # force a new element
            spew()
            element = ''
        elif element:                   # continue current element
            content = content + line
        else:                           # no element; make paragraph
            spew('clear ol', 'clear ul')
            element, content = 'p', line

    # Output the last element and close any list still open at end of input
    # so the generated document is well formed.
    spew('clear ol', 'clear ul')

    text = ''.join(body)
    # Reconstructed replacement strings: link text is the matched reference.
    text = urlprog.sub(r'<a href="\1">\1</a>', text)
    text = addrprog.sub(r'<a href="mailto:\1">\1</a>', text)
    text = qrefprog.sub(r'<a href="#\1">question \1</a>', text)
    text = srefprog.sub(r'<a href="#\1">section \1</a>', text)

    # Reconstructed wrapper markup; the visible text is unchanged.
    pieces = [
        '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"><html>',
        '<head><title>Python Frequently-Asked Questions v' + version,
        "</title></head><body>(This file was generated using Ping's",
        'faq2html.py.)',
        text + '</body></html>',
    ]
    return '\n'.join(pieces) + '\n'


# ---------------------------------------------------------------- main program
def main(argv=None):
    """Read the FAQ named by argv[1] (or standard input) and print HTML."""
    if argv is None:
        argv = sys.argv
    if len(argv) > 1 and argv[1]:
        faq = open(argv[1])
        try:
            lines = faq.readlines()
        finally:
            faq.close()
    else:
        lines = sys.stdin.readlines()
    sys.stdout.write(convert(lines))


if __name__ == '__main__':
    main()