Syntax highlighting for code snippets embedded in blog posts (or any other HTML content)

Example Usage

{{=XML(__highlight__(blog_post.content))}}

Code - Place this in a model (functions.py)

def html_entity_decode(text):
    """
    Replaces HTML or XML character references and entities in a text string
    with the characters they stand for.

    Decimal references ("&#65;"), hexadecimal references ("&#x41;" or
    "&#X41;") and named entities ("&lt;") are decoded.  Anything that cannot
    be decoded -- an unknown entity name, malformed digits, or a code point
    outside the valid range -- is left in the text unchanged.

    @param text The HTML (or XML) source text.
    @return The text with every decodable reference replaced.
    """
    import re
    try:
        from htmlentitydefs import name2codepoint  # Python 2
    except ImportError:
        from html.entities import name2codepoint  # Python 3
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3

    def fixup(m):
        ref = m.group(0)
        try:
            if ref[:2] == "&#":
                # character reference; the "x" marker is case-insensitive
                if ref[2:3] in ("x", "X"):
                    return to_char(int(ref[3:-1], 16))
                return to_char(int(ref[2:-1]))
            # named entity
            return to_char(name2codepoint[ref[1:-1]])
        except (KeyError, ValueError, OverflowError):
            # Unknown name (KeyError) or code point out of range
            # (ValueError / OverflowError for very large numbers).
            return ref  # leave as is

    # Numeric alternatives accept ASCII digits only, so int() never sees
    # anything it would interpret more leniently than HTML does.
    return re.sub(r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|\w+);", fixup, text)

def __highlight__(content, dom_element='pre'):
    """
    Performs syntax highlighting on text inside of dom_element
    Uses BeautifulSoup for processing and pygments for highlighting

    Each dom_element tag is expected to name its language in a ``lang``
    attribute (e.g. <pre lang="python">).  Tags without a ``lang``
    attribute are left untouched; tags whose language pygments does not
    recognise are rendered with the plain-text lexer instead of aborting
    the whole document.

    @param content The HTML (or XML) content to parse
    @param dom_element The dom tag to search and replace with highlighted
    @return The content with highlighted code within dom_element

    """
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter
    from pygments.util import ClassNotFound
    from BeautifulSoup import BeautifulSoup

    # NOTE(review): entities are decoded across the WHOLE document before it
    # is parsed, so escaped markup outside dom_element (e.g. "&lt;script&gt;")
    # becomes live markup in the result.  Only pass trusted content, or
    # confirm this is intended before rendering the result unescaped.
    soup = BeautifulSoup(html_entity_decode(content))

    formatter = HtmlFormatter(linenos=True, noclasses=True)

    # findAll returns a list, so replacing tags while looping is safe.
    for tag in soup.findAll(dom_element):
        language = tag.get('lang')
        if not language:
            # No language requested: keep the element exactly as written.
            continue
        try:
            lexer = get_lexer_by_name(language, encoding='UTF-8')
        except ClassNotFound:
            # Unknown language alias: fall back to unhighlighted text.
            lexer = get_lexer_by_name('text', encoding='UTF-8')
        tag.replaceWith(highlight(tag.renderContents(), lexer, formatter))
    return unicode(soup)

Comments (0)