"""
Table of Contents Extension for Python-Markdown
* * *

(c) 2008 [Jack Miller](http://codezen.org)

Dependencies:
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)

"""
| 11 |
import markdown |
| 12 |
from markdown import etree |
| 13 |
import re |
| 14 |
|
| 15 |
class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """Build a nested table of contents from the document's headers.

    Every header element (h1-h6) that has leading text gets an ``id``
    attribute (unless it already has one) and a matching ``<li><a>`` entry
    in a ``<div class="toc">``.  Any non-header element whose text contains
    the configured marker is replaced by that div.

    The processor reads its settings from ``self.config``, a mapping of
    option name to ``[value, description]`` with the keys ``"marker"``,
    ``"slugify"``, ``"title"`` and ``"anchorlink"``.
    """

    def iterparent(self, root):
        """Yield ``(parent, child)`` for every element below ``root``."""
        for parent in root.getiterator():
            for child in parent:
                yield parent, child

    def run(self, doc):
        """Insert the table of contents into ``doc``, modifying it in place.

        ``doc`` is the root element of the document tree.  Nothing is
        returned; the tree is edited directly.
        """
        div = etree.Element("div")
        div.attrib["class"] = "toc"

        # Optional title shown above the list.
        title = self.config["title"][0]
        if title:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = title

        marker = self.config["marker"][0]
        header_rgx = re.compile("[Hh][123456]")

        # Collect the ids already present so generated ones do not clash.
        used_ids = set()
        for elem in doc.getiterator():
            if "id" in elem.attrib:
                used_ids.add(elem.attrib["id"])

        # list_stack[0] is the div; list_stack[1:] are the currently open
        # <ul> elements, outermost first.  ``level`` is the header level the
        # innermost open list stands for (0 until the first header is seen).
        level = 0
        list_stack = [div]
        last_li = None

        # Walk a snapshot of the tree: the loop below replaces elements and
        # appends anchor links, and those insertions must not be scanned
        # themselves (an anchor repeating a header's text could otherwise be
        # mistaken for the marker and replaced by the TOC).
        for parent, child in list(self.iterparent(doc)):
            if not child.text:
                continue

            if not header_rgx.match(child.tag):
                # To keep the output valid we do not put the <div> inside
                # the element carrying the marker (typically a <p>); we
                # replace that element in its entirety.  The marker is not
                # honoured inside a header, as that would cause an endless
                # loop of placing a new TOC inside the generated TOC.
                if marker in child.text:
                    for index, sibling in enumerate(parent):
                        if sibling is child:
                            parent[index] = div
                            break
                continue

            tag_level = int(child.tag[-1])

            # Close nested lists when moving to a shallower header, but
            # never pop the outermost <ul>: a header shallower than the
            # first one seen stays in that list instead of emptying the
            # stack (which used to raise IndexError) or landing directly
            # inside the <div>.
            while tag_level < level and len(list_stack) > 2:
                list_stack.pop()
                level -= 1
            if tag_level < level:
                level = tag_level

            if tag_level > level:
                # Open a new list, nested under the previous entry if any.
                newlist = etree.Element("ul")
                if last_li is not None:
                    last_li.append(newlist)
                else:
                    list_stack[-1].append(newlist)
                list_stack.append(newlist)
                # The first header fixes the base level; after that each
                # deeper header opens exactly one more nesting level.
                if level == 0:
                    level = tag_level
                else:
                    level += 1

            # Do not override pre-existing ids.
            if "id" in child.attrib:
                anchor_id = child.attrib["id"]
            else:
                anchor_id = self.config["slugify"][0](child.text)
                if anchor_id in used_ids:
                    ctr = 1
                    while "%s_%d" % (anchor_id, ctr) in used_ids:
                        ctr += 1
                    anchor_id = "%s_%d" % (anchor_id, ctr)
                used_ids.add(anchor_id)
                child.attrib["id"] = anchor_id

            # List item link, to be inserted into the toc div.
            last_li = etree.Element("li")
            link = etree.SubElement(last_li, "a")
            link.text = child.text
            link.attrib["href"] = '#' + anchor_id

            # Optionally turn the header text into a link to itself.
            if int(self.config["anchorlink"][0]):
                anchor = etree.SubElement(child, "a")
                anchor.text = child.text
                anchor.attrib["href"] = "#" + anchor_id
                anchor.attrib["class"] = "toclink"
                child.text = ""

            list_stack[-1].append(last_li)
| 106 |
|
| 107 |
class TocExtension(markdown.Extension):
    """Markdown extension that installs the table-of-contents processor."""

    def __init__(self, configs):
        """Set the default options, then apply the overrides in ``configs``.

        ``configs`` may be a mapping of option name to value or an iterable
        of ``(name, value)`` pairs; ``None`` or an empty container leaves
        the defaults untouched.
        """
        self.config = { "marker" : ["[TOC]",
                            "Text to find and replace with Table of Contents -"
                            "Defaults to \"[TOC]\""],
                        "slugify" : [self.slugify,
                            "Function to generate anchors based on header text-"
                            "Defaults to a built in slugify function."],
                        "title" : [None,
                            "Title to insert into TOC <div> - "
                            "Defaults to None"],
                        "anchorlink" : [0,
                            "1 if header should be a self link"
                            "Defaults to 0"]}

        # Iterating a dict directly yields only its keys, so a mapping has
        # to be expanded into (key, value) pairs first.
        if configs is None:
            pairs = ()
        elif hasattr(configs, "items"):
            pairs = configs.items()
        else:
            pairs = configs

        for key, value in pairs:
            self.setConfig(key, value)

    # This mirrors Django's slugify.
    def slugify(self, value):
        """ Slugify a string, to make it URL friendly.

        Non-ASCII characters are decomposed and dropped, anything other
        than word characters, whitespace and hyphens is removed, the result
        is stripped and lower-cased, and runs of whitespace/hyphens collapse
        to a single hyphen.  ``value`` must be a text (unicode) string.
        """
        import unicodedata
        # Decode back to text right away so the regexes below operate on
        # text on both Python 2 and Python 3.
        ascii_bytes = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
        value = ascii_bytes.decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return re.sub(r'[-\s]+', '-', value)

    def extendMarkdown(self, md, md_globals):
        """Register the TOC tree processor on ``md`` under the name "toc".

        ``md_globals`` is accepted for interface compatibility and unused.
        """
        tocext = TocTreeprocessor(md)
        # The processor reads its options from the same config mapping.
        tocext.config = self.config
        md.treeprocessors.add("toc", tocext, "_begin")
| 137 |
|
| 138 |
def makeExtension(configs=None):
    """Create the extension instance; entry point used to load this module.

    ``configs`` holds the option overrides handed to ``TocExtension``.
    When omitted, an empty dict is passed, as before.
    """
    # A None sentinel avoids sharing one mutable default across calls.
    if configs is None:
        configs = {}
    return TocExtension(configs=configs)