......@@ -5,6 +5,8 @@ Convert spip YAML content to markdown following
from anytree import Node, RenderTree
import argparse
from ruamel.yaml import YAML
import bs4
from colorlog import ColoredFormatter
......@@ -373,6 +375,49 @@ summary: {self.summary}
return self.skip_reason
class Tree(dict):
    """Nested-dict tree whose missing keys auto-create empty subtrees.

    Data is stored in the form::

        {
            "a": {
                "b": {},
                "c": {},
            },
            "d": {
                "e": {},
            },
        }

    and can be nested to any depth. A leaf is simply an empty ``Tree``.
    """

    def __missing__(self, key):
        """Create, store and return an empty subtree for an unknown *key*.

        ``dict`` invokes this hook when ``self[key]`` fails, so merely
        reading a missing key inserts an empty subtree under it.
        """
        value = self[key] = type(self)()
        return value

    def insert(self, node, ancestors):
        """Insert *node*, creating all of its ancestors as required.

        Args:
            node: Key of the node to add.
            ancestors: Iterable (possibly empty) of ancestor keys, ordered
                from the outermost ancestor down to the direct parent.
        """
        branch = self
        for ancestor in ancestors:
            # Indexing goes through __missing__, so absent ancestors are
            # created on the fly while descending.
            branch = branch[ancestor]
        # Add the node explicitly, and only when absent, so that children
        # already attached to an existing node are preserved.
        if node not in branch:
            branch[node] = type(branch)()

    def label(self, labels, sort_key=lambda x: x[0]):
        """Return the tree as nested, sorted ``(label, children)`` 2-tuples.

        Realistically, the labels could be any type of object, as long as
        *sort_key* can order the resulting tuples.

        NOTE(review): a key without an entry in *labels* is rendered as the
        key itself -- confirm this fallback matches the intended behaviour.

        Args:
            labels: Mapping from node key to the label shown for that node.
            sort_key: Key function applied to each ``(label, children)``
                tuple to order siblings; defaults to sorting by label.

        Returns:
            A list of ``(label, children)`` tuples, where ``children`` is
            the same structure built from the node's subtree (an empty list
            for a leaf).
        """
        return sorted(
            (
                (labels.get(key, key), child.label(labels, sort_key))
                for key, child in self.items()
            ),
            key=sort_key,
        )
class Website:
"""Define a website from Spip data"""
......@@ -445,7 +490,7 @@ class Website:
"""Return category from id_rubrique"""
while id_rubrique not in self.categories:
id_rubrique = parents[id_rubrique]
id_rubrique = self.parents[id_rubrique]
except KeyError:
id_rubrique = -1
......@@ -455,10 +500,63 @@ class Website:
with open(self.rubriques_filename, mode='r') as yml_file:
rubriques = yaml.load(
parents = {rubrique['id_rubrique']: rubrique['id_parent'] for rubrique in rubriques}
self.parents = {rubrique['id_rubrique']: rubrique['id_parent'] for rubrique in rubriques}
self.labels = {rubrique['id_rubrique']: rubrique['titre'] for rubrique in rubriques}
self.category_index = {rubrique['id_rubrique']: get_category(rubrique['id_rubrique'])
for rubrique in rubriques}
def print_rubrique_tree(self):
"""Print the rubrique structure as a tree using anytree"""
# Reads self.parents (id -> parent id) and self.labels (id -> title).
# Side effect: stores a "root" title under id 0 in self.labels (see below).
def get_label(node_id):
"""Render node label from node id"""
# Labels have the form "<id>: <title>"; they are used as Tree keys and as anytree node names.
return f"{node_id}: {self.labels[node_id]}"
def walk(node, parent=None):
A recursive function to walk a Tree object in order to build an anytree.Node tree
node: a Tree object
parent: a Tree object
# NOTE(review): `parent` is handed to Node(parent=...) and the recursive call passes nodes[name],
# so in practice it is an anytree Node (or None), not a Tree -- confirm and fix the description above.
if len(node):
# node is a real Tree, not a leaf
# Only the first key/value pair of `node` is considered: the key becomes the Node name,
# the value is the mapping of its children.
name = list(node.keys())[0]
nodes[name] = Node(name, parent=parent)
children = list(node.values())[0]
# Each child is re-wrapped as a one-entry dict so the recursive call sees the same shape.
for k, v in children.items():
walk({k: v}, nodes[name])
# Entries meant for `data` are plain dicts with 'node', 'label' and 'ancestors' keys (built below).
data = [] # a list of Tree nodes
tree = Tree() # a Tree object to store rubrique structure as a nested dict
self.labels[0] = "root" # Root node label
# Build ancestors list
for node_id in self.parents:
node = {}
label = get_label(node_id)
node['node'] = label
node['label'] = self.labels[node_id]
node['ancestors'] = []
current_id = node_id
# Climb the parent chain until id 0 is reached; each ancestor label is prepended,
# so the list ends up ordered from the root down to the direct parent.
# A parent id missing from self.parents or self.labels raises KeyError here.
while current_id != 0:
# Insert node if it is not the tree root
current_id = self.parents[current_id]
label = get_label(current_id)
node['ancestors'].insert(0, label)
# NOTE(review): no statement visible in this view appends `node` to `data` -- confirm it is
# added after the while loop, otherwise the loop below never inserts anything.
for node in data:
tree.insert(node['node'], node['ancestors'])
# Create a dictionary of anytree.Node objects
nodes = {get_label(0): Node(get_label(0))}
# NOTE(review): no call to walk() is visible in this view -- confirm it is invoked on `tree`
# so that `nodes` is populated beyond the root entry.
# Render the anytree object like the bash "tree" command would do
# '0: root' is the value get_label(0) returns given the "root" title set above.
for pre, fill, node in RenderTree(nodes['0: root']):
def _build_doc_index(self):
"""Build the index dictionary: {id_doc: file_path}"""
......@@ -489,6 +587,7 @@ class Website:
def add_articles(filename, spip_type):
"""Add article or rubrique to website.articles and website.article_index"""
# Load original article file as a list
with open(filename, mode='r') as yml_file:
spip_articles = yaml.load(yml_file)
......@@ -532,8 +631,20 @@ class Website:
logger.debug("See ouput.log")
if __name__ == '__main__':
def parse_cl_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads ``sys.argv[1:]``, which is the
            behaviour existing callers get.

    Returns:
        An ``argparse.Namespace`` with the boolean attributes ``rubriques``
        and ``convert``.
    """
    parser = argparse.ArgumentParser(
        description="Convert Spip website as YAML data to Pelican format",
    )
    parser.add_argument(
        '-r', '--rubriques',
        action='store_true',
        help="Show Spip rubriques structure as a tree",
    )
    # NOTE(review): store_true combined with default=True makes args.convert
    # True whether or not -c/--convert is given; conversion is therefore the
    # default action and the flag itself changes nothing.
    parser.add_argument(
        '-c', '--convert',
        action='store_true',
        default=True,
        help="Convert to Pelican",
    )
    return parser.parse_args(argv)
if __name__ == '__main__':
args = parse_cl_args()
website = Website()
if args.rubriques:
# Show only Spip rubrique structure
