Commit 18c1b8d1 authored by Matthieu Boileau

Merge branch 'dev-rst' into 'master'

Dev rst

See merge request !2
parents d90d74b8 a3c09591
......@@ -27,11 +27,13 @@ SKIP_REASON = {"skip_rub": "belonging to a skipped rubrique",
"unpub": "not published"}
SHORTEN = {'article': 'art', 'rubrique': 'rub', 'breve': 'brev', 'message': 'mess'}
CONTENTDIR = 'content'
LOGFILE = 'spip2pelican.log'
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# create file handler which logs up to info messages
fh = logging.FileHandler('output.log', mode='w')
fh = logging.FileHandler(LOGFILE, mode='w')
fh.setLevel(logging.INFO)
# create console handler with higher log level and colored output
......@@ -47,9 +49,9 @@ logger.addHandler(ch)
class SpipToMarkdown:
"""A class to convert a Spip article string into a markdown Pelican article string"""
def __init__(self, website):
"""A generic class to export spip format to a markup language"""
self.website = website
def convert(self, s):
......@@ -295,20 +297,22 @@ class SpipToMarkdown:
class SpipToRst(SpipToMarkdown):
    """A class to export spip article format to a ReStructuredText Pelican article"""

    def convert(self, s, preserve_line_breaks=False):
        """Convert a Spip string to reStructuredText.

        The string is first converted to markdown by the parent class, then
        passed through pandoc (markdown -> rst).

        :param s: the Spip-formatted string to convert
        :param preserve_line_breaks: if True, pandoc is run with
            ``--wrap=preserve`` so that it keeps the line breaks of its input;
            otherwise ``--wrap=auto`` is used
        :return: the rst string
        """
        markdown = super().convert(s)
        wrap_mode = 'preserve' if preserve_line_breaks else 'auto'
        rst = pypandoc.convert_text(markdown, 'rst', format='md',
                                    extra_args=[f'--wrap={wrap_mode}'])
        # Correct unwanted pandoc translation: the braces of "{filename}" come
        # back percent-encoded. A plain literal replacement is enough here, and
        # one pass handles every occurrence.
        return rst.replace("%7Bfilename%7D", "{filename}")
class Article:
"""A single Spip article or rubrique to be converted into Pelican"""
"""A generic class for a single Spip article or rubrique to be converted into a Pelican article file"""
def __init__(self, spip_article, spip_type, website):
......@@ -365,53 +369,17 @@ class Article:
self.tags = [self.type]
if self.website.ml_type == 'md':
# Instanciate a spip -> markdown translator
s2md = SpipToMarkdown(self.website)
self.convert = s2md.convert
def convert_title(title):
return self.convert(title).strip() # strip to remove any CR at end of string
self.convert_title = convert_title
self.get_header = self.get_header_markdown
elif self.website.ml_type == 'rst':
# Instanciate a spip -> rst translator
s2rst = SpipToRst(self.website)
self.convert = s2rst.convert
def convert_title(title):
return self.convert(title, preserve_line_breaks=True).strip()
self.convert_title = convert_title
self.get_header = self.get_header_rst
else:
exit(f'Unknown markup language type: {self.website.ml_type}!')
self.convert = self.get_converter()
def get_header_markdown(self):
header = f"""\
title: {self.title}
date: {self.date}
modified: {self.modified}
category: {self.category}
tags: {self.tags}
slug: {self.prefix}
authors: {self.authors}
summary: {self.summary}
def get_header(self):
pass
"""
return header
def get_converter(self):
pass
def get_header_rst(self):
print(self.title)
title = f"{self.title}\n{'#'*len(self.title)}\n\n"
header = title + f"""\
:date: {self.date}
:modified: {self.modified}
:category: {self.category}
:tags: {self.tags}
:slug: {self.prefix}
:authors: {self.authors}
:summary: {self.summary}
"""
return header
def convert_title(self, title):
"""Article title needs special conversion"""
pass
def export_to_pelican(self):
"""
......@@ -437,7 +405,7 @@ summary: {self.summary}
content = ftfy.fix_encoding(self.convert(self.text))
markdown = self.get_header() + content
export_path = os.path.join("content", self.path)
export_path = os.path.join(CONTENTDIR, self.path)
with open(export_path, 'w') as f:
f.write(markdown)
logger.debug(f" --> {export_path}")
......@@ -445,8 +413,65 @@ summary: {self.summary}
return self.skip_reason
class ArticleMd(Article):
    """A single Spip article or rubrique exported as a Markdown Pelican article"""

    def get_converter(self):
        """Return the convert method of a freshly built spip -> markdown translator"""
        return SpipToMarkdown(self.website).convert

    def convert_title(self, title):
        """Convert the title and strip it, removing any line break at the end of the string"""
        converted = self.convert(title)
        return converted.strip()

    def get_header(self):
        """Return the article metadata header in md format"""
        return f"""\
title: {self.title}
date: {self.date}
modified: {self.modified}
category: {self.category}
tags: {self.tags}
slug: {self.prefix}
authors: {self.authors}
summary: {self.summary}
"""
class ArticleRst(Article):
    """A single Spip article or rubrique to be converted into a ReStructuredText Pelican article"""

    def get_converter(self):
        """Return the convert method of a spip -> rst translator"""
        s2ml = SpipToRst(self.website)  # Instantiate a spip -> rst translator
        return s2ml.convert

    def convert_title(self, title):
        """Convert the title with preserve_line_breaks=True, then strip the result"""
        return self.convert(title, preserve_line_breaks=True).strip()

    def get_header(self):
        """Return header in rst format.

        The header is the title underlined with one '#' per title character,
        followed by the article metadata written as an rst field list.
        """
        # The stray print(self.title) was dropped: it wrote to stdout on every
        # call, which the markdown variant of this method does not do.
        title = f"{self.title}\n{'#'*len(self.title)}\n\n"
        header = title + f"""\
:date: {self.date}
:modified: {self.modified}
:category: {self.category}
:tags: {self.tags}
:slug: {self.prefix}
:authors: {self.authors}
:summary: {self.summary}
"""
        return header
class Website:
"""Define a website from Spip data"""
"""Define a website content from Spip data"""
DEFAULT_AUTHOR = "Webmaster"
ATTACHMENTS_PREFIX = "attachments/spip/"
......@@ -463,6 +488,7 @@ class Website:
self.reset_output_dir = reset_output_dir
self.include_breves = include_breves
self.ml_type = ml_type
self.rubrique_to_category = {}
self.article_index = {}
self.doc_index = {}
......@@ -518,13 +544,20 @@ class Website:
if self.include_breves:
self.categories['breves'] = get_categories('breves')
if self.ml_type == 'md':
self.Article = ArticleMd
elif self.ml_type == 'rst':
self.Article = ArticleRst
else:
exit(f'Unknown markup language type: {self.ml_type}.')
def _reset_output_directories(self):
"""Erase existing output files and create empty output directories"""
if os.path.exists("content"):
shutil.rmtree("content")
if os.path.exists(CONTENTDIR):
shutil.rmtree(CONTENTDIR)
for category in set(self.categories['rubriques'].values()):
if category != 'skip':
os.makedirs(os.path.join("content", category))
os.makedirs(os.path.join(CONTENTDIR, category))
def _build_rubrique_to_category(self):
"""Build the index dictionary: {id_rubrique: category_name}"""
......@@ -631,7 +664,7 @@ class Website:
with open(filename, mode='r') as yml_file:
spip_articles = yaml.load(yml_file)
for spip_article in spip_articles:
article = Article(spip_article, spip_type, self)
article = self.Article(spip_article, spip_type, self)
self.articles.append(article)
if not article.skip_reason:
self.article_index[article.id] = article.path
......@@ -672,7 +705,7 @@ class Website:
logger.warning(f" {processed.count(SKIP_REASON[k])} skipped articles because {SKIP_REASON[k]}")
logger.warning(f" {self.nullified_urls} nullified URLs")
logger.info("-------")
logger.debug("See ouput.log")
logger.debug(f"See {LOGFILE}")
def parse_cl_args():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment