Commit a3c09591 authored by Matthieu Boileau

Minor code corrections

parent 7d0130c3
......@@ -27,11 +27,13 @@ SKIP_REASON = {"skip_rub": "belonging to a skipped rubrique",
"unpub": "not published"}
SHORTEN = {'article': 'art', 'rubrique': 'rub', 'breve': 'brev', 'message': 'mess'}
CONTENTDIR = 'content'
LOGFILE = 'spip2pelican.log'
logger = logging.getLogger(__name__)
# create file handler which logs up to info messages
fh = logging.FileHandler('output.log', mode='w')
fh = logging.FileHandler(LOGFILE, mode='w')
# create console handler with higher log level and colored output
......@@ -47,7 +49,7 @@ logger.addHandler(ch)
class SpipToMarkdown:
"""A class to export spip article format to a markdown Pelican article"""
"""A class to convert a Spip article string into a markdown Pelican article string"""
def __init__(self, website): = website
......@@ -305,7 +307,7 @@ class SpipToRst(SpipToMarkdown):
extra_args = ['--wrap=auto']
s = pypandoc.convert_text(s, 'rst', format='md', extra_args=extra_args)
s = re.sub(r"\%7Bfilename\%7D", r"{filename}", s) # Correct unwanted pandoc translation
s = re.sub(r"%7Bfilename%7D", r"{filename}", s) # Correct unwanted pandoc translation
return s
......@@ -367,7 +369,6 @@ class Article:
self.tags = [self.type]
s2md = SpipToMarkdown(
self.convert = self.get_converter()
def get_header(self):
......@@ -376,6 +377,10 @@ class Article:
def get_converter(self):
def convert_title(self, title):
"""Article title needs special conversion"""
def export_to_pelican(self):
Content of a markdown article should look like:
......@@ -400,7 +405,7 @@ class Article:
content = ftfy.fix_encoding(self.convert(self.text))
markdown = self.get_header() + content
export_path = os.path.join("content", self.path)
export_path = os.path.join(CONTENTDIR, self.path)
with open(export_path, 'w') as f:
logger.debug(f" --> {export_path}")
......@@ -417,9 +422,11 @@ class ArticleMd(Article):
return s2ml.convert
def convert_title(self, title):
return self.convert(title).strip() # strip to remove any CR at end of string
"""strip to remove any line break at end of string"""
return self.convert(title).strip()
def get_header(self):
"""Return header in md format"""
header = f"""\
title: {self.title}
date: {}
......@@ -443,9 +450,11 @@ class ArticleRst(Article):
return s2ml.convert
def convert_title(self, title):
"""Prevent line breaks when converting title"""
return self.convert(title, preserve_line_breaks=True).strip()
def get_header(self):
"""Return header in rst format"""
title = f"{self.title}\n{'#'*len(self.title)}\n\n"
header = title + f"""\
......@@ -462,7 +471,7 @@ class ArticleRst(Article):
class Website:
"""Define a website from Spip data"""
"""Define a website content from Spip data"""
DEFAULT_AUTHOR = "Webmaster"
ATTACHMENTS_PREFIX = "attachments/spip/"
......@@ -540,15 +549,15 @@ class Website:
elif self.ml_type == 'rst':
self.Article = ArticleRst
exit(f'Unknown markup language type: {self.ml_type}!')
exit(f'Unknown markup language type: {self.ml_type}.')
def _reset_output_directories(self):
"""Erase existing output files and create empty output directories"""
if os.path.exists("content"):
if os.path.exists(CONTENTDIR):
for category in set(self.categories['rubriques'].values()):
if category != 'skip':
os.makedirs(os.path.join("content", category))
os.makedirs(os.path.join(CONTENTDIR, category))
def _build_rubrique_to_category(self):
"""Build the index dictionary: {id_rubrique: category_name}"""
......@@ -696,7 +705,7 @@ class Website:
logger.warning(f" {processed.count(SKIP_REASON[k])} skipped articles because {SKIP_REASON[k]}")
logger.warning(f" {self.nullified_urls} nullified URLs")"-------")
logger.debug("See ouput.log")
logger.debug(f"See {LOGFILE}")
def parse_cl_args():
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment