Commit 1f8c7fac authored by Matthieu Boileau

Some corrections for IRMA's website

parent c226b807
Pipeline #6819 failed with stage
in 17 seconds
......@@ -8,6 +8,7 @@ https://github.com/nhoizey/spip2markdown/blob/master/spip2markdown_options.php
import anytree
import argparse
from ruamel.yaml import YAML
import yaml as pyyaml
import bs4
from bs4 import BeautifulSoup
from colorlog import ColoredFormatter
......@@ -19,6 +20,7 @@ import re
import shutil
import sys
from process_yaml import strip_invalid, remove_null_date
from pathlib import Path
yaml = YAML(typ='safe')
......@@ -150,11 +152,13 @@ class Article:
else:
self.title = self.convert_title(self.title)
content = ftfy.fix_encoding(self.convert(self.text))
markdown = self.get_header() + content
fullcontent = self.get_header() + content
export_path = os.path.join(CONTENTDIR, self.path)
# Create destination directory if not exists
Path(export_path).parent.mkdir(parents=True, exist_ok=True)
with open(export_path, 'w') as f:
f.write(markdown)
f.write(fullcontent)
logger.debug(f" --> {export_path}")
return self.skip_reason
......@@ -195,8 +199,18 @@ summary: {self.summary}
s = self.header_extended(s)
s = self.horizontal_rule(s)
s = self.link(s)
s = s.strip('"\n') # because single quotes might have been added to escape some yaml fields
s = s.replace(r'\n', '\n')
s = s.replace(r'\r', '\r')
return s
def nullify_document(self, doc_id):
    """
    Report a link to a document that could not be resolved and blank it.

    Logs a warning naming ``doc_id``, increments the website-wide
    ``nullified_docs`` counter, and returns an empty string for the caller
    to use in place of the link.
    """
    logger.warning(
        f" WARNING: nullify link to non existing document {doc_id}"
    )
    # Tallied on the website object so the total can be reported later.
    self.website.nullified_docs = self.website.nullified_docs + 1
    return ""
def document(self, s):
"""
SPIP: <doc|path> or <img|path>
......@@ -209,15 +223,14 @@ summary: {self.summary}
try:
doc_path = self.website.doc_index[doc_id]
url = os.path.join(self.website.attachments_prefix, "IMG",
self.website.doc_index[doc_id])
self.website.doc_index[doc_id])
docname = os.path.basename(url)
if doc_type == 'doc':
return f"[{docname}]({url})"
else:
return f"![{docname}]({url})"
except KeyError:
logger.warning(f"Missing document {doc_id}")
return ''
return self.nullify_document(doc_id)
return re.sub(r'<(doc|img)([0-9]+)\|.*>', doc_replace, s)
......@@ -484,16 +497,18 @@ class ArticleRst(Article):
def doc_rst(match):
    """
    Render one SPIP document/image tag as reStructuredText.

    ``match`` is a regex match whose group 1 is the tag type (``doc`` or
    ``img``) and whose group 2 is the numeric document id.

    Returns an anonymous hyperlink for ``doc`` tags and an ``image``
    directive for any other tag type. When the id is absent from the
    website's document index, the result of ``self.nullify_document`` is
    returned instead.
    """
    doc_type = match[1]
    doc_id = int(match[2])
    # Only the index lookup can raise KeyError, so keep the try that small.
    try:
        doc_path = self.website.doc_index[doc_id]
    except KeyError:
        return self.nullify_document(doc_id)
    url = os.path.join(self.website.attachments_prefix, "IMG", doc_path)
    docname = os.path.basename(url)
    if doc_type == 'doc':
        return f'`{docname} <{url}>`__'
    # A directive needs whitespace after the leading "..": without it
    # docutils does not recognise the line as explicit markup.
    return f'\n\n.. image:: {url}\n\n'
regex = re.compile(r'<(doc|img)([0-9]+)\|.*>')
return regex.sub(doc_rst, s)
......@@ -808,6 +823,7 @@ class Website:
self.author_index = {}
self.nullified_urls = 0
self.nullified_docs = 0
self.articles = []
self.rubrique_nodes = {}
......@@ -1061,7 +1077,10 @@ class Website:
for k in SKIP_REASON:
logger.warning(f" {processed.count(SKIP_REASON[k])} skipped "
f"articles because {SKIP_REASON[k]}")
logger.warning(f" {self.nullified_urls} nullified URLs")
if self.nullified_urls:
logger.warning(f" {self.nullified_urls} nullified URLs")
if self.nullified_docs:
logger.warning(f" {self.nullified_docs} nullified docs")
logger.info("-------")
logger.debug(f"See {LOGFILE}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment