Commit b368c284 authored by Matthieu Boileau

Add Spip breves (fix #15)

parent d97cbd27
......@@ -17,8 +17,9 @@ Les fichiers récupérés sont dans `attachments/spip/`.
```bash
spip_articles -> spip_articles.yml # le contenu et les métadonnées des articles
spip_rubriques -> spip_rubriques.yml # permet de construire la table rubrique_id -> category et le contenu des rubriques
spip_breves -> spip_breves.yml # le contenu des brèves
spip_documents -> spip_documents.yml # permet de construire la table document_id -> document_path
spip_rubriques -> spip_rubriques.yml # permet de construire la table rubrique_id -> category
spip_auteurs -> spip_auteurs.yml # permet de construire la table author_id -> author_name
spip_auteurs_liens -> spip_auteurs_liens.yml # permet de construire la table article_id -> author_name
```
......
......@@ -322,20 +322,21 @@ class Article:
self.mdprefix = f"spip_{self.type}-{self.id}"
self.mdpath = os.path.join(self.category, self.mdprefix + ".md")
self.date = spip_article['date']
try:
self.modified = spip_article['date_modif']
self.date = spip_article['date']
except KeyError:
self.modified = self.date
self.date = spip_article['date_heure']
self.summary = spip_article['descriptif']
self.modified = spip_article.get('date_modif', self.date)
self.summary = spip_article.get('descriptif', '')
self.text = spip_article['texte']
try:
self.authors = self.website.author_index[self.type][self.id]
except KeyError:
self.authors = self.website.default_author
try:
self.authors = self.website.author_index[self.type][self.id]
except KeyError:
self.authors = self.website.default_author
self.tags = [self.type]
def export_to_pelican(self):
"""
......@@ -359,13 +360,12 @@ class Article:
else:
self.title = spip_to_markdown(self.title, self.website).strip() # strip to remove any CR at end of string
tags = []
header = f"""\
title: {self.title}
date: {self.date}
modified: {self.modified}
category: {self.category}
tags: {tags}
tags: {self.tags}
slug: {self.mdprefix}
authors: {self.authors}
summary: {self.summary}
......@@ -389,6 +389,7 @@ class Website:
ATTACHMENTS_PREFIX = "attachments/spip/"
RUBRIQUES_FILENAME = "spip_rubriques_clean.yml"
ARTICLES_FILENAME = "spip_articles_clean.yml"
BREVES_FILENAME = "spip_breves_clean.yml"
DOCUMENTS_FILENAME = "spip_documents.yml"
AUTHORS_FILENAME = "spip_auteurs_clean.yml"
AUTHORS_LINKS_FILENAME = "spip_auteurs_liens.yml"
......@@ -424,6 +425,7 @@ class Website:
self.attachments_prefix = spip_file.get('attachments_prefix', self.ATTACHMENTS_PREFIX)
self.rubriques_filename = spip_file.get('rubriques', self.RUBRIQUES_FILENAME)
self.articles_filename = spip_file.get('articles', self.ARTICLES_FILENAME)
self.breves_filename = spip_file.get('breves', self.BREVES_FILENAME)
self.documents_filename = spip_file.get('documents', self.DOCUMENTS_FILENAME)
self.authors_filename = spip_file.get('authors', self.AUTHORS_FILENAME)
self.authors_links_filename = spip_file.get('authors_links', self.AUTHORS_LINKS_FILENAME)
......@@ -440,9 +442,9 @@ class Website:
logger.critical(f"Error in {config_filename}: {pelican_category}: {spip_rubrique}")
if reset_output_dir:
self.reset_output_directories()
self._reset_output_directories()
def reset_output_directories(self):
def _reset_output_directories(self):
"""Erase existing output files and create empty output directories"""
if os.path.exists("content"):
shutil.rmtree("content")
......@@ -498,10 +500,8 @@ class Website:
def _build_articles(self):
"""
Build:
- the list of Article objects
- the index dictionary: {id_doc: file_path}
"""
Instantiate a list of Article objects
"""
def add_articles(filename, spip_type):
# Load original article file as a list
......@@ -515,6 +515,7 @@ class Website:
add_articles(self.articles_filename, 'article')
add_articles(self.rubriques_filename, 'rubrique')
add_articles(self.breves_filename, 'breve')
def read_spip(self):
"""Read spip yaml files to build useful indices and article list"""
......
This diff is collapsed.
......@@ -39,11 +39,17 @@ def clean_titles(s):
"""Return a clean title line"""
title_type = matchobj.group(1)
title = matchobj.group(2)
title = re.sub(r"'", "''", title) # Avoid real text simple quote to be interpreted as end of string
return f"{title_type}'{title}'"
if title.startswith("|-"):
# Do not replace if multiline content
return matchobj.group(0)
else:
# Avoid real text simple quote to be interpreted as end of string
title = re.sub(r"'", "''", title)
return f"{title_type}'{title}'"
s = re.sub('^( titre: )(.*)$', title_replace, s, flags=re.MULTILINE)
s = re.sub('^( nom_site: )(.*)$', title_replace, s, flags=re.MULTILINE)
s = re.sub('^( texte: )(.*)$', title_replace, s, flags=re.MULTILINE)
return s
......@@ -96,3 +102,4 @@ if __name__ == '__main__':
clean_yaml("spip_auteurs.yml")
clean_yaml("spip_articles.yml")
clean_yaml("spip_rubriques.yml")
clean_yaml("spip_breves.yml")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment