Commit f0d3d368 authored by Matthieu Boileau

Handle spip_article -> pelican_category

- Fix #17
- Fix #18
parent c960a883
......@@ -295,14 +295,28 @@ class Article:
self.type = spip_type
self.website = website
self.rubrique = spip_article['id_rubrique']
self.category = self.website.category_index[spip_article['id_rubrique']]
id_tag = 'id_' + self.type
self.short_id = spip_article[id_tag]
self.id = f"{SHORTEN[self.type]}{self.short_id}"
self.title = spip_article['titre']
self.rubrique = spip_article['id_rubrique']
rubrique_category = self.website.rubrique_to_category[self.rubrique]
if self.type == 'rubrique':
self.category = rubrique_category
else:
# type is article or breve
types = self.type + 's'
article_category = self.website.categories[types].get(self.short_id, 'spip_divers')
if article_category != 'spip_divers':
self.category = article_category
else:
self.category = self.website.categories[types].get(self.short_id, rubrique_category)
if self.category == 'spip_divers':
logger.warning(f"Article belongs to spip_divers {self.id}: {self.title}")
if self.category == 'skip':
self.skip_reason = SKIP_REASON["skip_rub"]
elif not spip_article['texte'] and not self.type == 'rubrique':
......@@ -395,10 +409,11 @@ class Website:
with open(filename, mode='r') as yml_file:
return yaml.load(remove_null_date(strip_invalid(yml_file)))
def __init__(self, reset_output_dir=True):
def __init__(self, reset_output_dir=True, include_breves=False):
self.reset_output_dir = reset_output_dir
self.category_index = {}
self.include_breves = include_breves
self.rubrique_to_category = {}
self.article_index = {}
self.doc_index = {}
......@@ -424,38 +439,55 @@ class Website:
self.documents_filename = spip_file.get('documents', self.DOCUMENTS_FILENAME)
self.authors_filename = spip_file.get('authors', self.AUTHORS_FILENAME)
self.authors_links_filename = spip_file.get('authors_links', self.AUTHORS_LINKS_FILENAME)
self.categories = {-1: "spip_divers"}
for pelican_category, spip_rubrique in cfg['categories'].items():
if type(spip_rubrique) == int:
# this pelican category corresponds to a single rubrique
self.categories[spip_rubrique] = pelican_category
elif type(spip_rubrique) == list:
# this pelican category corresponds to a list of rubriques
for rubrique in spip_rubrique:
self.categories[rubrique] = pelican_category
else:
logger.critical(f"Error in {config_filename}: {pelican_category}: {spip_rubrique}")
self.categories = {}
def get_categories(spip_type):
    """Return {spip_id: pelican_category} for the given spip_type.

    Reads the ``cfg['categories'][spip_type]`` mapping, whose keys are
    Pelican category names and whose values are either a single integer
    Spip id or a list of Spip ids, and inverts it.

    The returned dictionary always contains the fallback entry
    ``-1: "spip_divers"``. A missing or empty section is not an error:
    a warning is logged and only the fallback entry is returned.
    """
    try:
        cfg_categories = cfg['categories'][spip_type]
    except (KeyError, TypeError):
        # KeyError: section absent; TypeError: 'categories' itself is empty
        # in the YAML file and was therefore parsed as None.
        cfg_categories = None

    if cfg_categories is None:
        # Covers both a missing section and an empty one (`articles:` with
        # no entries parses as None, which has no .items()).
        logger.warning(f"No category description for {spip_type} in {config_filename}")
        cfg_categories = {}

    categories = {}
    for pelican_category, spip_id in cfg_categories.items():
        # bool is a subclass of int; keep rejecting it as the exact type
        # comparison did before.
        if isinstance(spip_id, int) and not isinstance(spip_id, bool):
            # this pelican category corresponds to a single spip id
            categories[spip_id] = pelican_category
        elif isinstance(spip_id, list):
            # this pelican category corresponds to a list of spip ids
            for rubrique in spip_id:
                categories[rubrique] = pelican_category
        else:
            logger.critical(f"Error in {config_filename}: {pelican_category}: {spip_id}")

    # Fallback category, looked up with id -1 when nothing else matches.
    categories[-1] = "spip_divers"
    return categories
self.categories['rubriques'] = get_categories('rubriques')
self.categories['articles'] = get_categories('articles')
if self.include_breves:
self.categories['breves'] = get_categories('breves')
def _reset_output_directories(self):
"""Erase existing output files and create empty output directories"""
if os.path.exists("content"):
shutil.rmtree("content")
for category in set(self.categories.values()):
for category in set(self.categories['rubriques'].values()):
if category != 'skip':
os.makedirs(os.path.join("content", category))
def _build_category_index(self):
def _build_rubrique_to_category(self):
"""Build the index dictionary: {id_rubrique: category_name}"""
def get_category(id_rubrique):
"""Return category from id_rubrique"""
while id_rubrique not in self.categories:
while id_rubrique not in self.categories['rubriques']:
try:
id_rubrique = self.parents[id_rubrique]
except KeyError:
id_rubrique = -1
return self.categories[id_rubrique]
return self.categories['rubriques'][id_rubrique]
# Load original rubriques file as a list
with open(self.rubriques_filename, mode='r') as yml_file:
......@@ -463,8 +495,8 @@ class Website:
self.parents = {rubrique['id_rubrique']: rubrique['id_parent'] for rubrique in rubriques}
self.labels = {rubrique['id_rubrique']: rubrique['titre'].strip() for rubrique in rubriques}
self.category_index = {rubrique['id_rubrique']: get_category(rubrique['id_rubrique'])
for rubrique in rubriques}
self.rubrique_to_category = {rubrique['id_rubrique']: get_category(rubrique['id_rubrique'])
for rubrique in rubriques}
def print_rubrique_tree(self):
"""Print the rubrique structure as a tree using anytree"""
......@@ -509,13 +541,12 @@ class Website:
for pre, fill, rubrique_node in anytree.RenderTree(self.rubrique_nodes['0: root']):
print(f"{pre}{rubrique_node.name} [{rubrique_node.count}]")
def print_articles(self, id_rubrique):
"""Print the rubrique structure as a tree using anytree"""
logger.info(f"Spip articles that belong to rubrique {id_rubrique}: {self.labels[id_rubrique]}")
def print_articles(self, id_rubrique, spip_type):
"""Print a list of given spip_type articles"""
logger.info(f"Spip {spip_type}s that belong to rubrique {id_rubrique}: {self.labels[id_rubrique]}")
for article in self.articles:
if article.rubrique == id_rubrique and article.type != "rubrique":
print(f"- {article.short_id} # {article.title}")
if article.rubrique == id_rubrique and article.type == spip_type:
print(f" - {article.short_id} # {article.title}")
def _build_doc_index(self):
"""Build the index dictionary: {id_doc: file_path}"""
......@@ -557,13 +588,14 @@ class Website:
add_articles(self.articles_filename, 'article')
add_articles(self.rubriques_filename, 'rubrique')
add_articles(self.breves_filename, 'breve')
if self.include_breves:
add_articles(self.breves_filename, 'breve')
def read_spip(self):
"""Read spip yaml files to build useful indices and article list"""
logger.debug("-------")
logger.debug("Loading Spip data")
self._build_category_index()
self._build_rubrique_to_category()
self._build_doc_index()
self._build_author_index()
self._build_articles()
......@@ -599,18 +631,27 @@ def parse_cl_args():
parser.add_argument('-r', '--rubriques', action='store_true', help="Show Spip rubriques structure as a tree")
parser.add_argument('-a', '--articles', metavar="id_rubrique", nargs=1,
help="List Spip articles corresponding to given rubrique id")
parser.add_argument('-b', '--breves', metavar="id_rubrique", nargs=1,
help="List Spip breves corresponding to given rubrique id")
parser.add_argument('-c', '--convert', action='store_true', default=True, help="Convert to Pelican")
parser.add_argument('-ib', '--include_breves', action='store_true', help="Include breve in conversion")
return parser.parse_args()
if __name__ == '__main__':
args = parse_cl_args()
website = Website()
website = Website(include_breves=args.include_breves)
website.read_spip()
if args.rubriques:
# Show only Spip rubrique structure
website.print_rubrique_tree()
elif args.articles:
website.print_articles(int(args.articles[0]))
# Show a list of articles that belongs to id_rubrique
id_rubrique = int(args.articles[0])
website.print_articles(id_rubrique, 'article')
elif args.breves:
# Show a list of breves that belongs to id_rubrique
id_rubrique = int(args.breves[0])
website.print_articles(id_rubrique, 'breve')
else:
website.export_to_pelican()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment