Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
groupe-calcul
spip2pelican
Commits
c2aa06f9
Commit
c2aa06f9
authored
Jun 06, 2018
by
Matthieu Boileau
Browse files
Fix
#14
parent
ec93e330
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
config.yml
View file @
c2aa06f9
site_url
:
http://calcul.math.cnrs.fr
author
s
:
Groupe Calcul
default_
author
:
Webmaster
attachments_prefix
:
attachments/spip/
categories
:
journees
:
4
...
...
@@ -13,3 +13,5 @@ categories:
rubriques
:
spip_rubriques_clean.yml
articles
:
spip_articles_clean.yml
documents
:
spip_documents.yml
authors
:
spip_auteurs_clean.yml
authors_links
:
spip_auteurs_liens.yml
spip2pelican.py
View file @
c2aa06f9
...
...
@@ -32,7 +32,7 @@ fh.setLevel(logging.INFO)
# create console handler with higher log level and colored output
ch
=
logging
.
StreamHandler
(
sys
.
stdout
)
ch
.
setLevel
(
logging
.
DEBUG
)
LOGFORMAT
=
"
%(log_color)s%(message)s%(reset)s"
LOGFORMAT
=
"%(log_color)s%(message)s%(reset)s"
color_formatter
=
ColoredFormatter
(
LOGFORMAT
)
ch
.
setFormatter
(
color_formatter
)
...
...
@@ -41,8 +41,6 @@ logger.addHandler(fh)
logger
.
addHandler
(
ch
)
# TODO: handle mix of italic and bold : { {{Marc Poinot}} (ONERA)}
def
header
(
s
):
"""
SPIP: {{{...}}}
...
...
@@ -188,8 +186,8 @@ def link(s, website):
art_url
=
re
.
match
(
r
"\Aart([0-9]+)"
,
url
)
if
art_url
:
# [text->art#]
id_art
=
int
(
art_url
.
group
(
1
))
try
:
id_art
=
int
(
art_url
.
group
(
1
))
new_url
=
"{filename}/"
+
website
.
article_index
[
id_art
]
except
KeyError
:
new_url
=
nullify_url
(
"non existing article"
,
id_art
,
text
,
url
)
...
...
@@ -334,6 +332,11 @@ class Article:
self
.
summary
=
spip_article
[
'descriptif'
]
self
.
text
=
spip_article
[
'texte'
]
try
:
self
.
authors
=
self
.
website
.
author_index
[
self
.
type
][
self
.
id
]
except
KeyError
:
self
.
authors
=
self
.
website
.
default_author
def
export_to_pelican
(
self
):
"""
Content of a markdown article should look like:
...
...
@@ -357,8 +360,6 @@ class Article:
self
.
title
=
spip_to_markdown
(
self
.
title
,
self
.
website
).
strip
()
# strip to remove any CR at end of string
tags
=
[]
authors
=
self
.
website
.
authors
content
=
spip_to_markdown
(
self
.
text
,
self
.
website
)
header
=
f
"""
\
title:
{
self
.
title
}
date:
{
self
.
date
}
...
...
@@ -366,10 +367,11 @@ modified: {self.modified}
category:
{
self
.
category
}
tags:
{
tags
}
slug:
{
self
.
mdprefix
}
authors:
{
authors
}
authors:
{
self
.
authors
}
summary:
{
self
.
summary
}
"""
content
=
spip_to_markdown
(
self
.
text
,
self
.
website
)
markdown
=
header
+
content
export_path
=
os
.
path
.
join
(
"content"
,
self
.
mdpath
)
...
...
@@ -388,22 +390,33 @@ class Website:
def
__missing__
(
self
,
key
):
return
0
@
staticmethod
def
_load_and_clean_yaml
(
filename
):
"""Load yaml file filename, clean it and return a dictionary"""
with
open
(
filename
,
mode
=
'r'
)
as
yml_file
:
return
yaml
.
load
(
remove_null_date
(
strip_invalid
(
yml_file
)))
def
__init__
(
self
,
reset_output_dir
=
True
):
self
.
category_index
=
{}
self
.
article_index
=
{}
self
.
doc_index
=
{}
self
.
author_index
=
{}
self
.
nullified_urls
=
self
.
MissingKeyDict
()
self
.
articles
=
[]
config_filename
=
"config.yml"
with
open
(
config_filename
,
'r'
)
as
ymlfile
:
cfg
=
yaml
.
load
(
ymlfile
)
self
.
site_url
=
cfg
[
'site_url'
]
self
.
author
s
=
cfg
[
'author
s
'
]
self
.
default_
author
=
cfg
[
'
default_
author'
]
self
.
attachments_prefix
=
cfg
[
'attachments_prefix'
]
self
.
rubriques_filename
=
cfg
[
'rubriques'
]
self
.
documents_filename
=
cfg
[
'documents'
]
self
.
articles_filename
=
cfg
[
'articles'
]
self
.
authors_filename
=
cfg
[
'authors'
]
self
.
authors_links_filename
=
cfg
[
'authors_links'
]
self
.
categories
=
{
-
1
:
"spip_divers"
}
for
pelican_category
,
spip_rubrique
in
cfg
[
'categories'
].
items
():
if
type
(
spip_rubrique
)
==
int
:
...
...
@@ -441,8 +454,8 @@ class Website:
return
self
.
categories
[
id_rubrique
]
# Load original rubriques file as a list
with
open
(
self
.
rubriques_filename
,
mode
=
'r'
)
as
yml_
rubriques
:
rubriques
=
yaml
.
load
(
yml_
rubriques
.
read
())
with
open
(
self
.
rubriques_filename
,
mode
=
'r'
)
as
yml_
file
:
rubriques
=
yaml
.
load
(
yml_
file
.
read
())
parents
=
{
rubrique
[
'id_rubrique'
]:
rubrique
[
'id_parent'
]
for
rubrique
in
rubriques
}
self
.
category_index
=
{
rubrique
[
'id_rubrique'
]:
get_category
(
rubrique
[
'id_rubrique'
])
...
...
@@ -452,10 +465,27 @@ class Website:
"""Build the index dictionary: {id_doc: file_path}"""
# Load original document file as a list
with
open
(
self
.
documents_filename
,
mode
=
'r'
)
as
yml_doc
:
docs
=
yaml
.
load
(
remove_null_date
(
strip_invalid
(
yml_doc
)))
docs
=
self
.
_load_and_clean_yaml
(
self
.
documents_filename
)
self
.
doc_index
=
{
doc
[
'id_document'
]:
doc
[
'fichier'
]
for
doc
in
docs
}
def
_build_author_index
(
self
):
"""Build the index dictionary: {spip_type: art_id: author_name}"""
# Load author file as a list
authors
=
self
.
_load_and_clean_yaml
(
self
.
authors_filename
)
author_name_index
=
{
author
[
'id_auteur'
]:
author
[
'nom'
]
for
author
in
authors
}
# Load article/author file as a list
authors_links
=
self
.
_load_and_clean_yaml
(
self
.
authors_links_filename
)
for
authors_link
in
authors_links
:
spip_type
=
authors_link
[
'objet'
]
art_id
=
authors_link
[
'id_objet'
]
author_id
=
authors_link
[
'id_auteur'
]
if
spip_type
not
in
self
.
author_index
.
keys
():
self
.
author_index
[
spip_type
]
=
{}
self
.
author_index
[
spip_type
][
art_id
]
=
author_name_index
[
author_id
]
def
_build_articles
(
self
):
"""
Build:
...
...
@@ -473,20 +503,23 @@ class Website:
if
not
article
.
skip_reason
:
self
.
article_index
[
article
.
id
]
=
article
.
mdpath
self
.
articles
=
[]
self
.
article_index
=
{}
add_articles
(
self
.
articles_filename
,
'article'
)
add_articles
(
self
.
rubriques_filename
,
'rubrique'
)
def
read_spip
(
self
):
"""Read spip yaml files to build useful indices and article list"""
logger
.
debug
(
"-------"
)
logger
.
debug
(
"Loading Spip data"
)
self
.
_build_category_index
()
self
.
_build_doc_index
()
self
.
_build_author_index
()
self
.
_build_articles
()
def
export_to_pelican
(
self
):
"""Loop on Spip articles to convert them into Pelican format"""
logger
.
debug
(
"-------"
)
logger
.
debug
(
"Exporting to Pelican"
)
processed
=
[]
for
article
in
self
.
articles
:
skip_reason
=
article
.
export_to_pelican
()
...
...
spip_auteurs.yml
0 → 100644
View file @
c2aa06f9
This diff is collapsed.
Click to expand it.
spip_auteurs_liens.yml
0 → 100644
View file @
c2aa06f9
This diff is collapsed.
Click to expand it.
yaml_cleaner.py
View file @
c2aa06f9
...
...
@@ -27,7 +27,7 @@ def remove_null_date(s):
"""
Remove "date:", "date_tmp:", etc. if equals to 0000-00-00 00:00:00 (otherwise yaml.load() would fail)
"""
s
=
re
.
sub
(
r
'^ date.*: 0000-00-00 00:00:00$'
,
r
''
,
s
,
flags
=
re
.
MULTILINE
)
s
=
re
.
sub
(
r
'
(
^ date
|^ en_ligne)
.*: 0000-00-00 00:00:00$'
,
r
''
,
s
,
flags
=
re
.
MULTILINE
)
return
s
...
...
@@ -37,12 +37,14 @@ def clean_titles(s):
"""
def
title_replace
(
matchobj
):
"""Return a clean title line"""
title
=
matchobj
.
group
(
1
)
title_type
=
matchobj
.
group
(
1
)
title
=
matchobj
.
group
(
2
)
title
=
re
.
sub
(
r
"'"
,
"''"
,
title
)
# Avoid real text simple quote to be interpreted as end of string
return
f
"
tit
re:
'
{
title
}
'"
return
f
"
{
tit
le_type
}
'
{
title
}
'"
res
=
re
.
sub
(
'^ titre: (.*)$'
,
title_replace
,
s
,
flags
=
re
.
MULTILINE
)
return
res
s
=
re
.
sub
(
'^( titre: )(.*)$'
,
title_replace
,
s
,
flags
=
re
.
MULTILINE
)
s
=
re
.
sub
(
'^( nom_site: )(.*)$'
,
title_replace
,
s
,
flags
=
re
.
MULTILINE
)
return
s
def
force_encode
(
line
,
iline
,
codecs
=
(
'cp1252'
,
'utf8'
)):
...
...
@@ -91,5 +93,6 @@ def clean_yaml(yml_filename):
if
__name__
==
'__main__'
:
clean_yaml
(
"spip_auteurs.yml"
)
clean_yaml
(
"spip_articles.yml"
)
clean_yaml
(
"spip_rubriques.yml"
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment