diff --git a/create_spip_article_clean.py b/create_spip_article_clean.py index e1999bde750374f6d718a1a938d2547255c3ad9b..66c51bda0962d560fcc257b92e1d1b3c32d953a4 100644 --- a/create_spip_article_clean.py +++ b/create_spip_article_clean.py @@ -86,12 +86,7 @@ with open("./spip_yml/spip_articles.yml", 'r') as stream: regex = re.compile(f'({r}.*)') lines = regex.sub(remove, lines) - #lines = lines.encode('iso-8859-1', 'utf8').decode('utf-8', 'mixed') - import ftfy - lines = ftfy.fix_encoding(lines) - - with open("tmp.yml", "w") as fout: - fout.write(lines) + lines = lines.encode('iso-8859-1', 'utf8').decode('utf-8', 'mixed') y = yaml.load(clean_titles(strip_invalid(lines)))