Commit a4c3de3b authored by Benoit Fabrèges's avatar Benoit Fabrèges
Browse files

more detailed messages

parent 9adbced8
Pipeline #6051 failed with stages
in 2 minutes and 47 seconds
from pelican import signals
from bs4 import BeautifulSoup
import requests
from requests.exceptions import Timeout, RequestException
from requests.exceptions import Timeout, RequestException, HTTPError, SSLError, ConnectionError
import os
from pathlib import Path
import logging
......@@ -14,8 +14,8 @@ def check_file(filename):
"""
code = None
avail = filename.exists()
return avail, avail, code
msg = "File doesn't exist"
return avail, code, msg
def check_url(url, timeout_ms):
    """
    Check whether an external URL answers an HTTP GET request successfully.

    :param url: URL of the website to be checked
    :param timeout_ms: timeout duration in ms
    :return: (success, HTTP code, message)
        - success: True only if a response was received and
          ``raise_for_status()`` did not flag it as an HTTP error
        - HTTP code: status code of the response, or None when no response
          was received at all (timeout, TLS failure, connection failure, ...)
        - message: short description of the failure, "" on success
    """
    success, code, msg = False, None, ""
    timeout_s = timeout_ms * 1e-3
    try:
        # verify:
        # - True: checks certificates
        # - False: bypasses certificate validation completely
        r = requests.get(url, timeout=timeout_s, verify=True)
    except Timeout:
        # Listed first so that connect timeouts (which are also
        # ConnectionError subclasses in requests) are reported as timeouts.
        msg = "Timeout error"
    except SSLError:
        # SSLError derives from requests' ConnectionError, so it has to be
        # handled before the more general ConnectionError clause.
        msg = "Certificate validation error - check it with https://www.ssllabs.com/ssltest/analyze.html"
    except ConnectionError:
        msg = "Connection error"
    except RequestException as exc:
        # Catch-all for every other requests failure (too many redirects,
        # invalid or schema-less URL, broken chunked encoding, ...). Without
        # it a single malformed link would raise out of the checker instead
        # of being reported like the other failures.
        msg = f"Request error ({type(exc).__name__})"
    else:
        code = r.status_code
        try:
            r.raise_for_status()
        except HTTPError:
            msg = "Error code received"
        else:
            success = True
    return success, code, msg
......@@ -90,24 +101,25 @@ def all_generators_finalized(generators):
# We do not check twice for the same link
if url in cache:
avail, success, code = cache[url]
success, code, msg = cache[url]
else:
if url.startswith('http'):
avail, success, code = check_url(url, timeout_duration_ms)
success, code, msg = check_url(url, timeout_duration_ms)
else:
if url.startswith('/'):
internal_filename = output_path.joinpath(*(url.split(os.sep)))
else:
internal_filename = output_path.joinpath(Path(page.url).parent, url)
avail, success, code = check_file(internal_filename.resolve())
success, code, msg = check_file(internal_filename.resolve())
cache[url] = (avail, success, code)
cache[url] = (success, code, msg)
if not avail:
logging.warning(f'Check link (not available): {url}\nIn file {page.url}\nFrom source file {page.get_relative_source_path()}')
elif not success:
logging.error(f'Dead link (error code {code}): {url}\nIn file {page.url}\nFrom source file {page.get_relative_source_path()}')
if not success:
if code is not None:
logging.error(f'Dead link (error code {code}): {url}\nIn file {page.url}\nFrom source file {page.get_relative_source_path()}')
else:
logging.warning(f'Check link ({msg}): {url}\nIn file {page.url}\nFrom source file {page.get_relative_source_path()}')
else:
logging.debug(f'Good link: {url}')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment