# perrotuerto.blog/scripts/make.py

import re
import sys
import json
from datetime import datetime
from pathlib import Path


def parse_md(txt):
    """
    Convert a minimal Markdown subset to HTML.

    Supported constructs: paragraphs, headers, unordered and ordered lists,
    block quotes, bold, italic and links. Blocks are separated by one or
    more blank lines; whitespace inside a block is collapsed to single
    spaces.

    Known limitation: a header block only uses its first line; any further
    lines in the same block are discarded.

    Args:
        txt: Markdown source text.

    Returns:
        The HTML as a string, one rendered block per line. Consecutive
        block quotes are merged into a single ``<blockquote>``. Returns an
        empty string when ``txt`` contains no blocks.
    """
    # Block-level patterns, matched against the start of each block
    header = r"(#+)\s*(.+)"
    quote = r">\s*(.+)"
    # List markers need whitespace after them; otherwise a paragraph that
    # starts with "*italic*", "**bold**" or "3.14" would be taken as a list
    ulist = r"[\*-]\s+"
    # "\d+" so that items numbered 10 and above are recognized as well
    olist = r"\d+\.\s+"
    # Inline patterns
    bolditalic = r"\*\*\*([^\*]+)\*\*\*"
    bold = r"\*\*([^\*]+)\*\*"
    italic = r"\*([^\*]+)\*"
    # Label may not contain brackets and target may not contain parentheses,
    # so "(see [a](url))" does not swallow the closing parenthesis
    link = r"\[([^\[\]]+)\]\(([^\(\)]+)\)"
    # Split the text into blocks on blank lines and drop the empty ones
    blocks = [chunk.strip() for chunk in re.split(r"\n\s*\n", txt)]
    blocks = [chunk for chunk in blocks if chunk]
    # Render each block
    for i, block in enumerate(blocks):
        header_match = re.match(header, block)
        # Header
        if header_match:
            hashes, title = header_match.groups()
            # HTML only defines h1 to h6
            tag = "h" + str(min(len(hashes), 6))
            block = f"<{tag}>{title}</{tag}>"
        # Block quote
        elif re.match(quote, block):
            # Drop the ">" marker of every line so multi-line quotes keep
            # all of their text
            lines = (
                re.sub(r"^>\s*", "", line.strip())
                for line in block.splitlines()
            )
            text = " ".join(line for line in lines if line)
            block = f"<blockquote><p>{text}</p></blockquote>"
        # Unordered list
        elif re.match(ulist, block):
            items = re.sub(r"^" + ulist, "", block)
            items = re.split(r"\n\s*" + ulist, items)
            items = "".join(f"<li><p>{item}</p></li>" for item in items)
            block = f"<ul>{items}</ul>"
        # Ordered list
        elif re.match(olist, block):
            items = re.sub(r"^" + olist, "", block)
            items = re.split(r"\n\s*" + olist, items)
            items = "".join(f"<li><p>{item}</p></li>" for item in items)
            block = f"<ol>{items}</ol>"
        # Anything else is a paragraph
        else:
            block = f"<p>{block}</p>"
        # Collapse whitespace, then apply inline styles. Order matters:
        # "***" must be handled before "**", and "**" before "*".
        block = " ".join(block.split())
        block = re.sub(bolditalic, r"<b><i>\1</i></b>", block)
        block = re.sub(bold, r"<b>\1</b>", block)
        block = re.sub(italic, r"<i>\1</i>", block)
        block = re.sub(link, r'<a target="_blank" href="\2">\1</a>', block)
        blocks[i] = block
    # Join the blocks and merge adjacent block quotes into one
    html = "\n".join(blocks)
    html = re.sub(r"</blockquote>\n<blockquote>", "", html)
    return html


# Directory one level above the one that contains this script
root = Path(__file__).parent.parent
# "About" text in Markdown; split/join collapses the indentation and line
# breaks of the literal into single spaces
about = " ".join(
    """
    Hola, soy perro tuerto. Mi formación académica es en Filosofía, mi
    profesión es en la edición de publicaciones (libros, fanzines, revistas…)
    y mi programación se enfoca en el desarrollo de metodologías libres para la
    publicación. Soy fan de las humanidades, la paleoantropología y las
    ciencias de la computación, así como soy voluntario en organizaciones sobre
    edición, *software* y cultura libres, como
    [Programando LIBREros](https://programando.li/breros),
    [Miau](https://t.me/miau2018), [Cuates](https://cuates.net/) o
    [Wikipedia](https://wikimedia.mx/). Doy soporte técnico a la
    [Academia Mexicana de la Lengua](https://academia.org.mx/) y puedo ayudarte
    en tus proyectos. **En este espacio comparto enlaces que me parecen
    chéveres.**
""".split()
)
# Each key, upper-cased and wrapped in "#", is a placeholder in the template
contact = {
    "site": "https://perrotuerto.blog",
    "gitlab": "https://gitlab.com/perrotuerto",
    "cuates": "https://git.cuates.net/perro",
    "wikipedia": "https://es.wikipedia.org/wiki/Usuario:Perrotuerto",
    "github": "https://github.com/perrotuerto",
    "email": "hi@perrotuerto.blog",
}
# The links JSON file is the first command-line argument
if len(sys.argv) < 2:
    sys.exit("usage: make.py LINKS_JSON")
json_file = Path(sys.argv[1])
# Explicit UTF-8 so decoding does not depend on the locale's default encoding
links = json.loads(json_file.read_text(encoding="utf-8"))
# Only the "results" list of the JSON document is used
data = {"acerca": about, "contacto": contact, "enlaces": links["results"]}
index = root / "public" / "index.html"
template = (root / "src" / "template.html").read_text(encoding="utf-8")
# Accumulates the generated HTML that replaces "#LINKS#" in the template
body = ""

def _format_date(stamp):
    """Convert a ``%Y-%m-%dT%H:%M:%S.%fZ`` timestamp to ``%d/%m/%Y``."""
    parsed = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S.%fZ")
    return parsed.strftime("%d/%m/%Y")


def _render_link(link):
    """
    Return the ``<li>`` HTML for one link entry.

    Reads the keys ``url``, ``id``, ``title``, ``date_added``,
    ``date_modified``, ``description`` and ``tag_names``; ``notes`` is
    optional and rendered from Markdown when it has content.
    """
    url = link["url"]
    link_id = link["id"]
    archive = f"https://web.archive.org/web/*/{url}"
    created = _format_date(link["date_added"])
    updated = _format_date(link["date_modified"])
    # Drop one trailing period; endswith() is safe on an empty description
    desc = link["description"]
    if desc.endswith("."):
        desc = desc[:-1]
    # The "blog" tag is not shown
    tags = " ".join(
        f"<a>#{tag}</a>" for tag in link["tag_names"] if tag != "blog"
    )
    parts = [
        f'\n<li class="link" id="{link_id}">',
        f'\n<h1><a class="anchor" href="#{link_id}">⚓</a>',
        f'<a class="name" target="_blank" href="{url}">',
        f'{link["title"]}</a></h1>',
        '\n<p class="dates">',
        f'<span class="created">{created}</span>',
        f'<span class="updated">{updated}</span></p>',
        f'\n<p class="description">{desc}</p>',
        f'\n<p class="tags">{tags}</p>',
        '\n<p class="social">',
        f'<a target="_blank" href="{archive}">Ver archivado</a></p>',
    ]
    # Only emit the expandable section when the notes render to something,
    # so there is no empty "Leer más" widget
    notes = parse_md(link.get("notes") or "")
    if notes:
        parts.append('\n<details class="info">')
        parts.append("\n<summary>Leer más</summary>")
        parts.append(notes)
        parts.append("\n</details>")
    parts.append("\n</li>")
    return "".join(parts)


# Build one <section> per entry of data. "contacto" is not rendered as a
# section: its values only fill the "#NAME#" placeholders of the template.
sections = []
for key, val in data.items():
    if key == "contacto":
        for name, url in val.items():
            # Plain replace: the placeholder is literal text and the URL must
            # not be interpreted as a regex replacement template
            template = template.replace(f"#{name.upper()}#", url)
        continue
    sections.append(f'\n<section id="{key}">')
    sections.append(f"\n<h1>{key.capitalize()}</h1>")
    if isinstance(val, str):
        # Markdown text section
        sections.append(f"\n{parse_md(val)}")
    else:
        # List of link entries
        sections.append('<ul class="list">')
        sections.extend(_render_link(link) for link in val)
        sections.append("</ul>")
    sections.append("\n</section>")
body += "".join(sections)

# str.replace instead of re.sub: the generated HTML may contain backslashes,
# which re.sub would treat as escapes or group references in the replacement
index.write_text(template.replace("#LINKS#", body), encoding="utf-8")
# NOTE(review): this overwrites the input JSON with the assembled data, which
# has no "results" key, so the same file cannot be fed to the script again.
json_file.write_text(json.dumps(data), encoding="utf-8")