Skip to content
Snippets Groups Projects
Martyn Welch's avatar
Martyn Welch authored
After moving the documents from designs.a.o to www.a.o, there were
quite a few links from the designs.a.o docs pointing to wiki.a.o and
quite a few links from www.a.o pointing to designs.a.o. Replace these
links with Hugo ref shortcodes to avoid hammering the redirects.

This was achieved with the following script:

```

import os
import re
import sys
import toml

from urllib.parse import urlparse

def get_aliases(filename):
    """Record the old-site aliases declared in one page's front matter.

    The file is only considered when it starts with a ``+++`` delimiter;
    the text between the first two delimiters is parsed as TOML.  Every
    entry of its ``aliases`` list that contains ``/old-wiki/`` or
    ``/old-designs/`` is added to the module-level ``wiki_aliases`` /
    ``design_aliases`` tables, mapping the shortened alias path to the
    name later used as the link target.

    Args:
        filename: Path of the file to inspect, using ``/`` separators.

    Returns:
        None.  The lookup tables are updated as a side effect.
    """
    with open(filename, 'r') as handle:
        text = handle.read()

    # Files that do not open with a "+++" front matter block are ignored.
    if text[:3] != "+++":
        return

    # "_index.md" pages keep their whole path (minus leading/trailing dots);
    # every other page is identified by its bare file name.
    if "_index.md" in filename:
        target = filename.strip(".")
    else:
        target = filename.split("/")[-1]

    front_matter = toml.loads(text.split("+++")[1])
    if "aliases" not in front_matter:
        return

    for alias in front_matter["aliases"]:
        if "/old-wiki/" in alias:
            wiki_key = alias.replace("/old-wiki/", "/")
            wiki_aliases[wiki_key] = target

        if "/old-designs/" in alias:
            # We're dropping the per-release links from designs.a.o, so match on shorter path
            design_key = alias.replace("/old-designs/latest/", "/")
            design_aliases[design_key] = target

def _rewrite_link(link):
    """Return the replacement text for one link target.

    Links to ``wiki.apertis.org`` are looked up by path in the module-level
    ``wiki_aliases`` table.  Links to ``designs.apertis.org`` are looked up
    in ``design_aliases`` after the first path component has been dropped.
    A hit is rewritten as a ``ref`` shortcode pointing at the mapped page,
    keeping any params/query/fragment of the original URL; anything else is
    returned unchanged.

    Args:
        link: The link target exactly as captured from the document.

    Returns:
        Either ``' {{< ref "<target>" >}} '`` (with the surrounding spaces)
        or the original ``link`` string.
    """
    parsed = urlparse(link)

    if parsed.netloc == "wiki.apertis.org":
        target = wiki_aliases.get(parsed.path)
    elif parsed.netloc == "designs.apertis.org":
        # We're dropping the per-release links from designs.a.o, so match on shorter path
        path = "/%s" % parsed.path.split('/', 2)[-1]
        # Diagnostic output: every designs.a.o path that was looked up.
        print(path)
        target = design_aliases.get(path)
    else:
        return link

    if target is None:
        return link

    # Drop scheme and host so only the mapped page (plus any fragment etc.)
    # ends up inside the shortcode.
    relative = parsed._replace(scheme="", netloc="", path=target)
    return " {{< ref \"%s\" >}} " % relative.geturl()


def fix_link(url):
    """``re.sub`` callback for inline links of the form ``](target)``.

    Args:
        url: Match object with a named group ``link`` holding the target.

    Returns:
        The replacement text ``](<target or ref shortcode>)``.
    """
    return "](%s)" % _rewrite_link(url.group('link'))


def fix_ref(url):
    """``re.sub`` callback for reference definitions of the form ``]: target``.

    Args:
        url: Match object with a named group ``link`` holding the target.

    Returns:
        The replacement text ``]: <target or ref shortcode>``.
    """
    return "]: %s" % _rewrite_link(url.group('link'))

def correct_links(filename):
    """Rewrite the links of one Markdown file in place.

    Only files that start with a ``+++`` front matter block are touched.
    The front matter is written back untouched; in the document body every
    inline link ``](target)`` is passed through ``fix_link`` and every
    reference definition ``]: target`` through ``fix_ref``.

    A file that opens with ``+++`` but has no closing ``+++`` is left
    alone instead of raising ``IndexError``.

    Args:
        filename: Path of the file to rewrite.

    Returns:
        None.  The file is modified on disk.
    """
    with open(filename, 'r+') as file:
        contents = file.read()

        if not contents.startswith("+++"):
            return

        # parts == ["", front matter, body] for a well-formed page.
        parts = contents.split("+++", 2)
        if len(parts) < 3:
            # Opening delimiter without a closing one: nothing safe to rewrite.
            return
        fm, doc = parts[1], parts[2]

        # Raw strings: "\]" and "\(" are regex escapes, not string escapes.
        # Hotdoc allows empty links like `[](url)`
        inline_link = re.compile(r"\]\((?P<link>.*?)\)")
        doc = inline_link.sub(fix_link, doc)

        reference_def = re.compile(r"\]: (?P<link>.*)")
        doc = reference_def.sub(fix_ref, doc)

        # Replace the whole file with the reassembled page.
        file.seek(0)
        file.truncate()

        file.write("+++")
        file.write(fm)
        file.write("+++")
        file.write(doc)

# Lookup tables from old-site alias path to the page that now serves it.
# Filled in by get_aliases() during the first pass and read by the link
# fixers during the second.
wiki_aliases = {}
design_aliases = {}


def _markdown_files(top):
    """Yield ``"<dir>/<name>"`` for every file ending in ``.md`` below *top*."""
    for root, _dirs, names in os.walk(top):
        for name in names:
            # Test the extension, not a substring: "page.md~" or
            # "page.md.orig" must not be parsed or rewritten.
            if name.endswith(".md"):
                yield "%s/%s" % (root, name)


if len(sys.argv) < 2:
    sys.exit("usage: pass the content directory to scan as the first argument")

# Pass 1: collect every alias before any link is rewritten.
for path in _markdown_files(sys.argv[1]):
    get_aliases(path)

print("wiki_aliases:")
print(wiki_aliases)
print("design_aliases:")
print(design_aliases)

# Pass 2: rewrite the links using the completed tables.
for path in _markdown_files(sys.argv[1]):
    correct_links(path)
```

Signed-off-by: Martyn Welch <martyn.welch@collabora.com>
ee26b8cf
History
Name Last commit Last update