#!/usr/bin/python3
#
# Process the Installation Guide to a format suitable for use as built-in-help in Anaconda
import os
import shutil
import glob
import subprocess
# We use lxml instead of the Python built-in ElementTree because elements
# in an lxml tree know their parents - this is handy when removing the figure tags.
from lxml import etree as ET

# placeholder files that are copied verbatim from the input folder to the output folder
PLACEHOLDERS = [
    "FedoraPlaceholderWithLinks.html",
    "FedoraPlaceholder.html",
    "FedoraPlaceholder.txt",
]

# folder the help content is read from
INPUT_FOLDER = "en-US"
# folder the processed help content is written to
OUTPUT_FOLDER = "anaconda_help_content/en-US"

# XML help content & supporting files Anaconda currently cares about;
# related files share a line, the overall order is significant only for
# the order in which the files are copied
ANACONDA_HELP_FILES = [
    "DateTimeSpoke.xml", "KeyboardSpoke.xml", "KdumpSpoke.xml", "SourceSpoke.xml",
    "FilterSpoke.xml", "FilterSpoke_AddiSCSI.xml", "FilterSpoke_AddFCoE.xml",
    "PasswordSpoke.xml",
    "StorageSpoke.xml", "StorageSpoke_BootLoader.xml",
    "WelcomeSpoke.xml", "SummaryHub.xml",
    # entity definitions referenced by the XML files
    "Installation_Guide.ent",
    "LangSupportSpoke.xml",
    "CustomSpoke.xml", "CustomSpoke_AddPhysical.xml", "CustomSpoke_SoftwareRAID.xml",
    "CustomSpoke_AddLVM.xml", "CustomSpoke_AddBtrfs.xml", "CustomSpoke_FileSystems.xml",
    "CustomSpoke_RecommendedScheme.xml", "CustomSpoke_PartitioningAdvice.xml",
    "NetworkSpoke.xml", "NetworkSpoke_VirtualInterfaces.xml", "NetworkSpoke_EditConnection.xml",
    "SoftwareSpoke.xml", "UserSpoke.xml",
    "ProgressHub.xml", "InitialSetupHub.xml",
]

def run_xmllint(folder=None):
    """Run ``xmllint --noent`` on every ``*.xml`` file in a folder, in place.

    xmllint writes its result to stdout, so the output is captured in a
    temporary ``<file>.temp`` next to the original, which then replaces the
    original once xmllint has finished successfully.

    :param folder: folder to process; defaults to OUTPUT_FOLDER when None

    A non-zero xmllint exit status only produces a warning and the original
    file is left untouched. Failing to start xmllint at all (OSError) is
    propagated to the caller. In both cases the temporary file is removed.
    """
    if folder is None:
        folder = OUTPUT_FOLDER
    for path in glob.glob(os.path.join(folder, "*.xml")):
        temp_file_path = "%s.temp" % path
        try:
            # the context manager closes the handle even if xmllint fails
            with open(temp_file_path, "w") as temp_file:
                subprocess.check_call(["xmllint", "--noent", path], stdout=temp_file)
            shutil.move(temp_file_path, path)
        except subprocess.CalledProcessError:
            print("WARNING: running xmllint on %s failed" % path)
        finally:
            # after a successful move the temporary file is already gone,
            # so this only cleans up the leftovers of a failed run
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)

def _replace_in_file(path, old, new):
    """Replace every occurrence of the byte string old with new in the file at path.

    The file is handled as raw bytes, so its text encoding does not matter.
    It is only rewritten if the replacement actually changed something.
    """
    with open(path, "rb") as source:
        content = source.read()
    updated = content.replace(old, new)
    if updated != content:
        with open(path, "wb") as target:
            target.write(updated)


def _remove_keeping_tail(element):
    """Remove element from its parent while keeping the text that follows it.

    lxml stores the text following an element in the tail of that element,
    and removing the element from its parent drops the tail as well, so the
    tail is first handed over to the previous sibling (or to the parent's
    text if there is no previous sibling).
    """
    parent = element.getparent()
    if element.tail:
        previous = element.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or "") + element.tail
        else:
            parent.text = (parent.text or "") + element.tail
    parent.remove(element)


# does the input folder exist ?
if not os.path.isdir(INPUT_FOLDER):
    print("ERROR: input folder does not exists")
    raise SystemExit(1)

# make sure that the output folder is empty
if os.path.exists(OUTPUT_FOLDER):
    # if it already exists, delete it
    shutil.rmtree(OUTPUT_FOLDER)
os.makedirs(OUTPUT_FOLDER)

print("copying relevant help content files")
for file_name in ANACONDA_HELP_FILES:
    origin = os.path.join(INPUT_FOLDER, file_name)
    destination = os.path.join(OUTPUT_FOLDER, file_name)
    if not os.path.isfile(origin):
        print("WARNING: required file %s is missing" % origin)
        # there is nothing to copy, so skip the file instead of crashing in copy()
        continue
    shutil.copy(origin, destination)

print("removing non breakable spaces")
for pattern in ("*.ent", "*.xml"):
    for path in glob.glob(os.path.join(OUTPUT_FOLDER, pattern)):
        _replace_in_file(path, b"&nbsp;", b" ")

# run xmllint to resolve entities
print("running xmllint to resolve entities")
run_xmllint()

print("loading all XML files")

# parsed trees by file path
xml_files = {}
# id attribute value -> (name of the file containing it, title text)
known_ids = {}

for path in glob.glob(os.path.join(OUTPUT_FOLDER, "*.xml")):
    print("loading: %s" % path)
    try:
        tree = ET.parse(path)
    except ET.ParseError as err:
        print("WARNING: parsing failed:\n%s" % err)
        continue

    filename = os.path.basename(path)
    # find all elements that have an id attribute
    for element in tree.getroot().iter():
        element_id = element.attrib.get("id")
        if not element_id:
            continue
        title = element.find('title')
        if title is not None:
            # store the title text and filename under the id
            known_ids[element_id] = (filename, title.text)
        else:
            # the element has an id but no direct title child element
            print("WARNING: id %s in %s has no title text" % (element_id, path))

    xml_files[path] = tree

print("%d XML files loaded" % len(xml_files))
print("%d ids found" % len(known_ids))

# remove pictures/figures
removed_figures = 0
removed_remarks = 0
rewritten_links = 0
outside_links = 0
print("removing figure & remark tags, rewriting links")
for path, tree in xml_files.items():
    root = tree.getroot()

    for figure in root.findall('.//figure'):
        _remove_keeping_tail(figure)
        removed_figures += 1

    for remark in root.findall('.//remark'):
        _remove_keeping_tail(remark)
        removed_remarks += 1

    # rewrite all links to a format digestible by Yelp
    for xref in root.findall('.//xref'):
        link_target = xref.attrib.get('linkend')
        if not link_target:
            print("WARNING: %s has a xref link with missing linkend" % path)
            continue

        if link_target in known_ids:
            filename, title = known_ids[link_target]
            new_element = ET.Element("ulink")
            new_element.attrib["url"] = filename
            new_element.text = title
            new_element.tail = xref.tail
            # replace the old link element with the new one
            xref.getparent().replace(xref, new_element)
        else:
            # this link points outside of the help files currently
            # used by Anaconda, so replace it with "find it somewhere else"
            # template
            print("INFO: outside link, id: %s in %s" % (link_target, path))
            # lxml doesn't seem to be able to replace an element with a string,
            # so we will just clear the element and replace it with the template
            # in a later textual replacement pass
            tail = xref.tail
            # clear() removes the tail, which is in this case pretty much unrelated
            # to the element, so we need to make sure to save & restore it
            xref.clear()
            xref.tail = tail
            outside_links += 1
        rewritten_links += 1

print("%d figures and %d remarks have been removed" % (removed_figures, removed_remarks))
print("%d links have been rewritten, %d were outside links" % (rewritten_links, outside_links))

# write the modified XMLs to disk
print("saving modified XMLs to storage")
for path, tree in xml_files.items():
    tree.write(path)

# replace the cleared outside links with the template on the serialized files,
# as lxml is not able to replace an element with plain text for us
print("removing obsolete <xref/> tags")
template = b"the full <citetitle>&PRODUCT; Installation Guide</citetitle>, available at &IGURL;"
for path in glob.glob(os.path.join(OUTPUT_FOLDER, "*.xml")):
    _replace_in_file(path, b"<xref/>", template)

# resolve any newly added entities
print("running xmllint to resolve any newly added entities")
run_xmllint()

# remove the entity file, it is no longer needed
print("removing the entity file")
entity_file = os.path.join(OUTPUT_FOLDER, "Installation_Guide.ent")
# the entity file is absent if it was missing from the input folder
if os.path.exists(entity_file):
    os.remove(entity_file)

print("adding placeholders:")
for placeholder in PLACEHOLDERS:
    shutil.copy(os.path.join(INPUT_FOLDER, placeholder), OUTPUT_FOLDER)
    print(placeholder)

print("creating plain text variants with xmlto")
for path in glob.glob(os.path.join(OUTPUT_FOLDER, "*.xml")):
    print("processing: %s" % path)
    # an argument list avoids going through the shell, so paths need no quoting
    try:
        status = subprocess.call(["xmlto", "--noautosize", "-o", OUTPUT_FOLDER, "txt", path])
    except OSError as err:
        # xmlto could not be started; keep going like the shell based call did
        print("WARNING: running xmlto on %s failed: %s" % (path, err))
    else:
        if status != 0:
            print("WARNING: xmlto exited with status %d for %s" % (status, path))

print("done!")
raise SystemExit(0)
