#!/usr/bin/python
#
# Process the Installation Guide to a format suitable for use as built-in-help in Anaconda
import os
import shutil
import glob
import subprocess
# lxml is used instead of the built-in ElementTree because elements in an lxml
# tree know their parents - this is handy when removing the figure tags
from lxml import etree as ET


# placeholder HTML pages that are copied unchanged from the input folder
# to the output folder at the end of the run
PLACEHOLDERS = ["RHEL7PlaceholderWithLinks.html", "RHEL7Placeholder.html"]
# folder the source help content is read from
INPUT_FOLDER = "en-US"
# folder the processed help content is written to; it is deleted and
# recreated on every run
OUTPUT_FOLDER = "anaconda_help_content/en-US"

# names of the XML help content files (and the supporting entity file) that
# Anaconda currently uses; each of them is copied from the input folder to
# the output folder
ANACONDA_HELP_FILES = [
    "Account_Configuration-common-list-1.xml",
    "Account_Configuration-common-note-1.xml",
    "Account_Configuration-common-note-2.xml",
    "Account_Configuration-common-para-1.xml",
    "Account_Configuration-common-para-2.xml",
    "Account_Configuration-common-para-3.xml",
    "Adding_Partitions-ppc.xml",
    "Adding_Partitions-s390.xml",
    "Adding_Partitions-section-2-indexterm-1.xml",
    "Adding_Partitions-section-2-indexterm-2.xml",
    "Adding_Partitions-section-2-indexterm-3.xml",
    "Adding_Partitions-section-2-indexterm-4.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-1.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-2.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-3.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-4.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-5.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-6.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-7.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-8.xml",
    "Adding_Partitions-section-2-itemizedlist-1-listitem-9.xml",
    "Adding_Partitions-section-2-itemizedlist-2.xml",
    "Adding_Partitions-section-2-para-1.xml",
    "Adding_Partitions-section-2-para-2.xml",
    "Adding_Partitions-section-2-ppc-itemizedlist-1.xml",
    "Adding_Partitions-section-2-ppc-itemizedlist-2.xml",
    "Adding_Partitions-section-2-s390-itemizedlist-1.xml",
    "Adding_Partitions-section-2-s390-itemizedlist-2.xml",
    "Adding_Partitions-section-2-x86-itemizedlist-1.xml",
    "Adding_Partitions-x86.xml",
    "Adding_Partitions_common-indexterm-1.xml",
    "Adding_Partitions_common-indexterm-2.xml",
    "Adding_Partitions_common-indexterm-3.xml",
    "Advice_on_Partitions.xml",
    "Assign_Storage_Devices-common-important-indexterm-1.xml",
    "Automatic_Partitioning_common-caution-1.xml",
    "Automatic_Partitioning_common-indexterm-1.xml",
    "Automatic_Partitioning_common-indexterm-2.xml",
    "Automatic_Partitioning_common-note-1.xml",
    "Automatic_Partitioning_common-para-2.xml",
    "Automatic_Partitioning_common-para-6.xml",
    "Automatic_Partitioning_ppc-tip.xml",
    "Automatic_Partitioning_x86-tip.xml",
    "Bootloader_MBR_GPT_x86-ppc-para-1.xml",
    "Bootloader_MBR_GPT_x86-ppc-varlist-1.xml",
    "Bootloader_x86-ppc-para-1.xml",
    "Bootloader_x86-ppc-para-2.xml",
    "Bootloader_x86-ppc-para-3.xml",
    "Bootloader_x86-ppc-para-4.xml",
    "Bootloader_x86-ppc-para-5.xml",
    "Bootloader_x86-ppc-warning-1.xml",
    "Bootloader_x86-ppc-warning-2.xml",
    "Complete-ppc.xml",
    "Complete-s390.xml",
    "Complete-x86.xml",
    "Complete_common-para-1.xml",
    "Complete_common-para-5.s390.xml",
    "Complete_common-para-5.xml",
    "Complete_ppc_s390-para-2.xml",
    "Complete_ppc_x86-para-3.xml",
    "Complete_s390-para-2.xml",
    "Complete_x86-para-2.xml",
    "Create_Btrfs-common-note-2.xml",
    "Create_Btrfs-common-para-1.xml",
    "Create_Btrfs-common-para-2.xml",
    "Create_Btrfs-common-variablelist-1.xml",
    "Create_Btrfs-ppc-procedure.xml",
    "Create_Btrfs-ppc.xml",
    "Create_Btrfs-s390-procedure.xml",
    "Create_Btrfs-s390.xml",
    "Create_Btrfs-x86-procedure.xml",
    "Create_Btrfs-x86.xml",
    "Create_Btrfs_volume-size_policy-common-variablelist.xml",
    "Create_LVM-common-important-1.xml",
    "Create_LVM-common-note-1.xml",
    "Create_LVM-common-para-1.xml",
    "Create_LVM-common-para-2.xml",
    "Create_LVM-common-para-3.xml",
    "Create_LVM-ppc-procedure-1.xml",
    "Create_LVM-ppc.xml",
    "Create_LVM-s390-procedure-1.xml",
    "Create_LVM-s390.xml",
    "Create_LVM-x86-procedure-1.xml",
    "Create_LVM-x86.xml",
    "Create_Software_RAID-Btrfs-common-procedure-para-1.xml",
    "Create_Software_RAID-Btrfs-common-procedure-para-2.xml",
    "Create_Software_RAID-Btrfs-common-procedure-para-4.xml",
    "Create_Software_RAID-Btrfs-common-procedure-para-5.xml",
    "Create_Software_RAID-Btrfs-common-procedure-step-1.xml",
    "Create_Software_RAID-Btrfs-common-procedure-step-2.xml",
    "Create_Software_RAID-Btrfs-common-procedure-step-3.xml",
    "Create_Software_RAID-common-para-1.xml",
    "Create_Software_RAID-common-para-2.xml",
    "Create_Software_RAID-common-para-3.xml",
    "Create_Software_RAID-common-para-4.xml",
    "Create_Software_RAID-common-variablelist-1.xml",
    "Create_Software_RAID-ppc-procedure.xml",
    "Create_Software_RAID-ppc.xml",
    "Create_Software_RAID-s390-procedure.xml",
    "Create_Software_RAID-s390.xml",
    "Create_Software_RAID-x86-procedure.xml",
    "Create_Software_RAID-x86.xml",
    "Create_volume-btrfs-common-para-1.xml",
    "Create_volume-btrfs-common-para-2.xml",
    "Create_volume-btrfs-common-para-3.xml",
    "Create_volume-btrfs-common-step-1.xml",
    "Create_volume-size_policy-common-para-1.xml",
    "Create_volume-size_policy-common-para-3.xml",
    "Create_volume-size_policy-common-step-1.xml",
    "Create_volume-size_policy-common-step-2.xml",
    "Create_volume-size_policy-common-variablelist.xml",
    "Create_volume-size_policy-ppc-para-2.xml",
    "Create_volume-size_policy-s390-para-2.xml",
    "Create_volume-size_policy-x86-para-2.xml",
    "CustomSpoke-ppc64.xml",
    "CustomSpoke-s390.xml",
    "CustomSpoke-x86.xml",
    "DateTimeSpoke-ppc64.xml",
    "DateTimeSpoke-s390.xml",
    "DateTimeSpoke-x86.xml",
    "Disk_Partitioning_Advanced_Storage-indexterm1.xml",
    "Disk_Partitioning_Advanced_Storage-indexterm2.xml",
    "Disk_Partitioning_Advanced_Storage-ppc.xml",
    "Disk_Partitioning_Advanced_Storage-s390.xml",
    "Disk_Partitioning_Advanced_Storage-x86.xml",
    "Disk_Partitioning_Advanced_Storage_common-note-1.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-1.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-1b.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-2.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-4.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-5.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-6.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-7.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-8.xml",
    "Disk_Partitioning_Advanced_Storage_common-para-9.xml",
    "Disk_Partitioning_Advanced_Storage_common-procedure-1.xml",
    "Disk_Partitioning_common-indexterm-1.xml",
    "Disk_Partitioning_common-indexterm-2.xml",
    "Disk_Partitioning_common-indexterm-3.xml",
    "Disk_Partitioning_Graphical_common-important-1.xml",
    "Disk_Partitioning_Graphical_common-itemizedlist-1.xml",
    "Disk_Partitioning_Graphical_common-para-1.xml",
    "Disk_Partitioning_Graphical_common-para-10.xml",
    "Disk_Partitioning_Graphical_common-para-11.xml",
    "Disk_Partitioning_Graphical_common-para-12.xml",
    "Disk_Partitioning_Graphical_common-para-2.xml",
    "Disk_Partitioning_Graphical_common-para-3.xml",
    "Disk_Partitioning_Graphical_common-para-4.xml",
    "Disk_Partitioning_Graphical_common-para-7.xml",
    "Disk_Partitioning_Graphical_common-para-8.xml",
    "Disk_Partitioning_Graphical_common-para-9.xml",
    "Disk_Partitioning_Graphical_ppc-para-12.xml",
    "Disk_Partitioning_Graphical_s390-para-12.xml",
    "Disk_Partitioning_Graphical_x86-para-12.xml",
    "Disk_Partitioning_Scheme-para-1.xml",
    "Disk_Partitioning_Scheme-ppc.xml",
    "Disk_Partitioning_Scheme-s390.xml",
    "Disk_Partitioning_Scheme-term-1.xml",
    "Disk_Partitioning_Scheme-x86.xml",
    "Disk_Partitioning_Scheme_common-indexterm-1.xml",
    "Disk_Partitioning_Scheme_common-indexterm-2.xml",
    "Disk_Partitioning_Scheme_common-indexterm-3.xml",
    "Disk_Partitioning_Scheme_common-indexterm-4.xml",
    "Disk_Partitioning_Scheme_common-indexterm-5.xml",
    "Disk_Partitioning_Scheme_common-listitem-1.xml",
    "Disk_Partitioning_Scheme_common-listitem-2.xml",
    "Disk_Partitioning_Scheme_common-note-1.xml",
    "Disk_Partitioning_Scheme_common-warning-1.xml",
    "Disk_Partitioning_Scheme_ppc_s390-para-1.xml",
    "Disk_Partitioning_Setup_common-admpara-1.xml",
    "Disk_Partitioning_Setup_common-important-1.xml",
    "Disk_Partitioning_Setup_common-important-2.xml",
    "Disk_Partitioning_Setup_common-indexterm-1.xml",
    "Disk_Partitioning_Setup_common-indexterm-2.xml",
    "Disk_Partitioning_Setup_common-newp-para-1.xml",
    "Disk_Partitioning_Setup_common-packageKit.xml",
    "Disk_Partitioning_Setup_common-para-2.xml",
    "Disk_Partitioning_Setup_common-para-3.xml",
    "Disk_Partitioning_Setup_common-para-4.xml",
    "Disk_Partitioning_Setup_common-para-5.xml",
    "Disk_Partitioning_Setup_common-para-6.xml",
    "Disk_Partitioning_Setup_common-para-7.xml",
    "Disk_Partitioning_Setup_common-para-8.xml",
    "Disk_Partitioning_Setup_ppc-newp-itemizedlist-1.xml",
    "Disk_Partitioning_Setup_ppc-para-1.xml",
    "Disk_Partitioning_Setup_s390-newp-itemizedlist-1.xml",
    "Disk_Partitioning_Setup_s390-para-1.xml",
    "Disk_Partitioning_Setup_x86-newp-itemizedlist-1.xml",
    "Disk_Partitioning_Setup_x86-para-1.xml",
    "Encrypt-ppc.xml",
    "Encrypt-s390.xml",
    "Encrypt-x86.xml",
    "Encrypt_common-para-1.xml",
    "Encrypt_common-para-2.xml",
    "Encrypt_common-para-3.xml",
    "Encrypt_common-warning-1.xml",
    "File_System_Types-ppc.xml",
    "File_System_Types-s390.xml",
    "File_System_Types-x86.xml",
    "FilterSpoke-ppc64.xml",
    "FilterSpoke-s390.xml",
    "FilterSpoke-x86.xml",
    "Graphical_Installation-ppc.xml",
    "Graphical_Installation-s390.xml",
    "Graphical_Installation-x86.xml",
    "Graphical_Installation_install-hub-common-para-1.xml",
    "Graphical_Installation_install-hub-common-para-2.xml",
    "Graphical_Installation_install-hub-common-para-3.xml",
    "Graphical_Installation_install-hub-common-para-4.xml",
    "Graphical_Installation_install-hub-common-para-6.xml",
    "Graphical_Installation_install-hub-common-para-7.xml",
    "Graphical_Installation_install-hub-common-para-8.xml",
    "Graphical_Installation_install-hub-ppc-para-5.xml",
    "Graphical_Installation_install-hub-s390-para-5.xml",
    "Graphical_Installation_install-hub-x86-para-5.xml",
    "InitialSetupHub-common.xml",
    "InitialSetup-text.xml",
    "Installation_Source-common-para-1.xml",
    "Installation_Source-common-para-3.xml",
    "Installation_Source-common-para-4.xml",
    "Installation_Source-common-para-6.xml",
    "Installation_Source-common-para-7.xml",
    "Installation_Source-common-variable-list.xml",
    "Installation_Source-ppc-para-5.xml",
    "Installation_Source-s390-para-1.xml",
    "Installation_Source-s390-para-5.xml",
    "Installation_Source-s390-variable-list.xml",
    "Installation_Source-x86-para-5.xml",
    "Kdump_common-para-1.xml",
    "Kdump_common-para-2.xml",
    "Kdump_common-para-3.xml",
    "KdumpSpoke-ppc64.xml",
    "KdumpSpoke-s390.xml",
    "KdumpSpoke-x86.xml",
    "Key_Board_Configuration-common-note-1.xml",
    "Key_Board_Configuration-common-note-2.xml",
    "Key_Board_Configuration-common-para-1.xml",
    "Key_Board_Configuration-common-para-2.xml",
    "Key_Board_Configuration-common-para-3.xml",
    "Key_Board_Configuration-common-para-4.xml",
    "Key_Board_Configuration-common-para-5.xml",
    "Key_Board_Configuration_x86_ppc-indexterm-1.xml",
    "Key_Board_Configuration_x86_ppc-indexterm-2.xml",
    "Key_Board_Configuration_x86_ppc-para-1.xml",
    "KeyboardSpoke-ppc64.xml",
    "KeyboardSpoke-s390.xml",
    "KeyboardSpoke-x86.xml",
    "LangSupportSpoke-ppc64.xml",
    "LangSupportSpoke-s390.xml",
    "LangSupportSpoke-x86.xml",
    "Language_Configuration_common-note-1.xml",
    "Language_Configuration_common-para-1.xml",
    "Language_Configuration_common-para-2.xml",
    "Language_Configuration_common-para-3.xml",
    "Language_Configuration_x86_ppc-indexterm-1.xml",
    "Language_Configuration_x86_ppc-indexterm-2.xml",
    "Language_Support_common-indexterm-1.xml",
    "Language_Support_common-para-1.xml",
    "Language_Support_common-para-2.xml",
    "Language_Support_common-para-3.xml",
    "Language_Support_common-tip-1.xml",
    "Manual_Partitioning_common-para-1.xml",
    "Manual_Partitioning_common-para-2.xml",
    "Manual_Partitioning_common-para-3.xml",
    "Netconfig_Adv_Interfaces_common-list-1.xml",
    "Netconfig_Adv_Interfaces_common-note-1.xml",
    "Netconfig_Adv_Interfaces_common-para-1.xml",
    "Netconfig_Adv_Interfaces_common-para-2.xml",
    "Netconfig_Adv_Interfaces_common-para-3.xml",
    "Netconfig_Adv_Interfaces_common-para-4.xml",
    "Netconfig_common-important-1.xml",
    "Netconfig_common-important-2.xml",
    "Netconfig_common-indexterm-1.xml",
    "Netconfig_common-indexterm-2.xml",
    "Netconfig_common-indexterm-4.xml",
    "Netconfig_common-list-1.xml",
    "Netconfig_common-note-1.xml",
    "Netconfig_common-note-2.xml",
    "Netconfig_common-para-1.xml",
    "Netconfig_common-para-10.xml",
    "Netconfig_common-para-2.xml",
    "Netconfig_common-para-20.xml",
    "Netconfig_common-para-21.xml",
    "Netconfig_common-para-3.xml",
    "Netconfig_common-para-4.xml",
    "Netconfig_common-para-5.xml",
    "Netconfig_s390-para-10.xml",
    "NetworkSpoke-ppc64.xml",
    "NetworkSpoke-s390.xml",
    "NetworkSpoke-x86.xml",
    "Note-Dedicate_One_Partition-ppc.xml",
    "Note-Dedicate_One_Partition-s390.xml",
    "Note-Dedicate_One_Partition.xml",
    "Note_menu-options-greyed-out.xml",
    "Package_Selection_common-indexterm-1.xml",
    "Package_Selection_common-indexterm-2.xml",
    "Package_Selection_common-indexterm-3.xml",
    "Package_Selection_common-indexterm-4.xml",
    "Package_Selection_common-indexterm-5.xml",
    "Package_Selection_common-indexterm-6.xml",
    "Package_Selection_common-itemizedlist-2.xml",
    "Package_Selection_common-para-1.xml",
    "Package_Selection_common-para-10.xml",
    "Package_Selection_common-para-2.xml",
    "Package_Selection_common-para-3.xml",
    "Package_Selection_common-para-4.xml",
    "Package_Selection_common-para-5.xml",
    "Package_Selection_common-para-7.xml",
    "Package_Selection_common-para-8.xml",
    "Package_Selection_Customizing-common-itemizedlist-1.xml",
    "Package_Selection_Customizing-common-para-10.xml",
    "Package_Selection_Customizing-common-para-11.xml",
    "Package_Selection_Customizing-common-para-8.xml",
    "PasswordSpoke-ppc64.xml",
    "PasswordSpoke-s390.xml",
    "PasswordSpoke-x86.xml",
    "ppc_Bootloader.xml",
    "Progress_Hub_common-para-1.xml",
    "Progress_Hub_common-para-2.xml",
    "Progress_Hub_common-para-3.xml",
    "Progress_Hub_common-para-4.xml",
    "Progress_Hub_common-para-5.xml",
    "Progress_Hub_common-para-6.xml",
    "Progress_Hub_common-para-7.xml",
    "ProgressHub-ppc64.xml",
    "ProgressHub-s390.xml",
    "ProgressHub-x86.xml",
    "Reclaim_Space-ppc.xml",
    "Reclaim_Space-s390.xml",
    "Reclaim_Space-x86.xml",
    "Reclaim_Space_common-para-1.xml",
    "Reclaim_Space_common-para-2.xml",
    "Reclaim_Space_common-para-3.xml",
    "Reclaim_Space_common-para-4.xml",
    "Reclaim_Space_common-para-5.xml",
    "Reclaim_Space_common-warning-1.xml",
    "Reclaim_Space_itemized-list-1.xml",
    "SecurityPolicySpoke-x86.xml",
    "SecurityPolicySpoke-ppc64.xml",
    "SecurityPolicySpoke-s390.xml",
    "SoftwareSpoke-ppc64.xml",
    "SoftwareSpoke-s390.xml",
    "SoftwareSpoke-x86.xml",
    "SourceSpoke-ppc64.xml",
    "SourceSpoke-s390.xml",
    "SourceSpoke-x86.xml",
    "Specialized_Storage_Devices-ppc.xml",
    "Specialized_Storage_Devices-s390.xml",
    "Specialized_Storage_Devices-x86.xml",
    "Specialized_Storage_Devices_common-important-1.xml",
    "Specialized_Storage_Devices_common-para-1.xml",
    "Specialized_Storage_Devices_common-para-2.xml",
    "Specialized_Storage_Devices_common-para-3.xml",
    "Specialized_Storage_Devices_common-para-5.xml",
    "Specialized_Storage_Devices_common-para-6.xml",
    "Specialized_Storage_Devices_common-para-7.xml",
    "Specialized_Storage_Devices_common-para-8.xml",
    "Specialized_Storage_Devices_common-variablelist-1.xml",
    "Storage_Devices_common-indexterm-1.xml",
    "Storage_Devices_common-indexterm-2.xml",
    "Storage_Devices_ppc-para-1.xml",
    "Storage_Devices_s390-para-1.xml",
    "Storage_Devices_x86-para-1.xml",
    "StorageSpoke-ppc64.xml",
    "StorageSpoke-s390.xml",
    "StorageSpoke-x86.xml",
    "SubscriptionManagerSpoke-common.xml",
    "SummaryHub-ppc64.xml",
    "SummaryHub-s390.xml",
    "SummaryHub-x86.xml",
    "Swap_Partrecommend-para-1.xml",
    "Swap_Partrecommend-para-2.xml",
    "Swap_Partrecommend-para-3.xml",
    "Swap_Partrecommend-ppc.xml",
    "Swap_Partrecommend.xml",
    "Time_Zone_common-indexterm-1.xml",
    "Time_Zone_common-indexterm-2.xml",
    "Time_Zone_common-indexterm-3.xml",
    "Time_Zone_common-indexterm-6.xml",
    "Time_Zone_common-indexterm-7.xml",
    "Time_Zone_common-itemizedlist-1.xml",
    "Time_Zone_common-note-1.xml",
    "Time_Zone_common-para-2.xml",
    "Time_Zone_common-para-3.xml",
    "Time_Zone_common-para-4.xml",
    "Time_Zone_common-para-5.xml",
    "Time_Zone_common-para-6.xml",
    "Time_Zone_common-para-7.xml",
    "Time_Zone_common-para-8.xml",
    "Time_Zone_common-tip-1.xml",
    "User_Account_Conf-common-para-1.xml",
    "User_Account_Conf-common-para-2.xml",
    "User_Account_Conf-common-para-3.xml",
    "User_Account_Conf-common-para-4.xml",
    "User_Account_Conf-common-para-5.xml",
    "User_Account_Conf-common-para-6.xml",
    "User_Account_Conf-common-para-7.xml",
    "UserSpoke-ppc64.xml",
    "UserSpoke-s390.xml",
    "UserSpoke-x86.xml",
    "WelcomeSpoke-ppc64.xml",
    "WelcomeSpoke-s390.xml",
    "WelcomeSpoke-x86.xml",
    "Write_changes_to_disk_common-para-1.xml",
    "Write_changes_to_disk_common-para-2.xml",
    "Write_changes_to_disk_common-para-3.xml",
    "Write_changes_to_disk_common-para-4.xml",
    "Write_changes_to_disk_common-para-5.xml",
    "Write_changes_to_disk_ppc.xml",
    "Write_changes_to_disk_s390.xml",
    "Write_changes_to_disk_x86.xml",
    "X86_Bootloader.xml",
    "Installation_Guide.ent"
]

def run_xmllint():
    """Run "xmllint --noent" on every XML file in the output folder.

    Each "*.xml" file directly inside OUTPUT_FOLDER is processed by xmllint
    and then overwritten with the xmllint output. If xmllint exits with
    a non-zero return code, a warning is printed, the original file is left
    untouched and the partial output is discarded.
    """
    for path in glob.glob(os.path.join(OUTPUT_FOLDER, "*.xml")):
        temp_file_path = "%s.temp" % path
        try:
            # xmllint outputs to stdout, so we catch the output to a temporary
            # file and then overwrite the original with the temporary file once
            # xmllint is done; the with statement makes sure the file gets
            # closed even if xmllint fails
            with open(temp_file_path, "w") as temp_file:
                subprocess.check_call(["xmllint", "--noent", path], stdout=temp_file)
        except subprocess.CalledProcessError:
            print("WARNING: running xmllint on %s failed" % path)
            # don't leave the incomplete temporary file behind
            # in the folder holding the final help content
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)
        else:
            shutil.move(temp_file_path, path)

# does the input folder exist ?
if not os.path.isdir(INPUT_FOLDER):
    print("ERROR: input folder does not exists")
    # raise SystemExit directly instead of calling exit() - exit() is
    # a helper added by the site module for interactive use and is not
    # guaranteed to be available in every interpreter configuration
    raise SystemExit(1)

# make sure that the output folder is empty
if os.path.exists(OUTPUT_FOLDER):
    # if it already exists, delete it
    shutil.rmtree(OUTPUT_FOLDER)
os.makedirs(OUTPUT_FOLDER)

# copy all the files listed in ANACONDA_HELP_FILES to the output folder
print("copying relevant help content files")
for file_name in ANACONDA_HELP_FILES:
    origin = os.path.join(INPUT_FOLDER, file_name)
    destination = os.path.join(OUTPUT_FOLDER, file_name)
    if not os.path.isfile(origin):
        print("WARNING: required file %s is missing" % origin)
        # there is nothing to copy - skip the file, otherwise shutil.copy()
        # would raise an exception right after the warning has been printed
        continue
    shutil.copy(origin, destination)

# replace every "&nbsp;" in the entity & XML files with a regular space
print("removing non breakable spaces")
for pattern in ("*.ent", "*.xml"):
    for path in glob.glob(os.path.join(OUTPUT_FOLDER, pattern)):
        # do the replacement directly in Python - no shell command needs to
        # be assembled from file paths and a failure can't pass unnoticed;
        # the files are handled as bytes so their encoding does not matter
        with open(path, "rb") as content_file:
            content = content_file.read()
        with open(path, "wb") as content_file:
            content_file.write(content.replace(b"&nbsp;", b" "))

# run xmllint to resolve entities
print("running xmllint to resolve entities")
run_xmllint()

print("loading all XML files")

# successfully parsed trees, stored under the path they were loaded from
xml_files = {}
# id attribute value -> (file name, title text) of the element with that id
known_ids = {}

for path in glob.glob(os.path.join(OUTPUT_FOLDER, "*.xml")):
    print("loading: %s" % path)
    try:
        tree = ET.parse(path)
    except ET.ParseError as err:
        # files that can't be parsed are reported and left out
        print("WARNING: parsing failed:\n%s" % err)
        continue

    # look at every element in the tree and index the ones that have an id
    for node in tree.getroot().iter():
        element_id = node.attrib.get("id")
        if not element_id:
            # no id attribute (or an empty one), nothing to index
            continue
        title = node.find('title')
        if title is None:
            # the element has no title child to take the text from
            print("WARNING: id %s in %s has no title text" % (element_id, path))
            continue
        # remember the file name and the title text for the id
        filename = os.path.basename(path)
        known_ids[element_id] = (filename, title.text)

    xml_files[path] = tree

print("%d XML files loaded" % len(xml_files))
print("%d ids found" % len(known_ids))

def _remove_keeping_tail(element):
    """Remove the element from its parent, but keep the text that follows it.

    In lxml the text following an element (its tail) belongs to the element,
    so a plain parent.remove(element) would drop that text as well. The tail
    is therefore handed over to the previous sibling, or to the parent if
    the element is the first child, before the element is removed.
    """
    parent = element.getparent()
    tail = element.tail
    if tail:
        previous = element.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or "") + tail
        else:
            parent.text = (parent.text or "") + tail
    parent.remove(element)


# remove pictures/figures and remarks, rewrite links
removed_figures = 0
removed_remarks = 0
rewritten_links = 0
outside_links = 0
print("removing figure & remark tags, rewriting links")
for path, tree in xml_files.items():
    root = tree.getroot()

    for figure in root.findall('.//figure'):
        _remove_keeping_tail(figure)
        removed_figures += 1

    for remark in root.findall('.//remark'):
        _remove_keeping_tail(remark)
        removed_remarks += 1

    # rewrite all links to a format digestible by Yelp
    for xref in root.findall('.//xref'):
        link_target = xref.attrib.get('linkend')
        if link_target:
            if link_target in known_ids:
                # the target is one of the loaded files - link to the file
                # and use the title of the target element as the link text
                filename, title = known_ids[link_target]
                new_element = ET.Element("ulink")
                new_element.attrib["url"] = filename
                new_element.text = title
                new_element.tail = xref.tail
                # replace the old link element with the new one
                xref.getparent().replace(xref, new_element)
            else:
                # this link points outside of the help files currently
                # used by Anaconda, so replace it with "find it somewhere else"
                # template
                print("INFO: outside link, id: %s in %s" % (link_target, path))
                # lxml doesn't seem to be able to replace an element with a string,
                # so we will just clear the element and replace it with the template
                # in a later pass over the saved files
                tail = xref.tail
                # clear() removes the tail, which is in this case pretty much unrelated
                # to the element, so we need to make sure to save & restore it
                xref.clear()
                xref.tail = tail
                outside_links += 1
            # outside links are counted as rewritten links as well
            rewritten_links += 1
        else:
            print("WARNING: %s has a xref link with missing linkend" % path)

print("%d figures and %d remarks have been removed" % (removed_figures, removed_remarks))
print("%d links have been rewritten, %d were outside links" % (rewritten_links, outside_links))

# write the modified XMLs to disk
print("saving modified XMLs to storage")
for path, tree in xml_files.items():
    tree.write(path)

# replace the outside links (saved as empty <xref/> tags) with the template
# here, on the saved files, as lxml is not able to do that for us
print("removing obsolete <xref/> tags")
# the entities in the template are resolved by a later xmllint run
template = b"the full <citetitle>&PRODUCT; Installation Guide</citetitle>, available at &IGURL;"
for path in glob.glob(os.path.join(OUTPUT_FOLDER, "*.xml")):
    # do the replacement directly in Python - this avoids both the shell
    # command assembled from file paths and the backslash escaping sed
    # needs for the template; the files are handled as bytes so their
    # encoding does not matter
    with open(path, "rb") as xml_file:
        content = xml_file.read()
    with open(path, "wb") as xml_file:
        xml_file.write(content.replace(b"<xref/>", template))

# run xmllint once more so that entities added since the first run get resolved
print("running xmllint to resolve any newly added entities")
run_xmllint()

# the entity file has served its purpose, so delete it from the output folder
print("removing the entity file")
entity_file_path = os.path.join(OUTPUT_FOLDER, "Installation_Guide.ent")
os.remove(entity_file_path)

# copy the placeholder pages from the input folder to the output folder
print("adding placeholders:")
for placeholder in PLACEHOLDERS:
    shutil.copy(os.path.join(INPUT_FOLDER, placeholder), OUTPUT_FOLDER)
    print(placeholder)

print("done!")
# raise SystemExit directly instead of calling exit() - exit() is a helper
# added by the site module for interactive use and is not guaranteed to be
# available in every interpreter configuration
raise SystemExit(0)
