#!/bin/bash
######################################################################
#
#   tcar - The CentOS Artwork Repository automation tool.
#   Copyright © 2014 The CentOS Artwork SIG
#
#   This program is free software; you can redistribute it and/or
#   modify it under the terms of the GNU General Public License as
#   published by the Free Software Foundation; either version 2 of the
#   License, or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#   Alain Reguera Delgado <al@centos.org.cu>
#   39 Street No. 4426 Cienfuegos, Cuba.
#
######################################################################

# Transform web page headings to make them accessible through a table
# of contents.  The table of contents is expanded in place, wherever
# the <div class="toc"></div> piece of code is in the page. Once the
# <div class="toc"></div> piece of code has been expanded, there is no
# need to put anything else in the page.
#
# In order for the tuneup functionality to transform headings, you
# need to put headings in just one line using one of the following
# forms:
#
#   <h1><a name="">Title</a></h1>
#   <h1><a href="">Title</a></h1>
#   <h1><a name="" href="">Title</a></h1>
#
# In the example above, h1 can vary from h1 to h6. Closing tag must be
# present and also match the opening tag. The value of `name' and
# `href' options from the anchor element are set dynamically from the
# head- string and the md5sum output of the heading string.
function xhtml_setToc {

    # Rewrite the anchors of the one-line headings found in FILE and
    # expand the table of contents placeholder of that same file.
    #
    # Globals read:    FILE, the page to process (set by the caller);
    #                  TCAR_MODULE_DIR_CONFIGS, the directory where
    #                  toc.awk is looked for.
    # Globals written: TOC, the table of contents markup.
    # Side effects:    FILE is edited in place through `sed -i'.
    # Returns:         0, leaving FILE untouched, when `file --brief'
    #                  does not report it as XHTML, HTML or XML.
    #
    # NOTE(review): the HTML literals used below (the closing </a> in
    # the heading pattern, the three anchor patterns and their
    # replacements, the link markup and the <div class="toc"> wrapper)
    # were restored from the heading forms this function supports and
    # from its `head-' + md5sum naming scheme. Confirm them against
    # the upstream copy and against toc.awk before merging.

    # Define variables as local to avoid conflicts outside.
    local COUNT=0
    local PREVCOUNT=0
    local HEADING=''
    local HEADINGS=''
    local TOCENTRY=''
    local TOC_LINE=''
    local FIRST_RE=''
    local FINAL_RE=''
    local -a FIRST=()
    local -a FINAL=()
    local -a TITLE=()
    local -a MD5SM=()
    local -a OPTNS=()
    local -a CLASS=()
    local -a LEVEL=()
    local -a PARENT=()
    local -a TOCENTRIES=()
    local -a LINK=()

    # Define table of content configuration file, the file used to
    # produce the table of content XHTML output code.
    local TOC_CONFIG=${TCAR_MODULE_DIR_CONFIGS}/toc.awk

    # Verify table of content configuration file.
    tcar_checkFiles -ef "${TOC_CONFIG}"

    # Define html heading regular expression pattern. Use parentheses
    # to save heading level (\1), heading attributes (\2), opening
    # anchor tag (\3) and heading title (\4).
    local PATTERN='<h([1-6])(.*)>(<a.*[^\>]>)(.*[^<])</a></h[1-6]>'

    # Define the regular expressions used with `=~'. They are kept in
    # variables and expanded unquoted on purpose: a quoted right-hand
    # side is compared as a literal string, not as a regular
    # expression.
    local HTML_TYPES='^(XHTML|HTML|XML)'
    local ANCHOR_BOTH='^<a (href|name)="(.*)" (href|name)="(.*)">$'
    local ANCHOR_NAME='^<a name="(.*)">$'
    local ANCHOR_HREF='^<a href="(.*)">$'

    # Verify the file is really an html file. If it isn't, leave it
    # untouched. Use `return' here, not `continue': there is no loop
    # in this function to continue.
    if [[ ! $(file --brief "${FILE}") =~ ${HTML_TYPES} ]];then
        return 0
    fi

    # Define list of headings to process, one heading per line and
    # without leading spaces. The list is collected before the file
    # is modified and read line by line, so spaces, tabs and glob
    # characters inside headings are preserved as they are.
    HEADINGS=$(grep -E "${PATTERN}" "${FILE}" \
        | sed -r -e 's!^[[:space:]]+!!')

    while IFS= read -r HEADING;do

        # A here-string always provides one line, even when there is
        # no heading at all.
        if [[ -z ${HEADING} ]];then
            continue
        fi

        # Define previous counter value using current counter
        # value as reference.
        if [[ ${COUNT} -ne 0 ]];then
            PREVCOUNT=$((COUNT - 1))
        fi

        # Define initial heading information.
        FIRST[${COUNT}]=${HEADING}
        TITLE[${COUNT}]=$(printf '%s\n' "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\4!")
        MD5SM[${COUNT}]=$(printf '%s\n' "${TITLE[${COUNT}]}" | md5sum | sed -r 's![[:space:]]+-$!!')
        OPTNS[${COUNT}]=$(printf '%s\n' "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\3!")
        CLASS[${COUNT}]=$(printf '%s\n' "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\2!")
        LEVEL[${COUNT}]=$(printf '%s\n' "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\1!")
        PARENT[${COUNT}]=${LEVEL[${PREVCOUNT}]}

        # Transform heading information using initial heading
        # information as reference. The two-attribute form is tested
        # first because the single-attribute patterns match it too.
        if [[ ${OPTNS[${COUNT}]} =~ ${ANCHOR_BOTH} ]];then
            OPTNS[${COUNT}]="<a name=\"head-${MD5SM[${COUNT}]}\" href=\"#head-${MD5SM[${COUNT}]}\">"
        elif [[ ${OPTNS[${COUNT}]} =~ ${ANCHOR_NAME} ]];then
            OPTNS[${COUNT}]="<a name=\"head-${MD5SM[${COUNT}]}\">"
        elif [[ ${OPTNS[${COUNT}]} =~ ${ANCHOR_HREF} ]];then
            OPTNS[${COUNT}]="<a href=\"#head-${MD5SM[${COUNT}]}\">"
        fi

        # Build final html heading structure.
        FINAL[${COUNT}]="<h${LEVEL[${COUNT}]}${CLASS[${COUNT}]}>${OPTNS[${COUNT}]}${TITLE[${COUNT}]}</a></h${LEVEL[${COUNT}]}>"

        # Build html heading link structure. These links are used by
        # the table of contents later.
        LINK[${COUNT}]="<a href=\"#head-${MD5SM[${COUNT}]}\">${TITLE[${COUNT}]}</a>"

        # Build table of contents entry with numerical
        # identifications. The numerical identification is what we use
        # to determine the correct position of each heading link on
        # the table of content.
        TOCENTRIES[${COUNT}]="${COUNT}:${LEVEL[${COUNT}]}:${PARENT[${COUNT}]}:${LINK[${COUNT}]}"

        # Update heading information inside the current file being
        # processed. The first heading is escaped to be taken as a
        # literal string by the regular expression, and the final
        # heading to be taken as literal replacement text (`&', `\'
        # and the `!' delimiter would be interpreted by sed
        # otherwise).
        FIRST_RE=$(printf '%s\n' "${FIRST[${COUNT}]}" | sed -e 's/[][\\.^$*+?(){}|!]/\\&/g')
        FINAL_RE=$(printf '%s\n' "${FINAL[${COUNT}]}" | sed -e 's/[\\&!]/\\&/g')
        sed -i -r "s!${FIRST_RE}!${FINAL_RE}!" "${FILE}"

        # Increase heading counter.
        COUNT=$((COUNT + 1))

    done <<< "${HEADINGS}"

    # Build the table of contents using heading numerical
    # identifications as reference. The numerical identification
    # describes the order of headings in one xhtml file. This
    # information is processed by awk to make the appropriate
    # replacements. Finally, the result is stored in the TOC
    # variable, which is not local and so remains set for the caller.
    #
    # NOTE(review): the closing tag of the wrapper is not printed
    # here, presumably toc.awk prints it -- confirm.
    TOC=$(echo '<div class="toc">'
        echo "<p>$(gettext "Table of contents")</p>"
        for TOCENTRY in "${TOCENTRIES[@]}";do
            printf '%s\n' "${TOCENTRY}"
        done \
            | awk -f "${TOC_CONFIG}")

    # Join the table of contents in one single line. The TOC variable
    # is expanded without quotes on purpose, to let word splitting
    # collapse line breaks into spaces. Pathname expansion is turned
    # off inside the subshell only, so heading text is never expanded
    # as a file name pattern.
    TOC_LINE=$(set -f; echo -e ${TOC})

    # Update table of contents inside the current file being
    # processed.
    #
    # NOTE(review): `[^<\/div]' is a bracket expression, it matches
    # one character other than `<', `\', `/', `d', `i' and `v'. A
    # placeholder with nothing between its tags is therefore not
    # matched -- confirm whether this is intended.
    sed -i -r '/<div class="toc">[^<\/div].*<\/div>/c'"${TOC_LINE}" "${FILE}"

}