Blame tcar-scripts-tuneup/Modules/Xhtml/xhtml_setToc.sh

Alain Reguera Delgado 379a73
#!/bin/bash
Alain Reguera Delgado 379a73
###################################################################### 
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
#   xhtml_setToc.sh -- This functionality transforms web page headings
Alain Reguera Delgado 379a73
#   to make them accessible through a table of contents.  The table of
Alain Reguera Delgado 379a73
#   contents is expanded in place, wherever the <div
Alain Reguera Delgado 379a73
#   class="toc"></div> piece of code is in the page.  Once the <div
Alain Reguera Delgado 379a73
#   class="toc"></div> piece of code has been expanded, there is no need
Alain Reguera Delgado 379a73
#   to put anything else in the page.
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
#   In order for the tuneup functionality to transform headings, you
Alain Reguera Delgado 379a73
#   need to put headings in just one line using one of the following
Alain Reguera Delgado 379a73
#   forms:
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
#   <h1><a name="">Title</a></h1>
Alain Reguera Delgado 379a73
#   <h1><a href="">Title</a></h1>
Alain Reguera Delgado 379a73
#   <h1><a name="" href="">Title</a></h1>
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
#   In the example above, h1 can vary from h1 to h6. Closing tag must
Alain Reguera Delgado 379a73
#   be present and also match the opening tag. The value of `name'
Alain Reguera Delgado 379a73
#   and `href' options from the anchor element are set dynamically
Alain Reguera Delgado 379a73
#   using the md5sum output of combining the page location, the head-
Alain Reguera Delgado 379a73
#   string and the heading string.
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
#   Written by:
Alain Reguera Delgado 379a73
#   * Alain Reguera Delgado <al@centos.org.cu>, 2009-2013
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
# Copyright (C) 2009-2013 The CentOS Artwork SIG
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
# This program is free software; you can redistribute it and/or modify
Alain Reguera Delgado 379a73
# it under the terms of the GNU General Public License as published by
Alain Reguera Delgado 379a73
# the Free Software Foundation; either version 2 of the License, or (at
Alain Reguera Delgado 379a73
# your option) any later version.
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
# This program is distributed in the hope that it will be useful, but
Alain Reguera Delgado 379a73
# WITHOUT ANY WARRANTY; without even the implied warranty of
Alain Reguera Delgado 379a73
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Alain Reguera Delgado 379a73
# General Public License for more details.
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
# You should have received a copy of the GNU General Public License
Alain Reguera Delgado 379a73
# along with this program; if not, write to the Free Software
Alain Reguera Delgado 379a73
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Alain Reguera Delgado 379a73
#
Alain Reguera Delgado 379a73
######################################################################
Alain Reguera Delgado 379a73
Alain Reguera Delgado 379a73
# xhtml_setToc -- Rewrite the heading anchors of one (X)HTML file and
# expand its table of contents placeholder in place.
#
# Globals read:
#   FILE                     Path of the file to process (set by the
#                            caller; this function takes no arguments).
#   TCAR_MODULE_DIR_CONFIGS  Directory holding the `toc.awk' program
#                            used to render the table of contents.
#
# Behaviour:
#   * Files that `file --brief' does not report as XHTML, HTML or XML
#     are left untouched.
#   * Every one-line heading of the form
#         <hN ...><a ...>Title</a></hN>
#     whose anchor carries `name', `href' or both gets those options
#     set to the md5sum of the title (followed by a newline).
#   * The line holding <div class="toc">...</div> is replaced with the
#     generated table of contents, collapsed into a single line so the
#     same expression matches again on the next run.
#
# NOTE(review): the html literals in this function (heading pattern,
# anchor expressions, link and toc markup) were reconstructed from the
# surrounding code and the file header; verify them against the
# upstream repository.
function xhtml_setToc {

    # Define variables as local to avoid conflicts outside. IFS is
    # pinned to its default value because the table of contents is
    # collapsed below by splitting it on white space.
    local IFS=$' \t\n'
    local COUNT=0
    local PREVCOUNT=0
    local HEADING=''
    local HEADINGS=''
    local FIRST_RE=''
    local FINAL_RE=''
    local TOCENTRY=''
    local TOC=''
    local TOCTEXT=''
    local -a TOCWORDS
    local -a FIRST
    local -a FINAL
    local -a TITLE
    local -a MD5SM
    local -a OPTNS
    local -a CLASS
    local -a LEVEL
    local -a PARENT
    local -a TOCENTRIES
    local -a LINK

    # Define table of content configuration file, the file used to
    # produce the table of content XHTML output code.
    local TOC_CONFIG=${TCAR_MODULE_DIR_CONFIGS}/toc.awk

    # Verify table of content configuration file.
    tcar_checkFiles -ef "${TOC_CONFIG}"

    # Define html heading regular expression pattern. Use parentheses
    # to save heading level, heading options, anchor opening tag and
    # heading title.
    local PATTERN='<h([1-6])(.*)>(<a.*[^\>]>)(.*[^<])</a></h[1-6]>'

    # Define the regular expressions used in [[ =~ ]] tests. They are
    # kept in variables and expanded unquoted because, since bash 3.2,
    # a quoted right-hand side is matched as a literal string.
    local FILETYPE='^(XHTML|HTML|XML)'
    local ANCHOR_BOTH='^<a (name|href)="[^"]*" (name|href)="[^"]*">$'
    local ANCHOR_NAME='^<a name="[^"]*">$'
    local ANCHOR_HREF='^<a href="[^"]*">$'

    # Verify the file is really an html file. If it isn't, there is
    # nothing to do here. `return' is used instead of `continue'
    # because, since bash 4.4, `continue' inside a function no longer
    # affects a loop running in the caller.
    if [[ ! $(file --brief "${FILE}") =~ ${FILETYPE} ]]; then
        return 0
    fi

    # Define list of headings to process, one heading per line. The
    # list is collected before the loop starts because the file is
    # modified inside the loop.
    HEADINGS=$(grep -E -- "${PATTERN}" "${FILE}")

    while IFS= read -r HEADING; do

        # Skip anything that is not a heading (e.g., the empty line
        # produced when no heading was found at all).
        [[ ${HEADING} =~ ${PATTERN} ]] || continue

        # Define previous counter value using current counter
        # value as reference.
        if [[ ${COUNT} -ne 0 ]]; then
            PREVCOUNT=$(( COUNT - 1 ))
        fi

        # Define initial heading information. Only the matched part
        # of the line is considered, so text around the heading is
        # neither captured nor modified.
        FIRST[${COUNT}]=${BASH_REMATCH[0]}
        LEVEL[${COUNT}]=${BASH_REMATCH[1]}
        CLASS[${COUNT}]=${BASH_REMATCH[2]}
        OPTNS[${COUNT}]=${BASH_REMATCH[3]}
        TITLE[${COUNT}]=${BASH_REMATCH[4]}
        MD5SM[${COUNT}]=$(printf '%s\n' "${TITLE[${COUNT}]}" | md5sum)
        MD5SM[${COUNT}]=${MD5SM[${COUNT}]%%[[:space:]]*}
        PARENT[${COUNT}]=${LEVEL[${PREVCOUNT}]}

        # Transform heading information using initial heading
        # information as reference. Anchors in any other form are
        # left as they are.
        if [[ ${OPTNS[${COUNT}]} =~ ${ANCHOR_BOTH} ]]; then
            OPTNS[${COUNT}]="<a name=\"${MD5SM[${COUNT}]}\" href=\"#${MD5SM[${COUNT}]}\">"
        elif [[ ${OPTNS[${COUNT}]} =~ ${ANCHOR_NAME} ]]; then
            OPTNS[${COUNT}]="<a name=\"${MD5SM[${COUNT}]}\">"
        elif [[ ${OPTNS[${COUNT}]} =~ ${ANCHOR_HREF} ]]; then
            OPTNS[${COUNT}]="<a href=\"#${MD5SM[${COUNT}]}\">"
        fi

        # Build final html heading structure.
        FINAL[${COUNT}]="<h${LEVEL[${COUNT}]}${CLASS[${COUNT}]}>${OPTNS[${COUNT}]}${TITLE[${COUNT}]}</a></h${LEVEL[${COUNT}]}>"

        # Build html heading link structure. These links are used by
        # the table of contents later.
        LINK[${COUNT}]="<a href=\"#${MD5SM[${COUNT}]}\">${TITLE[${COUNT}]}</a>"

        # Build table of contents entry with numerical
        # identifications. The numerical identification is what we use
        # to determine the correct position of each heading link on
        # the table of content.
        TOCENTRIES[${COUNT}]="${COUNT}:${LEVEL[${COUNT}]}:${PARENT[${COUNT}]}:${LINK[${COUNT}]}"

        # Update heading information inside the current file being
        # processed. Use the first and final heading information,
        # escaped so heading text is taken literally by sed (regular
        # expression operators on the left, `\', `&' and the `!'
        # delimiter on the right).
        FIRST_RE=$(printf '%s\n' "${FIRST[${COUNT}]}" \
            | sed -e 's,[][\\.*^$+?(){}|!],\\&,g')
        FINAL_RE=$(printf '%s\n' "${FINAL[${COUNT}]}" \
            | sed -e 's,[\\&!],\\&,g')
        sed -i -r -e "s!${FIRST_RE}!${FINAL_RE}!" "${FILE}"

        # Increase heading counter.
        COUNT=$(( COUNT + 1 ))

    done <<< "${HEADINGS}"

    # Build the table of contents using heading numerical
    # identifications as reference. The numerical identification
    # describes the order of headings in one xhtml file. This
    # information is processed by awk to make the appropriate
    # replacements. Finally, the result is stored in the TOC
    # variable.
    TOC=$(
        printf '%s\n' '<div class="toc">'
        printf '<p>%s</p>\n' "$(gettext "Table of contents")"
        for TOCENTRY in "${TOCENTRIES[@]}"; do
            printf '%s\n' "${TOCENTRY}"
        done | awk -f "${TOC_CONFIG}"
    )

    # Collapse the table of contents into one line, separating words
    # with single spaces. `read -a' splits on white space without
    # performing pathname expansion, so characters like `*' and `?'
    # inside titles are safe. read returns non-zero at end of input,
    # which is expected here.
    read -r -d '' -a TOCWORDS <<< "${TOC}" || true
    TOCTEXT=$(echo -e "${TOCWORDS[*]}")

    # Backslashes are interpreted by sed inside the text of the `c'
    # command; double them so they reach the file unchanged.
    TOCTEXT=${TOCTEXT//\\/\\\\}

    # Update table of contents inside the current file being
    # processed. Both the empty placeholder and a table of contents
    # expanded by a previous run are matched.
    sed -i -r -e '/<div class="toc">.*<\/div>/c'"${TOCTEXT}" "${FILE}"

}