Blame Scripts/Functions/Tuneup/Xhtml/xhtml_doToc.sh

9f1608
#!/bin/bash
9f3cc6
# 
88ac1b
# xhtml_doToc.sh -- This functionality transforms web page headings to
7e81b4
# make them accessible through a table of contents.  The table of
7e81b4
# contents is expanded in place, wherever the table-of-contents
7e81b4
# placeholder (a <div>...</div> element) is in the page.  Once the
7e81b4
# placeholder has been expanded, there is no need to put anything else
7e81b4
# in the page.
9f1608
#
9f3cc6
# In order for the tuneup functionality to transform headings, you
9f3cc6
# need to put headings in just one line using one of the following
9f3cc6
# forms:
ffdd74
#
7cd8e9
# <h1><a name="">Title</a></h1>
7cd8e9
# <h1><a href="">Title</a></h1>
7cd8e9
# <h1><a name="" href="">Title</a></h1>
ffdd74
#
9f3cc6
# In the example above, h1 can vary from h1 to h6. Closing tag must be
9f3cc6
# present and also match the opening tag. The value of `name' and
9f3cc6
# `href' options from the anchor element are set dynamically using the
9f3cc6
# md5sum output of combining the page location, the head- string and
9f3cc6
# the heading string.
ffdd74
#
2fe9b7
# Copyright (C) 2009, 2010, 2011 The CentOS Project
fa95b1
#
fa95b1
# This program is free software; you can redistribute it and/or modify
fa95b1
# it under the terms of the GNU General Public License as published by
dcd347
# the Free Software Foundation; either version 2 of the License, or (at
dcd347
# your option) any later version.
fa95b1
#
74a058
# This program is distributed in the hope that it will be useful, but
74a058
# WITHOUT ANY WARRANTY; without even the implied warranty of
9f1608
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9f1608
# General Public License for more details.
9f1608
#
9f1608
# You should have received a copy of the GNU General Public License
9f1608
# along with this program; if not, write to the Free Software
dcd347
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
9f3cc6
#
9f1608
# ----------------------------------------------------------------------
9f1608
# $Id$
9f1608
# ----------------------------------------------------------------------
9f1608
88ac1b
# xhtml_doToc -- Rewrite the headings of the file named by FILE and
# replace its table-of-contents placeholder line with generated
# markup.
#
# Reads:   FILE (not set in this function; presumably provided by the
#          caller -- TODO confirm) and TUNEUP_CONFIG_DIR.
# Calls:   cli_checkFiles (defined elsewhere), file, egrep, sed,
#          md5sum, gettext and awk.
# Effects: edits FILE in place (sed -i).  FIRST, HEADING, TOCENTRY and
#          TOC are assigned without a local declaration, so they
#          remain visible to the caller after the function returns.
function xhtml_doToc {
9f1608
ffdd74
    # Define variables as local to avoid conflicts outside.
ad45de
    local COUNT=0
c49cd9
    local PREVCOUNT=0
ffdd74
    local -a FINAL
ffdd74
    local -a TITLE
ad45de
    local -a MD5SM
ad45de
    local -a OPTNS
f82437
    local -a CLASS
c49cd9
    local -a LEVEL
c49cd9
    local -a PARENT
8aa7eb
    local -a TOCENTRIES
8aa7eb
    local -a LINK
ffdd74
7e81b4
    # Define table of contents configuration file, the awk program used
7e81b4
    # to produce the table of contents XHTML output code.
79f9ac
    local TOC_CONFIG=${TUNEUP_CONFIG_DIR}/toc.awk
7e81b4
7e81b4
    # Verify table of contents configuration file.
7e81b4
    cli_checkFiles ${TOC_CONFIG}
7e81b4
8aa7eb
    # Define html heading regular expression pattern. Use parentheses
79e7c6
    # to save the pieces of the heading. The groups are consumed below
    # as: 1 -> LEVEL (heading number), 2 -> CLASS (anything between
    # the heading number and the closing angle bracket), 3 -> OPTNS
    # (the anchor opening tag) and 4 -> TITLE (the rest of the heading
    # up to its closing tag).
f82437
    local PATTERN='<h([1-6])(.*)>(<a.*[^\>]>)(.*[^<])</h[1-6]>'
89c744
89c744
    # Verify the file type. Is the file really an html file? If it
89c744
    # isn't, continue with the next one in the list.
    # NOTE(review): there is no enclosing loop in this function, so
    # this continue presumably targets a loop in the caller -- confirm.
    # NOTE(review): the right-hand side of =~ is quoted; bash 3.2 and
    # later match a quoted pattern as a literal string rather than as a
    # regular expression -- confirm the bash version this targets.
89c744
    if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then
89c744
        continue
89c744
    fi
89c744
89c744
    # Define list of headings to process. When building the heading,
89c744
    # it is required to change spaces characters from its current
89c744
    # decimal output to something different (e.g., its \040 octal
89c744
    # alternative). This is required because the unquoted command
89c744
    # substitution below is split into words on whitespace and spaces
89c744
    # can be present inside heading strings we don't want to separate.
89c744
    for HEADING in $(egrep "$PATTERN" $FILE \
89c744
        | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
89c744
89c744
        # Define previous counter value using current counter
89c744
        # value as reference. On the first iteration both counters
        # are 0.
89c744
        if [[ $COUNT -ne 0 ]];then
89c744
            PREVCOUNT=$(($COUNT-1))
89c744
        fi
89c744
89c744
        # Define initial heading information. FIRST is the heading
        # line as found in the file, with the \040 placeholders turned
        # back into spaces; the remaining values are pieces extracted
        # from it through PATTERN. MD5SM is the md5sum of the file
        # name followed by the heading line, with the trailing dash
        # that md5sum prints removed.
        # NOTE(review): MD5SM is not referenced again in the code
        # visible here -- confirm whether the markup that used it was
        # lost.
89c744
        FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g")
f82437
        TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\4!")
89c744
        MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!')
f82437
        OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!")
f82437
        CLASS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!")
89c744
        LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!")
        # PARENT is the level of the previous heading; for the first
        # heading it is the heading's own level.
89c744
        PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}
89c744
89c744
        # Transform heading information using initial heading
89c744
        # information as reference.
        # NOTE(review): the three branches below test the same pattern
        # and assign the same empty value, so as written they are
        # equivalent to a single branch. They were presumably distinct
        # anchor patterns and replacements -- confirm against the
        # upstream file.
89c744
        if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
89c744
            OPTNS[$COUNT]=''
89c744
        elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then 
89c744
            OPTNS[$COUNT]=''
89c744
        elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
89c744
            OPTNS[$COUNT]=''
89c744
        fi
ffdd74
89c744
        # Build final html heading structure.
f82437
        FINAL[$COUNT]='<h'${LEVEL[$COUNT]}${CLASS[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'
216869
89c744
        # Build html heading link structure. These links are used by
89c744
        # the table of contents later.
        # NOTE(review): as written this is just the title surrounded
        # by empty strings -- confirm whether link markup is missing.
89c744
        LINK[$COUNT]=''${TITLE[$COUNT]}''
46d906
89c744
        # Build table of contents entry with numerical
89c744
        # identifications. The numerical identification is what we use
89c744
        # to determine the correct position of each heading link on
89c744
        # the table of contents. Fields are colon-separated:
        # counter, level, parent level and link.
89c744
        TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"
ffdd74
89c744
        # Update heading information inside the current file being
89c744
        # processed. Use the first and final heading information.
        # NOTE(review): both values are interpolated into the sed
        # expression unescaped, so a heading containing the delimiter
        # or regular expression metacharacters would change what this
        # substitution does -- confirm inputs are constrained.
89c744
        sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE
ffdd74
89c744
        # Increase heading counter.
89c744
        COUNT=$(($COUNT + 1))
8aa7eb
ffdd74
    done
ffdd74
89c744
    # Build the table of contents using heading numerical
89c744
    # identifications as reference. The numerical identification
7e81b4
    # describes the order of headings in one xhtml file. This
89c744
    # information is processed by awk to make the appropriate
89c744
    # replacements. Finally, the result is stored in the TOC
89c744
    # variable.
89c744
    TOC=$(echo '
'
f82437
        echo "

`gettext "Table of contents"`

"
89c744
        for TOCENTRY in "${TOCENTRIES[@]}";do
89c744
            echo $TOCENTRY
89c744
        done \
7e81b4
            | awk -f ${TOC_CONFIG})
89c744
89c744
    # Update table of contents inside the current file being
89c744
    # processed. The sed c command replaces each matching line with
    # the generated text. TOC is expanded unquoted here, so its
    # whitespace is subject to word splitting before echo sees it.
89c744
    sed -i -r '/
[^<\/div].*<\/div>/c'"$(echo -e $TOC)" $FILE
89c744
89c744
    # Clean up variables to receive the next file.
89c744
    unset FINAL
89c744
    unset TITLE
89c744
    unset MD5SM
89c744
    unset OPTNS
f82437
    unset CLASS
89c744
    unset LEVEL
89c744
    unset PARENT
89c744
    unset TOCENTRIES
89c744
    unset LINK
89c744
9f1608
}