Blame Scripts/Bash/Functions/Html/html_updateHeadings.sh

9f1608
#!/bin/bash
9f1608
#
9f1608
# html_updateHeadings.sh -- This function transforms html headings to
ad45de
# make them accessible (e.g., through a table of contents).
9f1608
#
7cd8e9
# - In order for this function to work, you need to put headings in
7cd8e9
# just one line and they must have the following formats:
ffdd74
#
7cd8e9
# 

Title

7cd8e9
# 

Title

7cd8e9
# 

Title

ffdd74
#
7cd8e9
# In the above examples, h1 alternates from h1 to h6. Closing tag
7cd8e9
# must be present and match the opening tag. The value of 
7cd8e9
# name=""> and  options are the md5sum of page
7cd8e9
# location, plus the 'head-' string, plus the heading string. If
7cd8e9
# heading title or page location changes, the values of 
7cd8e9
# name=""> and  options will change too.
ffdd74
#
7cd8e9
# Copyright (C) 2009, 2010 Alain Reguera Delgado
9f1608
# 
7cd8e9
# This program is free software; you can redistribute it and/or
7cd8e9
# modify it under the terms of the GNU General Public License as
7cd8e9
# published by the Free Software Foundation; either version 2 of the
7cd8e9
# License, or (at your option) any later version.
9f1608
# 
9f1608
# This program is distributed in the hope that it will be useful, but
9f1608
# WITHOUT ANY WARRANTY; without even the implied warranty of
9f1608
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9f1608
# General Public License for more details.
9f1608
#
9f1608
# You should have received a copy of the GNU General Public License
9f1608
# along with this program; if not, write to the Free Software
9f1608
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
9f1608
# USA.
9f1608
# 
9f1608
# ----------------------------------------------------------------------
9f1608
# $Id$
9f1608
# ----------------------------------------------------------------------
9f1608
9f1608
function html_updateHeadings {
    # Rewrite the html headings found in each file named in FILES and
    # replace the table of contents block of that file with one built
    # from those headings.
    #
    # Options (long only): --filter, whose value is stored in REGEX.
    #
    # NOTE(review): several string literals in this copy of the
    # function look like they have lost html markup (see the notes
    # below); verify against the repository copy before relying on it.
9f1608
ffdd74
    # Define variables as local to avoid conflicts outside.
    # NOTE(review): REGEX, FILE, HEADING, FIRST, TOC and TOCENTRY are
    # assigned below without a local declaration.
ad45de
    local COUNT=0
c49cd9
    local PREVCOUNT=0
ffdd74
    local PATTERN=''
ffdd74
    local -a FINAL
ffdd74
    local -a TITLE
ad45de
    local -a MD5SM
ad45de
    local -a OPTNS
c49cd9
    local -a LEVEL
c49cd9
    local -a PARENT
8aa7eb
    local -a TOCENTRIES
8aa7eb
    local -a LINK
ffdd74
8aa7eb
    # Define html heading regular expression pattern. Use parentheses
79e7c6
    # to capture heading level, opening anchor tag, and heading title.
    # NOTE(review): the closing tag level is matched independently of
    # the opening one (no back-reference), and [1-9] accepts digits
    # beyond 6.
8aa7eb
    PATTERN="<h([1-9])>(<a.*[^\>]>)(.*[^<])</h[1-9]>"
ffdd74
46d906
    # Define short options we want to support.
46d906
    local ARGSS=""
46d906
46d906
    # Define long options we want to support.
46d906
    local ARGSL="filter:"
46d906
46d906
    # Parse arguments using getopt(1) command parser.
    # Presumably reads ARGSS/ARGSL and sets ARGUMENTS -- TODO confirm
    # against the definition of cli_doParseArguments.
46d906
    cli_doParseArguments
46d906
46d906
    # Reset positional parameters using output from (getopt) argument
46d906
    # parser.
46d906
    eval set -- "$ARGUMENTS"
46d906
46d906
    # Define action to take for each option passed. The loop stops at
    # the first positional parameter that is not --filter.
46d906
    while true; do
46d906
        case "$1" in
46d906
            --filter )
46d906
               REGEX="$2" 
46d906
               shift 2
46d906
               ;;
46d906
            * )
46d906
                break
46d906
        esac
46d906
    done
46d906
d95164
    # Re-define regular expression to match html files only. The
    # suffix is appended to whatever value REGEX already holds.
46d906
    REGEX=$(echo "${REGEX}\.(html|htm)")
46d906
46d906
    # Define list of files to process.
    # Presumably populates FILES using REGEX -- TODO confirm against
    # the definition of cli_getFilesList.
46d906
    cli_getFilesList
46d906
46d906
    # Process list of files.
ffdd74
    for FILE in $FILES;do
ffdd74
8aa7eb
        # Verify list of html files. Are files really html files? If
ffdd74
        # they aren't, continue with the next one in the list.
        # NOTE(review): bash 3.2 and later match a quoted right-hand
        # side of =~ as a literal string, not as a regular
        # expression -- confirm against the bash version in use.
ffdd74
        if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then
ffdd74
            continue
ffdd74
        fi
ffdd74
ffdd74
        # Output action message.
a9264e
        cli_printMessage $FILE 'AsUpdatingLine'
c49cd9
c49cd9
        # Define list of headings to process. When building the
c49cd9
        # heading, it is required to change space characters from
8aa7eb
        # their literal form to something different (e.g., the
8aa7eb
        # \040 octal notation). This is required because the for loop
c49cd9
        # splits the command output on whitespace and
c49cd9
        # spaces can be present inside heading strings we don't want
c49cd9
        # to separate. Leading whitespace of each line is removed too.
c49cd9
        for HEADING in $(egrep "$PATTERN" $FILE \
a9264e
            | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
c49cd9
c49cd9
            # Define previous counter value using current counter
c49cd9
            # value as reference.
c49cd9
            if [[ $COUNT -ne 0 ]];then
c49cd9
                PREVCOUNT=$(($COUNT-1))
c49cd9
            fi
ffdd74
c49cd9
            # Define initial heading information. FIRST holds the
            # heading line with its spaces restored; TITLE, OPTNS and
            # LEVEL are groups 3, 2 and 1 of PATTERN; MD5SM is the
            # md5sum of the file path followed by the heading line.
            # NOTE(review): MD5SM is not referenced again in the text
            # visible here.
a9264e
            FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g")
c49cd9
            TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!")
c49cd9
            MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!')
c49cd9
            OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!")
c49cd9
            LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!")
            # For the first heading PREVCOUNT is still 0, so its
            # parent level is its own level.
c49cd9
            PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}
c49cd9
c49cd9
            # Transform heading information using initial heading
c49cd9
            # information as reference.
            # NOTE(review): the three patterns below read identically
            # here and every branch assigns an empty string; the
            # patterns and values may have lost markup -- verify.
8aa7eb
            if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
c49cd9
                OPTNS[$COUNT]=''
8aa7eb
            elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then 
c49cd9
                OPTNS[$COUNT]=''
8aa7eb
            elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
c49cd9
                OPTNS[$COUNT]=''
ad45de
            fi
ad45de
c49cd9
            # Build final html heading structure.
c49cd9
            FINAL[$COUNT]='<h'${LEVEL[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'
ffdd74
8aa7eb
            # Build html heading link structure. These links are used
8aa7eb
            # by the table of contents later.
            # NOTE(review): as written here the value is just the
            # title between two empty strings; the surrounding link
            # markup may have been lost -- verify.
c49cd9
            LINK[$COUNT]=''${TITLE[$COUNT]}''
ffdd74
c49cd9
            # Build table of contents entry with numerical
8aa7eb
            # identifications. The numerical identification is what we
8aa7eb
            # use to determine the correct position of each heading
8aa7eb
            # link on the table of content. Fields are separated by
            # colons: counter, level, parent level, link.
c49cd9
            TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"
ffdd74
8aa7eb
            # Update heading information inside the current file being
8aa7eb
            # processed. Use the first and final heading information.
            # NOTE(review): FIRST is interpolated unescaped as an
            # extended regular expression and FINAL as replacement
            # text, so characters special to sed (or a literal `!')
            # inside a heading would alter the expression.
a9264e
            sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE
ad45de
c49cd9
            # Increase heading counter.
c49cd9
            COUNT=$(($COUNT + 1))
ad45de
c49cd9
        done
ad45de
8aa7eb
        # Build the table of contents using heading numerical
8aa7eb
        # identifications as reference. The numerical identification
8aa7eb
        # describes the order of headings in one html file. This
8aa7eb
        # information is processed by awk to make the appropriate
8aa7eb
        # replacements. Finally, the result is stored in the TOC
8aa7eb
        # variable.
        # NOTE(review): the awk script is referenced through a
        # hard-coded absolute path.
a9264e
        TOC=$(echo '
'
a9264e
            echo "

`gettext "Table of contents"`

"
a9264e
            for TOCENTRY in "${TOCENTRIES[@]}";do
a9264e
                echo $TOCENTRY
a9264e
            done \
0e71cb
                | awk -f /home/centos/artwork/trunk/Scripts/Bash/Functions/Html/Config/output_forHeadingsToc.awk)
a9264e
8aa7eb
        # Update table of contents inside the current file being
8aa7eb
        # processed.
a9264e
        sed -i -r '/
(.*)<\/div>/c'"$(echo -e $TOC)" $FILE
a9264e
a9264e
        # Reset counters.
a9264e
        COUNT=0
a9264e
        PREVCOUNT=0
ffdd74
8aa7eb
        # Clean up variables to receive the next file.
        # NOTE(review): FIRST is not unset here, so entries from a
        # previous file remain in the array.
8aa7eb
        unset FINAL
8aa7eb
        unset TITLE
8aa7eb
        unset MD5SM
8aa7eb
        unset OPTNS
8aa7eb
        unset LEVEL
8aa7eb
        unset PARENT
8aa7eb
        unset TOCENTRIES
8aa7eb
        unset LINK
8aa7eb
ffdd74
    done
ffdd74
9f1608
}