Blame Scripts/Bash/Functions/Html/html_updateHeadings.sh

9f1608
#!/bin/bash
9f1608
#
9f1608
# html_updateHeadings.sh -- This function transforms html headings to
ad45de
# make them accessible (e.g., through a table of contents).
9f1608
#
ffdd74
#   - In order for this function to work, you need to put headings in
ad45de
#     just one line and they must have the following formats:
ffdd74
#
ad45de
#       <h1><a name="">Title</a></h1>
ad45de
#       <h1><a href="">Title</a></h1>
ad45de
#       <h1><a name="" href="">Title</a></h1>

ffdd74
#
ad45de
#     In the above examples, h1 alternates from h1 to h6. Closing tag
a9264e
#     must be present and match the opening tag. The value of <a
ad45de
#     name=""> and <a href=""> options are the md5sum of page
ad45de
#     location, plus the 'head-' string, plus the heading string. If
ad45de
#     heading title or page location changes, the values of <a
ad45de
#     name=""> and <a href=""> options will change too.
ffdd74
#
9f1608
# Copyright (C) 2009-2010 Alain Reguera Delgado
9f1608
# 
9f1608
# This program is free software; you can redistribute it and/or modify
9f1608
# it under the terms of the GNU General Public License as published by
9f1608
# the Free Software Foundation; either version 2 of the License, or
9f1608
# (at your option) any later version.
9f1608
# 
9f1608
# This program is distributed in the hope that it will be useful, but
9f1608
# WITHOUT ANY WARRANTY; without even the implied warranty of
9f1608
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9f1608
# General Public License for more details.
9f1608
#
9f1608
# You should have received a copy of the GNU General Public License
9f1608
# along with this program; if not, write to the Free Software
9f1608
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
9f1608
# USA.
9f1608
# 
9f1608
# ----------------------------------------------------------------------
9f1608
# $Id$
9f1608
# ----------------------------------------------------------------------
9f1608
9f1608
function html_updateHeadings {
    # Rewrite the headings of every file listed in $FILES and rebuild
    # each file's table of contents from those headings. For each
    # file that file(1) reports as XHTML, HTML or XML, the lines
    # matching PATTERN are collected, each one is replaced in place
    # (sed -i) by a rebuilt heading, and a table of contents is
    # generated with awk and written back to the file with sed -i.
    #
    # Globals: FILES is read here but not set here. FILE, HEADING,
    # FIRST, TOC and TOCENTRY are assigned without being declared
    # local. cli_printMessage is defined outside this function.
9f1608
ffdd74
    # Define variables as local to avoid conflicts outside.
ad45de
    local COUNT=0
c49cd9
    local PREVCOUNT=0
ffdd74
    local PATTERN=''
ffdd74
    local -a FINAL
ffdd74
    local -a TITLE
ad45de
    local -a MD5SM
ad45de
    local -a OPTNS
c49cd9
    local -a LEVEL
c49cd9
    local -a PARENT
8aa7eb
    local -a TOCENTRIES
8aa7eb
    local -a LINK
ffdd74
8aa7eb
    # Define html heading regular expression pattern. Use parentheses
8aa7eb
    # to save heading level (\1), anchor tag (\2), and heading title (\3).
8aa7eb
    PATTERN="<h([1-9])>(<a.*[^\>]>)(.*[^<])</h[1-9]>"
ffdd74
ffdd74
    for FILE in $FILES;do
ffdd74
8aa7eb
        # Verify list of html files. Are files really html files? If
ffdd74
        # they aren't, continue with the next one in the list.
        # NOTE(review): the right-hand side of =~ is quoted here. In
        # bash 3.2 and later a quoted pattern is matched as a literal
        # string, not as a regular expression -- confirm which bash
        # version this script targets.
ffdd74
        if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then
ffdd74
            continue
ffdd74
        fi
ffdd74
ffdd74
        # Output action message.
a9264e
        cli_printMessage $FILE 'AsUpdatingLine'
c49cd9
c49cd9
        # Define list of headings to process. While building the
c49cd9
        # list, every space inside a heading line is replaced by the
8aa7eb
        # literal text \040 (the octal code of the space character).
8aa7eb
        # This is required because the for loop splits the unquoted
c49cd9
        # command substitution on whitespace, and heading strings can
c49cd9
        # contain spaces we don't want to separate. The spaces are
c49cd9
        # restored when FIRST is assigned in the loop body.
c49cd9
        for HEADING in $(egrep "$PATTERN" $FILE \
a9264e
            | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
c49cd9
c49cd9
            # Define previous counter value using current counter
c49cd9
            # value as reference. For the first heading both counters
            # are 0, so that heading is its own "previous" heading.
c49cd9
            if [[ $COUNT -ne 0 ]];then
c49cd9
                PREVCOUNT=$(($COUNT-1))
c49cd9
            fi
ffdd74
c49cd9
            # Define initial heading information:
            #   FIRST  - heading line as found in the file (spaces restored)
            #   TITLE  - third group of PATTERN
            #   MD5SM  - md5sum of FILE followed by FIRST; the sed call
            #            strips the trailing " -" md5sum prints for stdin
            #   OPTNS  - second group of PATTERN
            #   LEVEL  - first group of PATTERN
            #   PARENT - LEVEL of the previous heading
a9264e
            FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g")
c49cd9
            TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!")
c49cd9
            MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!')
c49cd9
            OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!")
c49cd9
            LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!")
c49cd9
            PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}
c49cd9
c49cd9
            # Transform heading information using initial heading
c49cd9
            # information as reference.
            # NOTE(review): as shown here the three branches test the
            # same pattern and assign the same empty value, so only the
            # first one can ever be taken. The patterns and values look
            # like they lost their markup -- confirm against the
            # canonical file.
8aa7eb
            if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
c49cd9
                OPTNS[$COUNT]=''
8aa7eb
            elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then 
c49cd9
                OPTNS[$COUNT]=''
8aa7eb
            elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
c49cd9
                OPTNS[$COUNT]=''
ad45de
            fi
ad45de
c49cd9
            # Build final html heading structure.
c49cd9
            FINAL[$COUNT]='<h'${LEVEL[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'
ffdd74
8aa7eb
            # Build html heading link structure. These links are used
8aa7eb
            # by the table of contents later.
c49cd9
            LINK[$COUNT]=''${TITLE[$COUNT]}''
ffdd74
c49cd9
            # Build table of contents entry with numerical
8aa7eb
            # identifications. The numerical identification is what we
8aa7eb
            # use to determine the correct position of each heading
8aa7eb
            # link on the table of contents.
c49cd9
            TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"
ffdd74
8aa7eb
            # Update heading information inside the current file being
8aa7eb
            # processed. Use the first and final heading information.
            # NOTE(review): FIRST and FINAL are interpolated into the
            # sed expression unescaped, so a heading containing '!' or
            # regex metacharacters would alter the substitution.
a9264e
            sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE
ad45de
c49cd9
            # Increase heading counter.
c49cd9
            COUNT=$(($COUNT + 1))
ad45de
c49cd9
        done
ad45de
8aa7eb
        # Build the table of contents using heading numerical
8aa7eb
        # identifications as reference. The numerical identification
8aa7eb
        # describes the order of headings in one html file. This
8aa7eb
        # information is processed by awk to make the appropriate
8aa7eb
        # replacements. Finally, the result is stored in the TOC
8aa7eb
        # variable. Each entry has the form COUNT:LEVEL:PARENT:LINK
        # and awk splits it on ":" into $1 to $4.
a9264e
        TOC=$(echo '
'
a9264e
            echo "

`gettext "Table of contents"`

"
a9264e
            for TOCENTRY in "${TOCENTRIES[@]}";do
a9264e
                echo $TOCENTRY
a9264e
            done \
a9264e
                | awk 'BEGIN {FS=":"}
a9264e
                         {
a9264e
                         if ($1 == 0 && $2 == $3) { 
a9264e
                            opentags  = "
  • "
a9264e
                            closetags = ""
a9264e
                            }
a9264e
a9264e
                         if ($1 >  0 && $2 >  $3) {
a9264e
                            opentags  = "
  • "
a9264e
                            closetags = ""
a9264e
                            }
a9264e
a9264e
                         if ($1 >  0 && $2 == $3) { 
a9264e
                            opentags  = "
  • "
  • a9264e
                                closetags = ""
    a9264e
                                }
    a9264e
    a9264e
                             if ($1 >  0 && $2 <  $3) { 
    a9264e
                                    opentags = ""
    a9264e
                                for (i = 1; i <= ($3 - $2); i++) {
    a9264e
                                    opentags  = opentags ""
    a9264e
                                    closetags = ""
    a9264e
                                    }
    a9264e
                                    opentags = opentags "
  • "
  • a9264e
                                }
    a9264e
    a9264e
                             printf "%s%s%s\n",opentags,$4,closetags
    a9264e
                             }
    a9264e
    a9264e
                         END {
    a9264e
                             if ($1 > 0 && $2 >= $3 && $3 > 1) {
    a9264e
                                for (i = 1; i <= $3; i++) {
    a9264e
                                    print ""
    a9264e
                                }
    a9264e
                             }
    a9264e
                             if ($1 > 0 && $2 >= $3 && $3 == 1) {
    a9264e
                                    print ""
    a9264e
                                    print ""
    a9264e
                             }
    a9264e
                             if ($1 > 0 && $2 < $3) {
    a9264e
                                for (i = 1; i <= $2; i++) {
    a9264e
                                    print ""
    a9264e
                                }
    a9264e
                             }
    a9264e
                             print ""
    a9264e
                             }')
    a9264e
    a9264e
    8aa7eb
            # Update table of contents inside the current file being
    8aa7eb
            # processed.
    a9264e
            sed -i -r '/
    (.*)<\/div>/c'"$(echo -e $TOC)" $FILE
    a9264e
    a9264e
            # Reset counters.
    a9264e
            COUNT=0
    a9264e
            PREVCOUNT=0
    ffdd74
    8aa7eb
            # Clean up variables to receive the next file.
    8aa7eb
            unset FINAL
    8aa7eb
            unset TITLE
    8aa7eb
            unset MD5SM
    8aa7eb
            unset OPTNS
    8aa7eb
            unset LEVEL
    8aa7eb
            unset PARENT
    8aa7eb
            unset TOCENTRIES
    8aa7eb
            unset LINK
    8aa7eb
    ffdd74
        done
    ffdd74
    9f1608
    }