|
|
9f1608 |
#!/bin/bash
|
|
|
9f1608 |
#
|
|
|
9f1608 |
# html_updateHeadings.sh -- This function transforms html headings to
|
|
|
ad45de |
# make them accessible (e.g., through a table of contents).
|
|
|
9f1608 |
#
|
|
|
7cd8e9 |
# - In order for this function to work, you need to put headings in
|
|
|
7cd8e9 |
# just one line and they must have the following formats:
|
|
|
ffdd74 |
#
|
|
|
7cd8e9 |
#
|
|
|
7cd8e9 |
#
|
|
|
7cd8e9 |
#
|
|
|
ffdd74 |
#
|
|
|
7cd8e9 |
# In the above examples, h1 stands for any level from h1 to h6. Closing tag
|
|
|
7cd8e9 |
# must be present and match the opening tag. The value of
|
|
|
7cd8e9 |
# name=""> and options are the md5sum of page
|
|
|
7cd8e9 |
# location, plus the 'head-' string, plus the heading string. If
|
|
|
7cd8e9 |
# heading title or page location changes, the values of
|
|
|
7cd8e9 |
# name=""> and options will change too.
|
|
|
ffdd74 |
#
|
|
|
7cd8e9 |
# Copyright (C) 2009, 2010 Alain Reguera Delgado
|
|
|
9f1608 |
#
|
|
|
7cd8e9 |
# This program is free software; you can redistribute it and/or
|
|
|
7cd8e9 |
# modify it under the terms of the GNU General Public License as
|
|
|
7cd8e9 |
# published by the Free Software Foundation; either version 2 of the
|
|
|
7cd8e9 |
# License, or (at your option) any later version.
|
|
|
9f1608 |
#
|
|
|
9f1608 |
# This program is distributed in the hope that it will be useful, but
|
|
|
9f1608 |
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
9f1608 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
9f1608 |
# General Public License for more details.
|
|
|
9f1608 |
#
|
|
|
9f1608 |
# You should have received a copy of the GNU General Public License
|
|
|
9f1608 |
# along with this program; if not, write to the Free Software
|
|
|
9f1608 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
|
|
9f1608 |
# USA.
|
|
|
9f1608 |
#
|
|
|
9f1608 |
# ----------------------------------------------------------------------
|
|
|
9f1608 |
# $Id$
|
|
|
9f1608 |
# ----------------------------------------------------------------------
|
|
|
9f1608 |
|
|
|
9f1608 |
function html_updateHeadings {

    # Rewrite, in place, the one-line headings found in each selected
    # html file and replace the file's table-of-contents line with a
    # freshly built one.
    #
    # Options (taken from the positional parameters rebuilt out of
    # ARGUMENTS):
    #   --filter REGEX   Stored in REGEX; `\.(html|htm)' is appended
    #                    to it before the files list is requested.
    #
    # Globals read:    ARGUMENTS and FILES (presumably populated by
    #                  cli_doParseArguments and cli_getFilesList --
    #                  confirm in those helpers).
    # Globals written: REGEX, FILE.
    #
    # NOTE(review): several literals in this function (the OPTNS
    # tests and values, LINK, the first TOC line and the TOC sed
    # address) look truncated, as if embedded html markup was lost.
    # They are kept exactly as found; restore them from the canonical
    # source before relying on the heading transformation.

    # Define variables as local to avoid conflicts outside.
    local COUNT=0
    local PREVCOUNT=0
    local PATTERN=''
    local HEADING=''
    local HEADINGS=''
    local TOCENTRY=''
    local TOC=''
    local FIRST_RE=''
    local FINAL_RP=''
    local -a FIRST
    local -a FINAL
    local -a TITLE
    local -a MD5SM
    local -a OPTNS
    local -a LEVEL
    local -a PARENT
    local -a TOCENTRIES
    local -a LINK

    # Regular expressions used with `=~' are kept in variables: since
    # bash 3.2 a quoted right-hand side is compared as a literal
    # string, not as a regular expression.
    local HTML_TYPE_RE='^(XHTML|HTML|XML)'
    local EMPTY_RE='^$'

    # Define html heading regular expression pattern. Use parentheses
    # to save heading level (\1), anchor opening tag (\2), and heading
    # title (\3).
    PATTERN="<h([1-9])>(<a.*[^\>]>)(.*[^<])</h[1-9]>"

    # Define short options we want to support.
    local ARGSS=""

    # Define long options we want to support.
    local ARGSL="filter:"

    # Parse arguments using getopt(1) command parser.
    cli_doParseArguments

    # Reset positional parameters using output from (getopt) argument
    # parser.
    eval set -- "$ARGUMENTS"

    # Define action to take for each option passed.
    while true; do
        case "$1" in
            --filter )
                REGEX="$2"
                shift 2
                ;;
            * )
                break
                ;;
        esac
    done

    # Re-define regular expression to match html files only.
    REGEX="${REGEX}\.(html|htm)"

    # Define list of files to process.
    cli_getFilesList

    # Process list of files. FILES is expanded unquoted on purpose: it
    # is treated as a whitespace-separated list of paths.
    for FILE in $FILES; do

        # Verify list of html files. Are files really html files? If
        # they aren't, continue with the next one in the list.
        if [[ ! $(file --brief "$FILE") =~ $HTML_TYPE_RE ]]; then
            continue
        fi

        # Output action message.
        cli_printMessage "$FILE" 'AsUpdatingLine'

        # Define list of headings to process: every line matching the
        # heading pattern, with leading white space removed. The list
        # is captured before the loop starts because the loop edits
        # the very same file.
        HEADINGS=$(grep -E "$PATTERN" "$FILE" \
            | sed -r -e 's!^[[:space:]]+!!')

        # Read the headings line by line so spaces, tabs and glob
        # characters inside a heading are preserved untouched.
        while IFS= read -r HEADING; do

            # A here-string always yields one line, even when no
            # heading was found; skip it.
            if [[ -z $HEADING ]]; then
                continue
            fi

            # Define previous counter value using current counter
            # value as reference.
            if [[ $COUNT -ne 0 ]]; then
                PREVCOUNT=$((COUNT - 1))
            fi

            # Define initial heading information.
            FIRST[$COUNT]=$HEADING
            TITLE[$COUNT]=$(printf '%s\n' "${FIRST[$COUNT]}" \
                | sed -r "s!$PATTERN!\3!")
            # NOTE(review): MD5SM is computed but not referenced by
            # any statement visible here; presumably it belonged to
            # the truncated OPTNS values below -- confirm.
            MD5SM[$COUNT]=$(printf '%s\n' "${FILE}${FIRST[$COUNT]}" \
                | md5sum | sed -r 's![[:space:]]+-$!!')
            OPTNS[$COUNT]=$(printf '%s\n' "${FIRST[$COUNT]}" \
                | sed -r "s!$PATTERN!\2!")
            LEVEL[$COUNT]=$(printf '%s\n' "${FIRST[$COUNT]}" \
                | sed -r "s!$PATTERN!\1!")
            PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}

            # Transform heading information using initial heading
            # information as reference.
            # NOTE(review): the source carried three branches with
            # this same test and this same empty value; they are
            # collapsed into one because the later ones could never
            # run. Restore the real patterns and values if known.
            if [[ ${OPTNS[$COUNT]} =~ $EMPTY_RE ]]; then
                OPTNS[$COUNT]=''
            fi

            # Build final html heading structure.
            FINAL[$COUNT]='<h'${LEVEL[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'

            # Build html heading link structure. These links are used
            # by the table of contents later.
            LINK[$COUNT]="${TITLE[$COUNT]}"

            # Build table of contents entry with numerical
            # identifications. The numerical identification is what we
            # use to determine the correct position of each heading
            # link on the table of contents.
            TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"

            # Update heading information inside the current file being
            # processed. Use the first and final heading information.
            # Both are escaped so the heading text is matched and
            # inserted literally, whatever characters it contains.
            FIRST_RE=$(printf '%s\n' "${FIRST[$COUNT]}" \
                | sed -e 's/[][\.^$*+?(){}|!]/\\&/g')
            FINAL_RP=$(printf '%s\n' "${FINAL[$COUNT]}" \
                | sed -e 's/[\&!]/\\&/g')
            sed -i -r "s!${FIRST_RE}!${FINAL_RP}!" "$FILE"

            # Increase heading counter.
            COUNT=$((COUNT + 1))

        done <<< "$HEADINGS"

        # Build the table of contents using heading numerical
        # identifications as reference. The numerical identification
        # describes the order of headings in one html file. Only the
        # entries printed by the loop are piped through awk; the two
        # leading echo lines go to TOC untouched. Finally, the result
        # is stored in the TOC variable.
        TOC=$(echo ''
            echo "$(gettext "Table of contents")"
            for TOCENTRY in "${TOCENTRIES[@]}"; do
                printf '%s\n' "$TOCENTRY"
            done \
                | awk -f /home/centos/artwork/trunk/Scripts/Bash/Functions/Html/Config/output_forHeadingsToc.awk)

        # Update table of contents inside the current file being
        # processed. TOC is expanded unquoted on purpose: that joins
        # its lines with single spaces, as the one-line form of the
        # sed `c' command used here requires.
        # shellcheck disable=SC2086
        sed -i -r '/(.*)<\/div>/c'"$(echo -e $TOC)" "$FILE"

        # Reset counters.
        COUNT=0
        PREVCOUNT=0

        # Clean up variables to receive the next file.
        FIRST=()
        FINAL=()
        TITLE=()
        MD5SM=()
        OPTNS=()
        LEVEL=()
        PARENT=()
        TOCENTRIES=()
        LINK=()

    done

}
|