From 89c744477e95efac93960f0e2896899c36101832 Mon Sep 17 00:00:00 2001 From: Alain Reguera Delgado Date: Mar 30 2011 00:48:44 +0000 Subject: Update Tuneup/tuneup_doXhtmlHeadings.sh. --- diff --git a/Scripts/Functions/Tuneup/tuneup_doXhtmlHeadings.sh b/Scripts/Functions/Tuneup/tuneup_doXhtmlHeadings.sh index b66e363..3b8a607 100644 --- a/Scripts/Functions/Tuneup/tuneup_doXhtmlHeadings.sh +++ b/Scripts/Functions/Tuneup/tuneup_doXhtmlHeadings.sh @@ -1,21 +1,20 @@ #!/bin/bash # # html_updateHeadings.sh -- This function transforms html headings to -# to make them accessible (e.g., through a table of contents). -# -# - In order for this function to work, you need to put headings in -# just one line and they must have the following formats: +# to make them accessible (e.g., through a table of contents). In +# order for this function to work, you need to put headings in just +# one line and they must have one of the following formats: # #

Title

#

Title

#

Title

# -# In the above examples, h1 alternates from h1 to h6. Closing tag -# must be present and match the one opentaging. The value of and options are the md5sum of page -# location, plus the 'head-' string, plus the heading string. If -# heading title or page location changes, the values of and options will change too. +# In the above examples, h1 can vary from h1 to h6. Closing tag must +# be present and match the openning tag. The value of and +# options are the md5sum of page location, plus the +# 'head-' string, plus the heading string. If heading title or page +# location changes, the values of and options +# will change too. # # Copyright (C) 2009-2011 Alain Reguera Delgado # @@ -38,12 +37,10 @@ # $Id$ # ---------------------------------------------------------------------- -function html_updateHeadings { +function tuneup_doXhtmlHeadings { # Define variables as local to avoid conflicts outside. local COUNT=0 - local FILE='' - local FILES='' local PREVCOUNT=0 local PATTERN='' local -a FINAL @@ -57,113 +54,101 @@ function html_updateHeadings { # Define html heading regular expression pattern. Use parenthisis # to save html action name, action value, and heading title. - PATTERN="(]>)(.*[^<])" + PATTERN="(]>)(.*[^<])" + + # Verify list of html files. Are files really html files? If they + # don't, continue with the next one in the list. + if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then + continue + fi + + # Output action message. + cli_printMessage $FILE 'AsUpdatingLine' + + # Define list of headings to process. When building the heading, + # it is required to change spaces characters from its current + # decimal output to something different (e.g., its \040 octal + # alternative). This is required because the space character is + # used as egrep default field separator and spaces can be present + # inside heading strings we don't want to separate. + for HEADING in $(egrep "$PATTERN" $FILE \ + | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do + + # Define previous counter value using current counter + # value as reference. + if [[ $COUNT -ne 0 ]];then + PREVCOUNT=$(($COUNT-1)) + fi + + # Define initial heading information. + FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g") + TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!") + MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!') + OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!") + LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!") + PARENT[$COUNT]=${LEVEL[$PREVCOUNT]} + + # Transform heading information using initial heading + # information as reference. + if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then + OPTNS[$COUNT]='' + elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then + OPTNS[$COUNT]='' + elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then + OPTNS[$COUNT]='' + fi - # Define list of files to process. - FILES=$(cli_getFilesList "$ACTIONVAL" "${FLAG_FILTER}.*\.(xhtml|html|htm)") + # Build final html heading structure. + FINAL[$COUNT]=''${OPTNS[$COUNT]}${TITLE[$COUNT]}'' - # Set action preamble. - cli_printActionPreamble "${FILES}" '' '' + # Build html heading link structure. These links are used by + # the table of contents later. + LINK[$COUNT]=''${TITLE[$COUNT]}'' - # Process list of files. - for FILE in $FILES;do + # Build table of contents entry with numerical + # identifications. The numerical identification is what we use + # to determine the correct position of each heading link on + # the table of content. + TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}" - # Verify list of html files. Are files really html files? If - # they don't, continue with the next one in the list. - if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then - continue - fi + # Update heading information inside the current file being + # processed. Use the first and final heading information. + sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE - # Output action message. - cli_printMessage $FILE 'AsUpdatingLine' - - # Define list of headings to process. When building the - # heading, it is required to change spaces characters from its - # current decimal output to something different (e.g., its - # \040 octal alternative). This is required because the space - # character is used as egrep default field separator and - # spaces can be present inside heading strings we don't want - # to separate. - for HEADING in $(egrep "$PATTERN" $FILE \ - | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do - - # Define previous counter value using current counter - # value as reference. - if [[ $COUNT -ne 0 ]];then - PREVCOUNT=$(($COUNT-1)) - fi - - # Define initial heading information. - FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g") - TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!") - MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!') - OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!") - LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!") - PARENT[$COUNT]=${LEVEL[$PREVCOUNT]} - - # Transform heading information using initial heading - # information as reference. - if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then - OPTNS[$COUNT]='' - elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then - OPTNS[$COUNT]='' - elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then - OPTNS[$COUNT]='' - fi - - # Build final html heading structure. - FINAL[$COUNT]=''${OPTNS[$COUNT]}${TITLE[$COUNT]}'' - - # Build html heading link structure. These links are used - # by the table of contents later. - LINK[$COUNT]=''${TITLE[$COUNT]}'' - - # Build table of contents entry with numerical - # identifications. The numerical identification is what we - # use to determine the correct position of each heading - # link on the table of content. - TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}" - - # Update heading information inside the current file being - # processed. Use the first and final heading information. - sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE - - # Increase heading counter. - COUNT=$(($COUNT + 1)) - - done - - # Build the table of contents using heading numerical - # identifications as reference. The numerical identification - # describes the order of headings in one html file. This - # information is processed by awk to make the appropriate - # replacements. Finnally, the result is stored in the TOC - # variable. - TOC=$(echo '
' - echo "

`gettext "Table of contents"`

" - for TOCENTRY in "${TOCENTRIES[@]}";do - echo $TOCENTRY - done \ - | awk -f ${CLI_BASEDIR}/Functions/Html/Config/output_forHeadingsToc.awk) - - # Update table of contents inside the current file being - # processed. - sed -i -r '/
(.*)<\/div>/c'"$(echo -e $TOC)" $FILE - - # Reset counters. - COUNT=0 - PREVCOUNT=0 - - # Clean up variables to receive the next file. - unset FINAL - unset TITLE - unset MD5SM - unset OPTNS - unset LEVEL - unset PARENT - unset TOCENTRIES - unset LINK + # Increase heading counter. + COUNT=$(($COUNT + 1)) done + # Build the table of contents using heading numerical + # identifications as reference. The numerical identification + # describes the order of headings in one html file. This + # information is processed by awk to make the appropriate + # replacements. Finnally, the result is stored in the TOC + # variable. + TOC=$(echo '
' + echo "

`gettext "Table of contents"`

" + for TOCENTRY in "${TOCENTRIES[@]}";do + echo $TOCENTRY + done \ + | awk -f ${FUNCCONFIG}/output_forHeadingsToc.awk) + + # Update table of contents inside the current file being + # processed. + sed -i -r '/
[^<\/div].*<\/div>/c'"$(echo -e $TOC)" $FILE + + # Reset counters. + COUNT=0 + PREVCOUNT=0 + + # Clean up variables to receive the next file. + unset FINAL + unset TITLE + unset MD5SM + unset OPTNS + unset LEVEL + unset PARENT + unset TOCENTRIES + unset LINK + }