8d1098 Add identity_renderHtmlPlaintext.sh.

Authored and Committed by areguera 13 years ago
Scripts/Bash/Cli/Functions/Identity/identity_renderHtmlPlaintext.sh ADDED
@@ -0,0 +1,67 @@
1
+ #!/bin/bash
2
+ #
3
+ # identity_renderHtmlPlaintext.sh -- This function takes one HTML file
4
+ # and produces one plain-text file (i.e., without markup inside).
5
+ #
6
+ # Copyright (C) 2009-2011 Alain Reguera Delgado
7
+ #
8
+ # This program is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU General Public License as
10
+ # published by the Free Software Foundation; either version 2 of the
11
+ # License, or (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful, but
14
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
21
+ # USA.
22
+ #
23
+ # ----------------------------------------------------------------------
24
+ # $Id$
25
+ # ----------------------------------------------------------------------
26
+
27
function identity_renderHtmlPlaintext {

    # Convert the HTML file `${FILE}.xhtml' into the plain-text file
    # `${FILE}.txt' (i.e., the same content without markup inside).
    #
    # Globals:
    #   FILE -- (read) path of the file to convert, without its
    #           extension. It is not modified here.
    #
    # Requires cli_checkFiles and cli_printMessage, which are not
    # defined in this file.
    #
    # Returns:
    #   The exit status of the text-based web browser when one is
    #   available; otherwise the exit status of the last
    #   cli_printMessage call.

    local HTML="${FILE}.xhtml"
    local TEXT="${FILE}.txt"

    # Verify existence of HTML file. The path is quoted so it reaches
    # cli_checkFiles as a single argument, even with spaces in it.
    cli_checkFiles "${HTML}" 'f'

    local COMMAND=''
    local -a OPTIONS=()

    # Define the command path to text-based web browser and options
    # used to produce plain-text files. Most of these programs have a
    # dump option that prints a formatted plain-text version of the
    # given HTML file to stdout. Options are kept in an array so each
    # one is passed as its own word without relying on word splitting.
    if [[ -x '/usr/bin/lynx' ]]; then
        COMMAND='/usr/bin/lynx'
        OPTIONS=(-force_html -nolist -width 70 -dump)
    elif [[ -x '/usr/bin/elinks' ]]; then
        COMMAND='/usr/bin/elinks'
        OPTIONS=(-force_html -no-numbering -no-references -width 70 -dump)
    elif [[ -x '/usr/bin/w3m' ]]; then
        COMMAND='/usr/bin/w3m'
        # Force HTML parsing and a 70 columns output, to agree with
        # what is requested from lynx and elinks above.
        OPTIONS=(-T text/html -cols 70 -dump)
    fi

    if [[ -n "${COMMAND}" ]]; then

        # Print action message.
        if [[ -f "${TEXT}" ]]; then
            cli_printMessage "${TEXT}" 'AsUpdatingLine'
        else
            cli_printMessage "${TEXT}" 'AsCreatingLine'
        fi

        # Convert from HTML to plain-text without markup. This is the
        # last command on this path, so its exit status is what the
        # function returns.
        "${COMMAND}" "${OPTIONS[@]}" "${HTML}" > "${TEXT}"

    else
        cli_printMessage "$(gettext "No way to convert from HTML to plain-text found.")" 'AsErrorLine'
        cli_printMessage "$(caller)" 'AsToKnowMoreLine'
    fi

}