update tags with new web references
[populate.git] / populate.sh
1 #!/bin/bash
2 # set -x
3 # export PS4='+${BASH_SOURCE}:${LINENO}:${FUNCNAME[0]}: '
4 #
5 #   populate.sh
6 #
7 #   Copyright (C) 2007 nastasi@alternativeoutput.it
8 #
9 #  This program is free software; you can redistribute it and/or modify
10 #  it under the terms of the GNU General Public License as published by
11 #  the Free Software Foundation; either version 2 of the License, or
12 #  (at your option) any later version.
13 #
14 #  This program is distributed in the hope that it will be useful, but
15 #  WITHOUT ANY WARRANTY; without even the implied warranty of
16 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 #  General Public License for more details. You should have received a
18 #  copy of the GNU General Public License along with this program; if
19 #  not, write to the Free Software Foundation, Inc, 59 Temple Place -
20 #  Suite 330, Boston, MA 02111-1307, USA.
21 #
22
23 #
24 # Description of tag behaviors
25 # ----------------------------
26 #
27 #                         |                            |
28 #       Template Tag      |         Assoc. Var         |  Description
29 # ------------------------+----------------------------+-------------------              
30 #   #T_<tagname>#         | $T_<tagname>               |  simple tag
31 #   ##T_<blkname>:start## | $B_<blkname>               |  block tag begin
32 #        . . . . .        |                            |
33 #      #T_<tagname>#      | $T_<blkname>_<tagname>     |  tag inside a block
34 #      #T_<tagname>_X#    | $T_<blkname>_<tagname>_<#> |  indexed tag inside a block
35 #                         |                            |  (the first in the alphab. order must
36 #                         |                            |   exist to enable the entire block)
37 #        . . . . .        |                            |
38 #   ##T_<blkname>:end##   |                            |  block tag finish
39 #
40
41 # TODO
42 #
43 #  Aggiungere la filtratura per tipo di output
44 #
45 #  loop formati
46 #+   loop lingue
47 #+     loop for recursion (general)
48 #+       get blocchi dal template
49 #+       sost TAG nei blocchi con le VAR
50 #+           loop for recursion (for each row of each block)
51 #+             per ogni set di var per quel blocco (flat o con indice)
52 #+             fino a quando la prima con indice non e' stringa vuota
53 #+           fine loop for recursion (for each row of each block)
54 #+       sostituire i TAG nel template con i blocchi
55 #+       sostituire i TAG nel template con le VAR
56 #
57 #+       produrre il file in output
58 #+     fine loop for recursion (general)
59 #+   fine loop lingue
60 #  fine loop formati
61 #
62 #
63
#  GLOBAL VARS
# debug=1
debug=""          # non-empty turns on debecho output (set by the -d option)
verbose=""        # non-empty turns on the extra fall-back warnings (set by -v)
NEWLI="#aCaPo#"   # placeholder that stands in for a newline while text is
                  # pushed through single-line sed substitutions

# Scratch files used by populator.  They are created with mktemp so that the
# names are not predictable (a fixed /tmp/populateN_<pid>.tmp name can be
# pre-created or symlinked by another local user).
TMPFILE="$(mktemp /tmp/populate1_XXXXXX)" || exit 1
TMP2FILE="$(mktemp /tmp/populate2_XXXXXX)" || exit 1
TMP3FILE="$(mktemp /tmp/populate3_XXXXXX)" || exit 1
TMP4FILE="$(mktemp /tmp/populate4_XXXXXX)" || exit 1

# Remove the scratch files on every exit path (including -h and error exits).
trap 'rm -f -- "$TMPFILE" "$TMP2FILE" "$TMP3FILE" "$TMP4FILE"' EXIT
74
75
76 #
77 #  FUNCTIONS
78 #
# Print the command-line synopsis on stdout and terminate the script
# with status 0.
#   $1 - program name to show in the synopsis line
function usage() {
    printf '%s\n' \
        "Usage: $1 [-v] [-d] [-h] data-file" \
        "  -v     Verbose enable." \
        "  -d     Debug enable." \
        "  -h     This help."
    exit 0
}
86
# Read text on stdin and print, one per line, sorted and de-duplicated, the
# names of the simple tags (#T_NAME#, NAME made of A-Z and '_') found in it.
# The surrounding '#' characters are removed from the output.
# Always returns 0.
function vars_get() {
    # 1. put every tag on a line of its own (only lines containing a tag
    #    are printed at all)
    # 2. keep the lines that start with a tag
    # 3. sort and drop duplicates
    # 4. strip the '#' delimiters
    sed -n -e 's/\(#T_[A-Z_]\+#\)/\n\1\n/gp' \
        | grep -e '^#T_' \
        | sort \
        | uniq \
        | sed -e 's/#\(T_[A-Z_]\+\)#/\1/g'
    return 0
}
93
# Escape text so that it can be pasted into the replacement part of a
# sed "s/.../.../" command: each of  "  \  /  $  &  gets a backslash in front.
#   $1 - (optional) the text to escape; when no argument is given the text
#        is read from stdin instead
# The escaped text is written to stdout.  Always returns 0.
function unsedify() {

    (
        if [ $# -eq 1 ]; then
            # printf instead of echo: echo would treat values such as
            # "-n" or "-e" as options and print nothing
            printf '%s\n' "$1"
        else
            cat
        fi
    ) | sed 's/\(["\\\/\$&]\)/\\\1/g'

    return 0
}
106
# Convert the neutral markup used in the data files to the requested
# output format.
#   $1 - output format: "tex" or "htm" (any other value produces no output)
#   $2 - (optional) text to convert; when omitted the text is read from stdin
# The converted text is written to stdout.
#
# tex: escapes '&', turns accented vowels into LaTeX accent commands, maps
#      <i>/<b> to \textit/\textbf, escapes '_' inside <url>...</url> and
#      renders the url as "URL: \textit{...}", converts curly double quotes,
#      unwraps <lang:tex> sections and drops other <lang:...> sections.
#      All newlines are removed from the result.
# htm: escapes '&', turns a doubled $NEWLI into "<br>$NEWLI", renders
#      <url>...</url> as a link, unwraps <lang:htm> sections and drops other
#      <lang:...> sections.  Reads the global NEWLI.
function formatify() {
    (
        if [ $# -eq 2 ]; then
            # printf instead of echo so that text such as "-n" is not
            # mistaken for an echo option
            printf '%s\n' "$2"
        else
            cat
        fi
    ) | (
        if [ "$1" = "tex" ]; then
            # The acute-accent rules are one rule per vowel; the 'a' rule
            # used to match the literal string of all five vowels, so a
            # lone a-acute was never converted.
            # NOTE(review): "tee -a loggone.txt" appends every converted text
            # to a file in the current directory; it looks like leftover
            # debugging output - confirm before removing.
            sed 's/&/\\&/g;s/à/\\`a/g;s/è/\\`e/g;s/ì/\\`i/g;s/ò/\\`o/g;s/ù/\\`u/g;
                 s/á/\\'"'"'a/g;s/é/\\'"'"'e/g;s/í/\\'"'"'i/g;s/ó/\\'"'"'o/g;s/ú/\\'"'"'u/g;
                 s/°/\$\^\\circ\$/g;
                 s/<i>/\\textit{/g;s/<b>/\\textbf{/g;s/<\/[ib]>/}/g;
                 s/<url>/\n<url>\n/g;s/<\/url>/\n<\/url>\n/g;' | \
            sed '/^<url>$/,/^<\/url>$/ { s/_/\\_/g; };' | tee -a loggone.txt | tr -d '\n' | \
            sed 's/<url>/URL: \\textit\{/g;s/<\/url>/\}/g;
                 s/“/``/g;s/”/'"''"'/g;s/<lang:tex>//g;s/<\/lang:tex>//g;s/<lang:[^>]*>.*<\/lang:[^>]*>//g'
        elif [ "$1" = "htm" ]; then
            sed "s/&/&amp;/g;s/${NEWLI}${NEWLI}/<br>${NEWLI}/g;"'
                 s/<url>\([^<]*\)<\/url>/<a href="\1">\1<\/a>/g;
                 s/<lang:htm>//g;s/<\/lang:htm>//g;s/<lang:[^>]*>.*<\/lang:[^>]*>//g'
        fi
    )


}
132
# Print a debug message on stdout, prefixed with "debug ", but only when the
# global variable debug is non-empty.
#   $@ - the message
# Always returns 0.
function debecho() {
    if [ -z "$debug" ]; then
        return 0
    fi
    # "$*" is quoted so the message is printed verbatim: unquoted it was
    # word-split and glob-expanded (a '*' in the text listed files).
    printf 'debug %s\n' "$*"
    return 0
}
140
# Expand one template into one output file.
#
#   $1 - template base name; the file read is "<$1>.<$4>"
#   $2 - output base name; the file written is "<$2>[_<$3>].<$4>"
#   $3 - language code, may be empty; when set, variables carrying the
#        upper-cased suffix "_<LANG>" are preferred over the plain ones
#   $4 - format, used as file extension and passed to formatify
#
# Globals read:    NEWLI, TMPFILE, TMP2FILE, TMP3FILE, TMP4FILE, debug,
#                  verbose, and every T_* variable defined by the sourced
#                  data files (looked up by name through ${!var}).
# Globals written: only tmplname, lng and frmt are local; contname, lngsfx,
#                  filesfx, the B_* block variables and all loop/work
#                  variables are assigned in the caller's scope.
# IFS is set to a single newline for the duration of the call and restored
# before returning.
# Warnings about undefined variables go to stdout.
# Returns 0; exits the whole script with status 111 (after dumping the sed
# program to sedargs.txt) when a block substitution fails.
function populator() {
    local tmplname lng frmt

    tmplname="$1"
    contname="$2"
    lng="$3"
    frmt="$4"

    tmplname="${tmplname}.${frmt}"
    # lngsfx is the suffix appended to variable names, filesfx the one
    # appended to the output file name
    if [ $lng ]; then
        lngsfx="_`echo "$lng" | tr '[:lower:]' '[:upper:]'`"
        filesfx="_$lng"
    else
        lngsfx=""
        filesfx=""
    fi

    # split unquoted expansions on newlines only from here on
    oldifs="$IFS"
    IFS="
"
    # 0 means "run (another) pass", 1 means "done"
    anotherloop=0

    #
    #  Recursion loop: resolves tags that were themselves produced by a tag
    #  substitution, working on the entire document.
    #
    while [ $anotherloop -eq 0 ]; do
        debecho "ANOTHERLOOP"
        #
        # GET BLOCKS
        #

        # add a normal tag (#T_<blkname>#) on its own line before each block
        # start marker, output to TMPFILE
        cat $tmplname | sed 's/^\(##\(T_[A-Z_0-9]\+\):start##\)$/#\2#\n\1/g' > $TMPFILE

        # get all blocks from the template, output to TMP2FILE
        cat $TMPFILE | sed -n '/##T_[A-Z_0-9]\+:start##/,/##T_[A-Z_0-9]\+:end##/p' > $TMP2FILE

        # remove blocks from the template, output to TMP3FILE
        cat $TMPFILE | sed '/##T_[A-Z_0-9]\+:start##/,/##T_[A-Z_0-9]\+:end##/d' > $TMP3FILE

        # get the line number (within TMP2FILE) of the start of every block
        blist="`cat $TMP2FILE | grep -n '^##T_[A-Z_0-9]\+:start##$' | cut -d : -f 1`"

        # for each line number ...
        for i in `echo "$blist"`; do
            # extracts the current block (start marker through the next end marker)
            block_cur="`cat $TMP2FILE | sed -n $i',/##T_[A-Z_0-9]\+:end##/p'`"

            # extracts the name of the block variable: ##T_<name>:start## -> B_<name>
            block_var="`echo "$block_cur" | sed -n "1,1p" | sed 's/^\(##T\(_[A-Z_0-9]\+\):start##\)$/B\2/g'`"
            # remove first and last line (block delimiters), double every '\'
            # char, and end every line with $NEWLI followed by a '\'.
            # The doubled and trailing backslashes are there for the "read"
            # below, which is used without -r: it turns '\\' back into '\'
            # and treats '\' + newline as a line continuation, so the whole
            # block ends up in one variable as a single line.
            block_val="`echo "$block_cur" | sed '1d;$d;s/\\\\/\\\\\\\\/g' | sed "s/\$/$NEWLI\\\\\\/g"`"

            # assign the value to the (global) variable named by block_var
            read $block_var <<< "$block_val"

            debecho "vv------------------------------"
            debecho "BLOCK_VAR: $block_var"
            debecho "BLOCK_VAL: ${!block_var}"
            debecho "^^------------------------------"
        done

        # 
        #  SUBSTITUTE TAG INSIDE EACH BLOCK
        #
        if [ $debug ]; then
            echo "debug blocks: ${!B_*}"
        fi
        # iterate over the names of all variables starting with B_
        # NOTE(review): B_* variables are never unset, so blocks captured in
        # an earlier call or pass are presumably visited again - confirm.
        for i in ${!B_*}; do
            # name of the tag that marks where the expanded block goes
            blk_var="`echo "$i" | sed 's/^B_/T_/g'`"
            # NOTE(review): tag_var has not been assigned for this block yet
            # at this point; blk_var was presumably intended - confirm.
            debecho "Tag_var: $tag_var"
            blk_val=""
            outindex=1
            # one iteration per block row; idx is the index substituted for
            # the trailing X of indexed tags (at most 100 rows)
            # NOTE(review): the only early exit from this loop is the
            # "break 4" in the indexed-tag branch, so a block without any
            # indexed tag looks like it is emitted 100 times - confirm.
            for idx in `seq 1 100`; do
                debecho "ISFIRST YES"
                isfirst="YES"

                blk_src="${!i}"
                #
                #  Recursion loop: resolves tags created by a tag
                #  substitution, for each row of each block.
                #
                blk_anotherloop=0
                while [ $blk_anotherloop -eq 0 ]; do
                    debecho "BLK_ANOTHERLOOP $i $idx"
                    # sed program accumulated for this row, one s/// per tag
                    sedargs=""
                    tlist="`echo "$blk_src" | vars_get`"
                    debecho "TILIST $tlist"
                
                    for e in $tlist; do
                        debecho "LOO1"
                    
                        # B_<blk> + T_<tag>  ->  T_<blk>_<tag>
                        tag_var="`echo "$i $e" | sed 's/^B_\([A-Z_]\+\) T_\([A-Z_]\+\)/T_\1_\2/g'`"
                        # indexed tag? (name ends with _X)
                        echo "$tag_var" | grep -q '_X$'
                        if [ $? -eq 0 ]; then
                            # try the language-suffixed variable first, then
                            # fall back to the unsuffixed one
                            for ll in $lngsfx ""; do
                                # replace the trailing X with the row index
                                tag_ele="`echo "$tag_var" | sed 's/X$//g'`${idx}${ll}"
                                debecho "tag_ele: $tag_ele"
                                debecho "  BRA3"
                                # succeeds only if the variable is declared
                                declare -p $tag_ele >/dev/null 2>&1
                                if [ $? -eq 0 ]; then
                                    break
                                else
                                    # the first indexed tag (vars_get output
                                    # is sorted) has no variable for this
                                    # index: there are no more rows.  Leave
                                    # the ll, e, while and idx loops at once.
                                    if [ "$isfirst" = "YES" -a -z "$ll" ]; then
                                        outindex=0
                                        break 4
                                    fi
                                    if [ -z "$ll" ]; then
                                        echo "WARN: undefined [$tag_ele]"
                                    elif [ "$verbose" ]; then
                                        tag_fback="`echo "$tag_var" | sed 's/X$//g'`${idx}"
                                        echo "WARN: undefined [$tag_ele]; fault back to main [${!tag_fback}]"
                                    fi
                                fi
                                if [ -z "$ll" ]; then
                                    debecho "  ISFIRST NO"
                                    isfirst="NO"
                                fi
                            done

                        else
                            debecho "  BRA4"
                            # plain (non indexed) tag: language-suffixed
                            # variable first, then the unsuffixed one
                            for ll in $lngsfx ""; do
                                tag_ele="${tag_var}${ll}"
                                debecho "  tag_ele: $tag_ele"

                                declare -p $tag_ele >/dev/null 2>&1
                                if [ $? -eq 0 ]; then
                                    break
                                else
                                    if [ -z "$ll" ]; then
                                        echo "WARN: undefined [$tag_ele]"
                                    elif [ "$verbose" ]; then
                                        echo "WARN: undefined [$tag_ele]; fault back to main [${!tag_var}]"
                                    fi
                                fi
                            done
                        fi
                        # value of the selected variable with every line end
                        # replaced by $NEWLI (the trailing one removed), then
                        # converted for the output format
                        tag_val="`echo "${!tag_ele}" | sed "s/\$/$NEWLI/g" | tr -d '\n' | sed "s/$NEWLI\$//g" \
                                | formatify "$frmt"`"
                        debecho "  tag_val: $tag_val"

                        # TAG replacement: escape the value and queue one
                        # substitution for this tag
                        tag_val="`unsedify "$tag_val"`"
                        sedargs="${sedargs}s/#${e}#/$tag_val/g;"
                    done # for e ...
                    debecho "----------------------------------"
                    debecho "$sedargs" 
                    # apply all queued substitutions to the row and append
                    # the result to what has been built so far
                    blk_val="${blk_val}`echo "$blk_src" | sed "$sedargs"`"
                    # $? is the status of the sed in the command substitution
                    if [ $? -ne 0 ]; then
                        echo "$sedargs" > sedargs.txt
                        exit 111
                    fi
                
                    # any tag left (possibly introduced by the values just
                    # inserted)?  Then run the substitution again on the
                    # text accumulated so far.
                    echo "$blk_val" | grep -q '#\{1,2\}T_[A-Za-z_:]\+#\{1,2\}'
                    if [ $? -eq 0 ]; then
                        # echo "$blk_val" | grep '#\{1,2\}T_[A-Za-z_:]\+#\{1,2\}'
                        # echo "POST BLK GREP"
                        # read 
                        blk_src="$blk_val"
                        blk_val=""
                        blk_anotherloop=0
                    else
                        blk_anotherloop=1
                    fi
                done # while [ $blk_another_loop ....
                # NOTE(review): outindex is set to 0 only immediately before
                # the "break 4" above, which skips this test, so this branch
                # appears to be unreachable - confirm.
                if [ $outindex -eq 0 ]; then
                    break
                fi
            done
            
            debecho "++++++++++++++++++++++++++++++++++"
            debecho "BLK_VAL: $blk_val"
            debecho "**********************************"
            # read $blk_var <<< "$blk_val"
            
            # TAG replacement: put the expanded block where the #T_<blkname>#
            # tag was inserted in the block-less template
            blk_val="`unsedify "$blk_val" | sed "s/$NEWLI\$//g"`"
            debecho "BLK_VAR: $blk_var: $blk_val"
            debecho "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
            cat $TMP3FILE | sed "s/#${blk_var}#/$blk_val/g"  >$TMP4FILE
            mv $TMP4FILE $TMP3FILE    
        done

        # names of the simple tags still present in the document
        # (see vars_get: one tag per line, sorted, unique, '#' removed)
        t_vars="`cat $TMP3FILE | vars_get`"

        for tag_var in $t_vars; do 
            # echo "TAG_VAR: $tag_var"
            # language-suffixed variable first, then the unsuffixed one
            for ll in $lngsfx ""; do
                tag_ele="${tag_var}${ll}"
                declare -p $tag_ele >/dev/null 2>&1
                if [ $? -eq 0 ]; then
                    break
                else
                    if [ -z "$ll" ]; then
                        echo "WARN: undefined [$tag_ele]"
                    elif [ "$verbose" ]; then
                        echo "WARN: undefined [$tag_ele]; fault back to main [${!tag_var}]"
                    fi
                fi
            done
            # TAG replacement: same value preparation as inside the blocks,
            # plus escaping for the sed replacement
            tag_val="`echo "${!tag_ele}" | sed "s/\$/$NEWLI/g" | tr -d '\n' | sed "s/$NEWLI\$//g" \
                  | formatify "$frmt" | unsedify`"
            
                
            cat $TMP3FILE | sed "s/#${tag_var}#/$tag_val/g" >$TMP4FILE
            mv $TMP4FILE $TMP3FILE    
        done
        
        # tags (simple or block markers) still present?  Then turn the
        # placeholders back into newlines and use the result as the template
        # for another pass.
        cat $TMP3FILE | grep -q '#\{1,2\}T_[A-Za-z_:]\+#\{1,2\}'
        if [ $? -eq 0 ]; then
            # cat $TMP3FILE | grep -q '#\{1,2\}T_[A-Za-z_:]\+#\{1,2\}'
            # echo POSTGREP
            # read 
            anotherloop=0
            cat $TMP3FILE | sed "s/$NEWLI/\n/g" > $TMP4FILE
            tmplname="$TMP4FILE"
        else
            anotherloop=1
        fi
    done #  while [ $anotherloop -eq 0 ]; do
    # write the final document, turning the placeholders back into newlines
    cat $TMP3FILE | sed "s/$NEWLI/\n/g" > ${contname}${filesfx}.$frmt

    rm -f $TMPFILE $TMP2FILE $TMP3FILE $TMP4FILE

    IFS="$oldifs"

    return 0
}
373
374 #
375 #  MAIN
376 #
377
# maybe parametrizable
# tmplname="temptest"
tmplname="cv_template"

# Consume the leading option flags.  The first word that is not a known
# option ends the parsing and is taken as the data-file base name (the
# ".data" extension is added below).
while [ $# -gt 0 ]; do
    case "$1" in
        -v) verbose="Y"
            ;;
        -d) debug="Y"
            ;;
        -h) usage "$0"
            ;;
        *)  break
            ;;
    esac
    shift
done
contname="$1"

# start from a clean slate: drop scratch files left over from a previous run
rm -f -- "$TMPFILE" "$TMP2FILE" "$TMP3FILE" "$TMP4FILE"

if [ ! -f "${contname}.data" ]; then
    echo "File ${contname}.data not exists"
    if [ -z "$contname" ]; then
        # no data-file argument at all: show the synopsis too.  usage exits
        # with status 0, so it runs in a subshell to keep the failure status.
        ( usage "$0" )
    fi
    exit 1
fi 

# source the data file from template scope
. "${tmplname}.data"

# source the data file from user scope
. "${contname}.data"

# one output file per format and language; the empty language is the main
# (unsuffixed) one
for frmt in tex htm; do
    for lng in "" eng; do
        echo "Format: $frmt  Lang: $lng"
        populator "$tmplname" "$contname" "$lng" "$frmt"
    done
done

exit 0