#!bash
###########################################################################
# http://www.debian.org/misc/bsd.license
# Copyright (c) 2005 Pierre-Jean TURPEAU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
###########################################################################

###########################################################################
# Title and comment extractor from airliners database for webshots albums.
#
# Parse photos.txt file found in webshots album, search for entries with a
# potential airliners id as title (a number), retrieve data through web
# page parsing, update photos.txt file and add both title and comment.
#
# A cache file can be specified to speed up further processing of the same
# photos in other albums. If specified, this cache file is used as the
# primary source for title and comment. Web processing is then used as the
# secondary source in case of missing information (that is, when no
# non-empty title is found in the cache). The cache file is automatically
# updated with downloaded data.
#
###########################################################################


# Print the command-line synopsis of this script on standard output.
usage()
{
	printf '%s\n' "$0 <photos.txt> [cache_file]"
}

###########################################################################
# Build a photo title from a text dump of a web page.
#
# Only the first two lines containing "More:" are considered; on each of
# them everything up to and including the last "More: " is removed.
# (Presumably these two lines carry the aircraft and airline names -
# TODO confirm against an actual page dump.)
#
# Arguments: $1 - path of the text file to scan
# Outputs:   "<first> - <second>" on stdout. When a single line matches it
#            is used for both parts. An empty line is printed when nothing
#            usable is found (or the file cannot be read).
###########################################################################
extract_aircraft_infos()
{
	local first="" last="" line seen=""

	# Read the (at most two) cleaned-up lines straight from the pipeline;
	# no scratch file is left behind in the current directory.
	while IFS= read -r line
	do
		[ -n "$seen" ] || first=$line
		last=$line
		seen=1
	done < <(grep -- "More:" "$1" | head -n 2 | sed "s/^.*More: //g")

	if [ -z "$first" ] && [ -z "$last" ]
	then
		echo ""
	else
		printf '%s\n' "$first - $last"
	fi
}

###########################################################################
# Extract the caption text from a text dump of a web page.
#
# Let S be the first line containing "Remark Photographer" and M the first
# line containing "More: " that lies within 40 lines after such a marker
# line. The caption is made of lines S+2 .. M-2, split into words and
# joined again with single spaces.
# (Presumably this is the photographer's remark - TODO confirm against an
# actual page dump.)
#
# Arguments: $1 - path of the text file to scan
# Outputs:   the caption on one line on stdout; an empty line when either
#            marker is missing or there is no text between them.
###########################################################################
extract_remarks()
{
	# A single awk pass replaces the former cat/grep/expr/head/tail chain:
	# no scratch file, no arithmetic on empty values when a marker is
	# missing, and the words of the caption are never glob-expanded.
	awk '
		/Remark Photographer/ {
			if (!first) first = NR
			window = NR + 40
		}
		first && NR <= window && /More: / {
			last = NR - 2
			exit
		}
		first { text[NR] = $0 }
		END {
			caption = ""
			for (i = first + 2; i <= last; i++) {
				count = split(text[i], words)
				for (j = 1; j <= count; j++)
					caption = caption (caption == "" ? "" : " ") words[j]
			}
			print caption
		}
	' < "$1"
}

###########################################################################
# Add a title and a caption to one photo entry of a photos file.
#
# The entry is the first line of the form "TITLE_<n>||<id>" whose title is
# exactly <id> (not merely starting with it). " - <title>" is appended to
# that line and <caption> is appended right after "CAPTION_<n>||".
#
# Arguments: $1 - photos file, rewritten in place
#            $2 - id currently used as the photo title
#            $3 - title text to append
#            $4 - caption text to append
# Outputs:   "Photo not found" on stdout when no entry matches
# Returns:   0 on success, 1 when the photo is not found or the file
#            could not be rewritten (the file is then left untouched)
###########################################################################
replace_infos()
{
	local photosfile=$1
	local photoid=$2
	local title caption line suffix tmp status

	# Make the texts safe for use as a sed replacement: newlines are
	# flattened, then backslash, "&" and the ":" delimiter are escaped.
	# As before, a degree sign is emitted as byte 248 via GNU sed's \dNNN
	# escape (NOTE(review): presumably the code page expected by the
	# photos file - confirm). Locals are used so that the caller's
	# TITLE/CAPTION variables are not overwritten with escaped text.
	title=$(printf '%s' "$3" | tr '\n' ' ' \
		| sed -e 's/[\\&:]/\\&/g' -e 's/°/\\d248/g')
	caption=$(printf '%s' "$4" | tr '\n' ' ' \
		| sed -e 's/[\\&:]/\\&/g' -e 's/°/\\d248/g')

	# The id must be followed by a non-digit or the end of the line, so
	# that id 1234 does not select the entry of photo 12345.
	line=$(grep -E -- "TITLE_[0-9]*\|\|${photoid}([^0-9]|\$)" "$photosfile" \
		| head -n 1)

	if [ -z "$line" ]
	then
		echo Photo not found
		return 1
	fi

	# The entry number sits between the first "_" and the next "_" or "|".
	suffix=${line#*_}
	suffix=${suffix%%[_|]*}

	tmp=$(mktemp) || return 1

	# Only overwrite the photos file once sed has succeeded; a failing
	# sed no longer replaces it with an empty file.
	if sed -e "s:TITLE_${suffix}||${photoid}:& - ${title}:" \
		-e "s:CAPTION_${suffix}||:&${caption}:" \
		-- "$photosfile" > "$tmp"
	then
		cat -- "$tmp" > "$photosfile"
		status=$?
	else
		status=1
	fi

	rm -f -- "$tmp"
	return "$status"
}

# Without any argument there is nothing to process: show the synopsis and
# leave (the exit status is the one returned by usage).
case "$#" in
	0)
		usage
		exit
		;;
esac

# Start of the run, in seconds since the epoch; used for the final report.
STARTTIME=$(date +"%s")

# $1: photos file to update, $2: optional cache file.
PHOTOSFILE="$1"
CACHEFILE="$2"

# Stop early on an unusable photos file instead of running every later
# step against a path that cannot be read.
if [ ! -r "$PHOTOSFILE" ]
then
	echo "Cannot read photos file: $PHOTOSFILE" >&2
	exit 1
fi

# Collect the TITLE_<n>||<title> lines still to be processed. The two sed
# expressions blank out the entries whose title is a number followed by a
# space and more text, and the entries whose title starts with a non-digit.
TITLES=$(grep -e "TITLE_.*||" -- "$PHOTOSFILE" \
	| sed -e "s/TITLE_[0-9]\+||[0-9]\+ .\+//" -e "s/TITLE_[0-9]\+||[^0-9]\+.*//")

# Handle every candidate entry: look the id up in the cache file when one
# is given, otherwise (or on a cache miss) dump the web page with lynx and
# extract title and caption from it, then write both into the photos file.
# $TITLES is deliberately left unquoted: word splitting yields one
# TITLE_<n>||<id> item per iteration and drops the blanked-out lines.
for I in $TITLES
do
	echo
	ID=$(printf '%s\n' "$I" | cut -f1 -d"|" | cut -f2 -d"_")
	AIRLINERSID=$(printf '%s\n' "$I" | cut -f3 -d"|")

	echo "Adding information to photo $ID ($AIRLINERSID)"

	# Start every photo from a clean slate; otherwise the title of the
	# previous photo would be reused whenever no cache file is given.
	TITLE=""
	CAPTION=""

	if [ -n "${CACHEFILE}" ] && [ -r "${CACHEFILE}" ]
	then
		# A cache entry is three lines: "ITEM <id>", title, caption
		# (see the update step below). -x requires the whole line to
		# match, so that id 123 does not pick the entry of id 1234.
		TITLE=$(grep -x -A 1 -- "ITEM ${AIRLINERSID}" "${CACHEFILE}" | sed -n '2p')
		CAPTION=$(grep -x -A 2 -- "ITEM ${AIRLINERSID}" "${CACHEFILE}" | sed -n '3p')
	fi

	if [ -z "$TITLE" ]
	then
		echo -n "    . caching source: "
		# The page dump goes to the scratch file "infile" in the current
		# directory; it is removed at the end of the script.
		if ! lynx --dump -nolist "http://www.airliners.net/open.file?id=$AIRLINERSID" > infile
		then
			echo "Photo not found in database"
			continue
		fi
		echo "OK"

		echo -n "    . extracting title: "
		TITLE=$(extract_aircraft_infos infile)

		if [ -z "$TITLE" ]
		then
			echo "INVALID TITLE - SKIPPED"
			continue
		fi
		echo "OK ($TITLE)"

		echo -n "    . extracting caption: "
		CAPTION=$(extract_remarks infile)
		echo "OK ($CAPTION)"

		if [ -n "${CACHEFILE}" ]
		then
			echo -n "    . updating cache file: "

			# The cache file may not exist yet on the first run, hence
			# the silenced grep; the append below creates it.
			if grep -q -x -- "ITEM ${AIRLINERSID}" "${CACHEFILE}" 2>/dev/null
			then
				echo "SKIPPED"
			else
				{
					printf '%s\n' "ITEM ${AIRLINERSID}"
					printf '%s\n' "$TITLE"
					printf '%s\n' "$CAPTION"
				} >> "${CACHEFILE}"
				echo "OK"
			fi
		fi
	fi

	echo -n "    . replacing informations in file: "
	replace_infos "$PHOTOSFILE" "$AIRLINERSID" "$TITLE" "$CAPTION"
	echo "OK"
done

# Remove the scratch files used during processing.
rm -f -- out infile


# Report the elapsed time, both in seconds and as minutes:seconds.
ENDTIME=$(date +"%s")
TIMESPENT=$((ENDTIME - STARTTIME))

# Plain integer arithmetic instead of formatting an epoch offset with
# date: the result no longer depends on the local time zone (zones with a
# 30 or 45 minute offset shifted the minutes) and runs longer than one
# hour are no longer wrapped.
MINUTES=$(printf '%02d:%02d' $((TIMESPENT / 60)) $((TIMESPENT % 60)))

echo
echo "$TIMESPENT seconds consumed ($MINUTES)."

