#!/bin/bash
# Pathname expansion ignores case for the rest of the script.
shopt -s nocaseglob

# Every path used by the script lives below BASEDIR.
BASEDIR=/mnt/usb0/mp3/podCast
FEEDS=$BASEDIR/feeds.lst        # list of feed URLs, read by getlist
CACHEDIR=$BASEDIR/cache         # downloaded feed files, written by getfeed
LOGFILE=$BASEDIR/log            # file the log function appends to
TMPFILE=$BASEDIR/tmpfile

# log <message...>
# Appends the arguments, joined into a single line, to LOGFILE.
#
# Globals:   LOGFILE (read) - file the line is appended to.
# Arguments: the words of the message; they are joined with the first
#            character of IFS (a space unless IFS was changed).
function log() {
   # printf instead of echo: echo would treat a leading "-n", "-e" or "-E"
   # argument as an option and drop or mangle the message.
   printf '%s\n' "${*}" >>"${LOGFILE}"
}

# getlist
# Writes the feeds list to stdout, one entry per line, skipping blank lines
# and lines that start with ';' or '#'.
#
# Globals:   FEEDS (read) - path of the feeds list.
# Returns:   1 (after logging) when FEEDS is not a regular file; otherwise
#            the exit status of grep, which is also 1 when no entry is left
#            after filtering.
function getlist() {
   # FEEDS is quoted so that a path containing whitespace is still tested as
   # one word; unquoted, the test itself errors out and the check is skipped.
   if [[ ! -f "${FEEDS}" ]] ; then
      log "getlist cannot read feeds list: ${FEEDS}"
      return 1
   fi
   grep -v -e '^[;#]' -e '^$' "${FEEDS}"
}

# getfeed <url>
# Downloads the feed at <url> into CACHEDIR and writes the downloaded file to
# stdout.  The cache file is named after the md5 sum of the URL.
#
# Globals:   CACHEDIR (read) - directory holding the cached feeds; created
#            when missing.
# Arguments: $1 - URL of the feed.
# Outputs:   the feed on stdout; every failure is reported through log.
# Returns:   0 on success
#            1 no URL was given
#            2 CACHEDIR could not be created
#            3 wget reported an error
#            4 the cache file does not exist after the download
function getfeed() {
   local url="${1}"
   local key outfile

   # If we aren't given an URL, exit with a return code.
   if [[ -z "${url}" ]] ; then
      log "getfeed called without an argument."
      return 1
   fi

   # Try creating the cache dir.
   if [[ ! -d "${CACHEDIR}" ]] && ! mkdir -p "${CACHEDIR}" ; then
      log "Unable to make cachedir: ${CACHEDIR}"
      return 2
   fi

   # We have an url and a place to put it, let's try to get it.
   # printf (not echo -n) so the URL is hashed exactly as given.
   key=$(printf '%s' "${url}" | md5sum | awk '{print $1}')
   outfile="${CACHEDIR}/${key}"

   # NOTE(review): the wget manual describes -N as unsupported together with
   # -O, so the timestamp check is probably ineffective here - confirm before
   # relying on the cache to avoid re-downloads.
   if ! wget -N -q -O "${outfile}" "${url}" ; then
      log "wget exited with an error code trying to fetch: ${url}"
      return 3
   fi

   if [[ ! -f "${outfile}" ]] ; then
      log "cached feed went missing: ${outfile}"
      return 4
   fi
   cat "${outfile}"
}

# tagmp3 <filename>
# Writes id3v2 title, album, year and genre frames to <filename>, using the
# item data collected by the main loop.  Title, album and year are only
# written when "id3v2 -l" does not already list that frame; the genre is
# always set to "podCast".
#
# Globals:   TITLE (read)   - written as TIT2
#            CHANNEL (read) - written as TALB
#            DATE (read)    - its year is written as TYER; the current year
#                             is used when date cannot parse it
# Arguments: the file name; all arguments are joined with spaces, so an
#            unquoted name containing single spaces still works.
# Outputs:   progress and "file not found" messages on stdout.
function tagmp3() {
   local fname="${*}"
   local tit2 talb tyer tcon year frame

   if [[ ! -f "${fname}" ]] ; then
      echo "Error, file not found: ${fname}"
      return
   fi

   # Set all the tags
   #
   tit2="${TITLE}"
   talb="${CHANNEL}"
   tcon="podCast"
   year=$(date --date="${DATE}" +%Y)
   # Fall back to the current year.  The format needs the leading '+';
   # without it date rejects the argument and the year stays empty.
   if [[ -z "${year}" ]] ; then year=$(date +%Y) ; fi
   tyer="${year}"

   # If the id3tag is already used in the file, unset the variable
   # This way we won't set the id3tag when the author has set one already.
   # Only the first word of every listing line (the frame id) is looked at.
   while read -r frame _ ; do
      case "${frame}" in
         'TIT2') tit2="" ;;      # comment this line out to force overwriting the tag.
         'TALB') talb="" ;;      # comment this line out to force overwriting the tag.
         'TYER') tyer="" ;;      # comment this line out to force overwriting the tag.
#         'TCON') tcon="" ;;      # Uncomment to keep existing genre.
      esac
   done < <(id3v2 -l "${fname}")

   # for the tags that are set, tag the file.
   if [[ -n "${tit2}" ]] ; then echo Setting title ; id3v2 --TIT2="${tit2}" "${fname}" ; fi
   if [[ -n "${talb}" ]] ; then echo Setting album ; id3v2 --TALB="${talb}" "${fname}" ; fi
   if [[ -n "${tyer}" ]] ; then echo Setting year  ; id3v2 --TYER="${tyer}" "${fname}" ; fi
   if [[ -n "${tcon}" ]] ; then echo Setting genre ; id3v2 --TCON="${tcon}" "${fname}" ; fi
}


# getmp3
# Downloads the mp3 of the current feed item into a per-channel directory
# below BASEDIR and tags it with tagmp3.  Takes no arguments; the item is
# described by the globals filled in by the main loop.
#
# Globals:   BASEDIR (read)
#            CHANNEL (read; set to TITLE when empty)
#            TITLE, LINK, DATE, ENCL, HREF (read; reset to "" on every exit
#            path so one item's values never carry over to the next item)
# Outputs:   progress and error messages on stdout.
# Returns:   always 0; failures are only reported as messages.
# Side effects: changes the working directory to the channel directory.
function getmp3() {
   # Kept in a variable: a quoted right-hand side of =~ is matched as a
   # literal string, not as a regular expression.
   local mp3_url_re='http.*mp3'
   local destdir filename
   local -a words

   if [[ -z "${CHANNEL}" ]] ; then CHANNEL="${TITLE}" ; fi
   if [[ -z "${CHANNEL}" ]] ; then
      echo "Unable to determine the channel name, skipping."
      TITLE=""; LINK=""; DATE=""; ENCL=""; HREF=""
      return
   fi

   # Remove html entities, convert spaces to _s and strip non alphanumeric characters
   #
   # The name is split into words and re-joined first, which trims it and
   # squeezes whitespace runs without letting the shell glob-expand it.
   # Entities are matched as '&' up to the next ';', so text between two
   # entities survives.
   read -r -a words <<<"${CHANNEL}"
   destdir=$(printf '%s\n' "${words[*]}" \
      | sed -e 's/&[^;]*;//g' -e 's/ /_/g' -e 's/[^a-zA-Z0-9._]//g')
   destdir="${BASEDIR}/${destdir}"

   # Try to make the directory, and at the same time test to see if it exists.
   #
   if ! mkdir -p "${destdir}" ; then
      echo "Unable to make the directory: ${destdir}"
      TITLE=""; LINK=""; DATE=""; ENCL=""; HREF=""
      return
   fi
   # Never download into whatever directory we happened to be in before.
   if ! cd "${destdir}" ; then
      echo "Unable to enter the directory: ${destdir}"
      TITLE=""; LINK=""; DATE=""; ENCL=""; HREF=""
      return
   fi

   # Check the URL, and use alternates if necessary.
   #
   if [[ ! "${ENCL}" =~ ${mp3_url_re} ]] ; then
      if [[ "${LINK}" =~ ${mp3_url_re} ]] ; then
         # a link tag in the item looks like an mp3.
         ENCL="${LINK}"
      elif [[ "${HREF}" =~ ${mp3_url_re} ]] ; then
         # an anchor tag in the item looks like an mp3.
         ENCL="${HREF}"
      fi
   fi

   # If we found an enclosure for that item, attempt to fetch the mp3.
   #
   if [[ -n "${ENCL}" ]] ; then
      # "--" keeps a feed-supplied value that starts with '-' from being
      # parsed as an option.
      if ! wget -nc -- "${ENCL}" ; then
         echo "Error fetching ${ENCL}"
      else
         echo "Channel: ${CHANNEL}"
         echo "Title:   ${TITLE}"
         echo "ENCL:    ${ENCL}"
         echo ""
      fi

      # We assume the filename on the URL is what we'll get.
      # This could cause problems if we are redirected.
      filename=$(basename -- "${ENCL}")
      if [[ -f "${filename}" ]] ; then
         tagmp3 "${filename}"
      fi
   fi
   TITLE=""; LINK=""; DATE=""; ENCL=""; HREF=""
}


# Main loop
#
# Walks every feed URL printed by getlist, reads the feed printed by getfeed
# one line at a time and collects the fields of the current item in the
# globals CHANNEL, TITLE, LINK, DATE, ENCL and HREF.  getmp3 is called for
# every closing </item> tag.
#
# This is the heart of the XML parse, if you want to call it that.
# This makes a lot of assumptions about the structure of the xml:
#     each of the tags looked for opens and closes on the same line.
#     /item will be present.
#     the mp3 url is in the enclosure url parameter, within the link tag, or
#     in an href="...mp3" attribute on one of the item's lines.
#     url is the first attribute of the enclosure tag.
#
function main() {
   local URL LINE TAG
   local cr=$'\r'
   # Kept in a variable: a quoted right-hand side of =~ is matched as a
   # literal string, not as a regular expression.
   local href_re='href=["'\''][^"'\'']*[.]mp3["'\'']'

   while read -r URL ; do
      CHANNEL=""

      # read still trims leading and trailing blanks (default IFS), which the
      # unanchored sed expressions below rely on; -r keeps backslashes intact.
      while read -r LINE ; do
         # Drop the CR of CRLF feeds so it cannot end up in titles or URLs.
         LINE="${LINE%"${cr}"}"

         # Name of the first tag on the line; LINE is quoted so that a '*' or
         # '?' in the feed text is never glob-expanded.
         TAG=$(sed -n 's/<\([^>\ ]*\).*/\1/p' <<<"${LINE}")

         # The first title seen in a feed names the channel.
         if [[ -z "${CHANNEL}" && "${TAG}" == *title* ]] ; then
            CHANNEL=$(sed -n -e 's/<title>\([^<]*\)<\/title>/\1/pi' <<<"${LINE}" | sed -e 's/[\r\n]//')
         fi

         # Looking for tags.
         case "${TAG}" in
            'title')     TITLE=$(sed -n 's/<title>\([^<]*\)<\/title>/\1/pi' <<<"${LINE}") ;;
            'link')      LINK=$(sed -n 's/.*<link>\([^<]*\)<\/link>/\1/pi' <<<"${LINE}") ;;
            'pubDate')   DATE=$(sed -n 's/.*<pubDate>\([^<]*\)<\/pubDate>/\1/pi' <<<"${LINE}") ;;
            'enclosure') ENCL=$(sed -n 's/.*<enclosure url=["'\'']\([^"'\'']*\)["'\''].*/\1/pi' <<<"${LINE}") ;;
            '/item')     getmp3 ;;
         esac

         # Remember an anchor that points at an mp3, whatever tag it sits in.
         if [[ "${LINE}" =~ ${href_re} ]] ; then
            HREF=$(sed -n 's/.*href=["'\'']\([^"'\'']*\).mp3["'\''].*/\1.mp3/pi' <<<"${LINE}")
         fi
# Use this instead if using an older version of BASH that doesn't support =~
#         if echo "${LINE}" | grep 'href=["'\''][^"'\'']*[\.]mp3["'\'']' >/dev/null ; then
#            HREF=$(echo "${LINE}" | sed -n 's/.*href=["'\'']\([^"'\'']*\).mp3["'\''].*/\1.mp3/pi')
#         fi
      done < <(getfeed "${URL}")
   done < <(getlist)
}

main "$@"



