#!/bin/bash
#
# Podcast fetcher configuration.
#
# Every path the script works with is derived from BASEDIR:
#   FEEDS     text file holding the feed URLs
#   CACHEDIR  directory where fetched feed documents are kept
#   LOGFILE   file that log messages are appended to

# Make pathname expansion (globbing) case-insensitive for the whole script.
# NOTE(review): nothing in this file obviously depends on it - confirm before
# removing.
shopt -s nocaseglob

BASEDIR='/mnt/usb0/mp3/podCast'
FEEDS="$BASEDIR/feeds.lst"
CACHEDIR="$BASEDIR/cache"
LOGFILE="$BASEDIR/log"

# Append one message line to the log file.
#
# Globals:   LOGFILE (read) - path of the file that is appended to
# Arguments: $1 - message text
#
# printf is used instead of echo so that a message which happens to look like
# an echo option (for example "-n") or contains backslashes is written
# literally.
function log() {
   printf '%s\n' "${1}" >>"${LOGFILE}"
}

# Print the usable entries of the feeds list, one per line.
#
# Lines that start with ';' or '#' and empty lines are filtered out.
#
# Globals: FEEDS (read) - path of the feeds list
# Outputs: the remaining lines on stdout
# Returns: 1 (after logging) when FEEDS is not a regular file; otherwise the
#          exit status of grep, which is non-zero when no line is left.
function getlist() {
   # FEEDS must be quoted: unquoted, a path containing whitespace makes the
   # test itself fail and the missing-file branch is never taken.
   if [[ ! -f "${FEEDS}" ]] ; then
      log "getlist cannot read feeds list: ${FEEDS}"
      return 1
   fi
   grep -v -e '^[;#]' -e '^$' -- "${FEEDS}"
}

# Fetch a feed into the cache directory and print it on stdout.
#
# The cache file is named after the md5 checksum of the URL.
#
# Globals:   CACHEDIR (read) - directory holding cached feeds; created on
#            demand
# Arguments: $1 - feed URL
# Outputs:   the fetched document on stdout; problems are reported via log()
# Returns:   1 no URL given
#            2 the cache directory could not be created
#            3 wget reported an error
#            4 the cache file does not exist after the download
#            otherwise the exit status of cat
function getfeed() {
   local url="${1}"
   local outfile

   # If we aren't given an URL, exit with a return code.
   if [[ -z "${url}" ]] ; then
      log "getfeed called without an argument."
      return 1
   fi

   # Try creating the cache dir.
   if [[ ! -d "${CACHEDIR}" ]] && ! mkdir -p -- "${CACHEDIR}" ; then
      log "Unable to make cachedir: ${CACHEDIR}"
      return 2
   fi

   # We have an url and a place to put it, let's try to get it.
   # printf (not echo -n) so the checksum is taken over the exact URL bytes.
   outfile="${CACHEDIR}/$(printf '%s' "${url}" | md5sum | awk '{print $1}')"

   # NOTE(review): the GNU Wget manual states that -N (timestamping) is not
   # supported together with -O; the flag is left in place unchanged.
   if ! wget -N -q -O "${outfile}" -- "${url}" ; then
      log "wget exited with an error code trying to fetch: ${url}"
      return 3
   fi

   if [[ ! -f "${outfile}" ]] ; then
      log "cached feed went missing: ${outfile}"
      return 4
   fi
   cat -- "${outfile}"
}

# Helper for getmp3: does the actual work for one item.  Kept separate so that
# getmp3 can clear the per-item fields no matter which path returns.
function _getmp3_fetch() {
   local -a words
   local name dirname
   # Kept in a variable and expanded unquoted: since bash 3.2 a quoted
   # right-hand side of =~ is compared as a literal string, not as a regex.
   local mp3_re='http.*mp3'

   if [[ -z "${CHANNEL}" ]] ; then CHANNEL="${TITLE}" ; fi
   if [[ -z "${CHANNEL}" ]] ; then
      echo "Unable to determine the channel name, skipping."
      return
   fi

   # Trim and collapse whitespace by word-splitting with read; unlike an
   # unquoted expansion this does not glob-expand characters such as '*'.
   read -r -a words <<<"${CHANNEL}"
   name="${words[*]}"

   # Remove html entities (one entity at a time, not everything between the
   # first '&' and the last ';'), convert spaces to _s and strip everything
   # that is not alphanumeric, '.' or '_'.
   name=$(printf '%s\n' "${name}" \
      | sed -e 's/&[#[:alnum:]]\{1,\};//g' -e 's/ /_/g' -e 's/[^a-zA-Z0-9._]//g')
   dirname="${BASEDIR}/${name}"

   # Try to make the directory, and at the same time test to see if it exists.
   if ! mkdir -p -- "${dirname}" ; then
      echo "Unable to make the directory: ${dirname}"
      return
   fi

   # wget stores into the working directory, so never continue when the
   # directory change failed.
   if ! cd -- "${dirname}" ; then
      echo "Unable to enter the directory: ${dirname}"
      return
   fi

   # Check the URL, and use alternates if necessary.
   if [[ ! "${ENCL}" =~ ${mp3_re} ]] ; then
      if [[ "${LINK}" =~ ${mp3_re} ]] ; then
         ENCL="${LINK}"
      elif [[ "${HREF}" =~ ${mp3_re} ]] ; then
         ENCL="${HREF}"
      fi
   fi

   if [[ -z "${ENCL}" ]] ; then
      echo "Unable to determine mp3 URL, skipping."
   # -nc is noclobber, means don't fetch if a file of that name exists.
   # "--" keeps a URL taken from the feed from being parsed as a wget option.
   elif ! wget -nc -- "${ENCL}" ; then
      echo "Error fetching ${ENCL}"
   else
      printf 'Channel: %s\n' "${CHANNEL}"
      printf 'Title:   %s\n' "${TITLE}"
      printf 'ENCL:    %s\n' "${ENCL}"
      echo ""
   fi
}

# Download the episode described by the current item fields, then clear them.
#
# Globals read:    BASEDIR, CHANNEL, TITLE, ENCL, LINK, HREF
# Globals written: CHANNEL (set to TITLE when it is empty);
#                  TITLE, LINK, DATE, ENCL, HREF (emptied on every path, so a
#                  skipped item cannot leak its values into the next one)
# Side effects:    creates ${BASEDIR}/<sanitised channel name>, changes the
#                  working directory to it and runs wget there
# Outputs:         progress and error messages on stdout
function getmp3() {
   _getmp3_fetch
   TITLE=""; LINK=""; DATE=""; ENCL=""; HREF=""
}

# Walk every feed returned by getlist, scan the feed text line by line and
# call getmp3 each time a closing </item> tag is seen.
#
# The scan is line oriented: a tag and its value are only recognised when
# they sit on the same line.
#
# Globals written: CHANNEL (first <title> value of each feed), TITLE, LINK,
#                  DATE, ENCL, HREF (per-item fields read by getmp3)
# Requires bash >= 3.0 for the =~ operator.
function main() {
   local URL LINE TAG flat
   local -a words
   # Kept in a variable and expanded unquoted: since bash 3.2 a quoted
   # right-hand side of =~ is compared as a literal string, not as a regex.
   local href_re='href=["'\''][^"'\'']*[.]mp3["'\'']'

   while read -r URL ; do
      CHANNEL=""
      # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
      while read -r LINE || [[ -n "${LINE}" ]] ; do
         # Whitespace-collapsed copy of the line, built by word-splitting with
         # read so that characters such as '*' are not glob-expanded.
         read -r -a words <<<"${LINE}"
         flat="${words[*]}"

         # Name of the first tag on the line, up to the first space or '>'.
         TAG=$(printf '%s\n' "${flat}" | sed -n 's/<\([^>\ ]*\).*/\1/p')

         # The first title seen in a feed becomes the channel name.
         if [[ -z "${CHANNEL}" && "${TAG}" == *title* ]] ; then
            CHANNEL=$(printf '%s\n' "${flat}" \
               | sed -n -e 's/<title>\([^<]*\)<\/title>/\1/pi' \
               | sed -e 's/[\r\n]//')
         fi

         case "${TAG}" in
            'title')     TITLE=$(printf '%s\n' "${LINE}" | sed -n 's/<title>\([^<]*\)<\/title>/\1/pi') ;;
            'link')      LINK=$(printf '%s\n' "${LINE}" | sed -n 's/.*<link>\([^<]*\)<\/link>/\1/pi') ;;
            'pubDate')   DATE=$(printf '%s\n' "${LINE}" | sed -n 's/.*<pubDate>\([^<]*\)<\/pubDate>/\1/pi') ;;
            'enclosure') ENCL=$(printf '%s\n' "${LINE}" | sed -n 's/.*<enclosure url=["'\'']\([^"'\'']*\)["'\''].*/\1/pi') ;;
            '/item')     getmp3 ;;
         esac

         # Remember an mp3 hyperlink found anywhere on the line.
         if [[ "${LINE}" =~ ${href_re} ]] ; then
            HREF=$(printf '%s\n' "${LINE}" | sed -n 's/.*href=["'\'']\([^"'\'']*\).mp3["'\''].*/\1.mp3/pi')
         fi
      done < <(getfeed "${URL}")
   done < <(getlist)
}

main "$@"

