/**
 * huffman.scala - Computes huffman compression algorithm.
 *
 *
 * Autor: Pedro Garcia Freitas [sawp@sawp.com.br]
 * License: Creative Commons
 *      <http: //creativecommons.org/licenses/by-nc-nd/2.5/en/>
 *
 * Apr 2011
*/

type Node = (Double, Any)

def huffman(list: List[(Double, String)]): List[(String, String)] = {
  def appendCode(m: String, x: List[Node]): List[Node]  = 
    for ((i,j) < - x) yield (i, m + j)

  def sortByFrequency(x: List[Node]) = {
    def sorter(e1: Node, e2: Node) = 
      if (e1._1 < e2._1) true else false
    x.sortWith(sorter)
  }

  def group(l: List[Node]): List[Node] = {
    val ll = sortByFrequency(l)
    ll match {
      case Tuple2(f1: Double, f2: List[Node]) :: 
           Tuple2(g1: Double, g2: List[Node]) :: t =>
        group((f1 + g1, appendCode("0", f2) ::: appendCode("1", g2)) :: t)
      case _ => ll
    }
  }

  def combining(e: List[Node]) = e match {
    case c :: t => c._2
    case _ => error("Not defined")
  }

  val k = for ((i,j) < - list) yield (i, List((j, "")))
  val combined = group(k)
  val result = combining(combined)
  result.asInstanceOf[List[(String, String)]]
}

def calculateAverageLength(probs: List[(Double, String)], 
                           codes: List[(String, String)]): Double = {
  def getProbabilities(probs: List[(Double, String)]) = 
    for ((prob, letter) <- probs) yield prob

  def getCodeLength(cods: List[(String, String)]) = 
    for ((letter, code) <- cods) yield code.length

  def calculateAverage(codeLengths: List[Int], probs: List[Double]) = {
    val zipedCodesLengths = codeLengths.zip(probs)
    val len = for((codelen, prob) <- zipedCodesLengths) yield codelen * prob
    val average = len.sum
    average
  }

  val probabilities = getProbabilities(probs)
  val codeLengths = getCodeLength(codes)
  val average = calculateAverage(codeLengths, probabilities)
  average
}

def calculateEntropy(probs: List[(Double, String)]) = {
  def getProbabilities(probs: List[(Double, String)]) = 
    for ((prob, letter) <- probs) yield prob

  def getLocalEntropy(probabilities: List[Double]) = 
    for (prob <- probabilities) yield prob * (math.log(prob) / math.log(2.0))

  def getEntropy(localEntropy: List[Double]) = 
    -localEntropy.sum

  val probabilities = getProbabilities(probs)
  val localEntropy = getLocalEntropy(probabilities)
  val entropy = getEntropy(localEntropy)
  entropy
}


/* Tests */
val probs = (0.4, "A") :: (0.2, "B") :: (0.1, "C") :: (0.05, "D") ::
  (0.1, "E") :: (0.075, "F") :: (0.075, "G") :: Nil
val codes = huffman(probs)
val averagelen = calculateAverageLength(probs, codes)
val entropy = calculateEntropy(probs)

println("\n")
println("Frequency and Symbols: " + probs)
println("Huffman code: " + codes)
println("Average length: " + averagelen)
println("Entropy: " + entropy)