/**
* huffman.scala - Computes huffman compression algorithm.
*
*
* Autor: Pedro Garcia Freitas [sawp@sawp.com.br]
* License: Creative Commons
* <http: //creativecommons.org/licenses/by-nc-nd/2.5/en/>
*
* Apr 2011
*/
type Node = (Double, Any)
def huffman(list: List[(Double, String)]): List[(String, String)] = {
def appendCode(m: String, x: List[Node]): List[Node] =
for ((i,j) < - x) yield (i, m + j)
def sortByFrequency(x: List[Node]) = {
def sorter(e1: Node, e2: Node) =
if (e1._1 < e2._1) true else false
x.sortWith(sorter)
}
def group(l: List[Node]): List[Node] = {
val ll = sortByFrequency(l)
ll match {
case Tuple2(f1: Double, f2: List[Node]) ::
Tuple2(g1: Double, g2: List[Node]) :: t =>
group((f1 + g1, appendCode("0", f2) ::: appendCode("1", g2)) :: t)
case _ => ll
}
}
def combining(e: List[Node]) = e match {
case c :: t => c._2
case _ => error("Not defined")
}
val k = for ((i,j) < - list) yield (i, List((j, "")))
val combined = group(k)
val result = combining(combined)
result.asInstanceOf[List[(String, String)]]
}
def calculateAverageLength(probs: List[(Double, String)],
codes: List[(String, String)]): Double = {
def getProbabilities(probs: List[(Double, String)]) =
for ((prob, letter) <- probs) yield prob
def getCodeLength(cods: List[(String, String)]) =
for ((letter, code) <- cods) yield code.length
def calculateAverage(codeLengths: List[Int], probs: List[Double]) = {
val zipedCodesLengths = codeLengths.zip(probs)
val len = for((codelen, prob) <- zipedCodesLengths) yield codelen * prob
val average = len.sum
average
}
val probabilities = getProbabilities(probs)
val codeLengths = getCodeLength(codes)
val average = calculateAverage(codeLengths, probabilities)
average
}
def calculateEntropy(probs: List[(Double, String)]) = {
def getProbabilities(probs: List[(Double, String)]) =
for ((prob, letter) <- probs) yield prob
def getLocalEntropy(probabilities: List[Double]) =
for (prob <- probabilities) yield prob * (math.log(prob) / math.log(2.0))
def getEntropy(localEntropy: List[Double]) =
-localEntropy.sum
val probabilities = getProbabilities(probs)
val localEntropy = getLocalEntropy(probabilities)
val entropy = getEntropy(localEntropy)
entropy
}
/* Tests */
val probs = (0.4, "A") :: (0.2, "B") :: (0.1, "C") :: (0.05, "D") ::
(0.1, "E") :: (0.075, "F") :: (0.075, "G") :: Nil
val codes = huffman(probs)
val averagelen = calculateAverageLength(probs, codes)
val entropy = calculateEntropy(probs)
println("\n")
println("Frequency and Symbols: " + probs)
println("Huffman code: " + codes)
println("Average length: " + averagelen)
println("Entropy: " + entropy)