Simple Huffman coding implementation (original) (raw)

This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters

[ Show hidden characters]({{ revealButtonHref }})

from queue import PriorityQueue
from collections import Counter
from dataclasses import dataclass, field
from typing import Any
@dataclass(order=True)
class Node:
count: int
value: Any=field(compare=False)
def huffman(symbol_list):
"""
This function generates the huffman tree for the given input.
The input is a list of "symbols".
"""
# figure out the frequency of each symbol
counts = Counter(symbol_list).most_common()
total = len(symbol_list)
if len(counts) < 2:
# 0 or 1 unique symbols, so no sense in performing huffman coding
return
queue = PriorityQueue()
for (val,count) in counts:
queue.put(Node(count=count, value=val))
# Create the huffman tree
largest_node_count = 0
while total != largest_node_count:
node1 = queue.get(False)
node2 = queue.get(False)
new_count = node1.count + node2.count
largest_node_count = new_count if new_count > largest_node_count else largest_node_count
queue.put(Node(count=new_count, value=(node1,node2)))
huffman_tree_root = queue.get(False)
# generate the symbol to huffman code mapping
lookup_table = huffman_tree_to_table(huffman_tree_root, "", {})
return lookup_table
def huffman_tree_to_table(root, prefix, lookup_table):
"""Converts the Huffman tree rooted at "root" to a lookup table"""
if type(root.value) != tuple:
# leaf node
lookup_table[root.value] = prefix
else:
huffman_tree_to_table(root.value[0], prefix + "0", lookup_table)
huffman_tree_to_table(root.value[1], prefix + "1", lookup_table)
return lookup_table
def text_to_huffman_code(input_text):
"""Helper function to convert an input string into its huffman symbol table"""
return huffman([c for c in input_text])