Demo entry 6659050



Submitted by anonymous on Nov 08, 2017 at 20:48
Language: Python 3. Code size: 1.3 kB.

from collections import Counter
from heapq import heappush, heappop, heapify
import re

def encode(dic):
      heap = [[v, [k, ""]] for k, v in dic.items()]
      while len(heap) > 1:
            l = heappop(heap)
            r = heappop(heap)
            for left in l[1:]:
                  left[1] = '0' + left[1]
            for right in r[1:]:
                  right[1] = '1' + right[1]
            heappush(heap, [l[0] + r[0]] + l[1:] + r[1:])
      return sorted(heappop(heap)[1:], key=lambda p: dic[p[0]], reverse=True)

regex = re.compile("[^a-zA-Z, !?.’]")

with open ("text.txt", "r",encoding='utf-8') as f:
      dic = Counter()
      for x in f:
            x = regex.sub('', x)
            x = x.lower()
            dic += Counter(x)

huff = encode(dic)
print ("Symbol\tHuffman Code")
for p in huff:
    print ("%s\t%s" % (p[0], p[1]))

total_char = sum(dic.values())
print ("If use 5-bits for each character:",total_char*5)

total_num = 0

for i in dic.items():
      key = i[0]
      for j in huff:
            if key == j[0]:
                  total_num = total_num + i[1]*len(j[1])
print ("If use huffman coding:",total_num)
print ("The total number of bit saves:" ,total_char*5 - total_num)

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).