1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
| import heapq import os
class HuffmanNode: def __init__(self, char, freq): self.char = char self.freq = freq self.left = None self.right = None
def __lt__(self, other): return self.freq < other.freq
def build_huffman_tree(frequencies): heap = [HuffmanNode(char, freq) for char, freq in frequencies.items()] heapq.heapify(heap)
while len(heap) > 1: left = heapq.heappop(heap) right = heapq.heappop(heap) merged = HuffmanNode(None, left.freq + right.freq) merged.left = left merged.right = right heapq.heappush(heap, merged)
return heap[0]
def build_huffman_codes(node, current_code, huffman_codes): if node is None: return
if node.char is not None: huffman_codes[node.char] = current_code return
build_huffman_codes(node.left, current_code + '0', huffman_codes) build_huffman_codes(node.right, current_code + '1', huffman_codes)
def compress(input_file, output_file): with open(input_file, 'rb') as f: data = f.read()
frequencies = {} for byte in data: if byte not in frequencies: frequencies[byte] = 0 frequencies[byte] += 1
root = build_huffman_tree(frequencies) huffman_codes = {} build_huffman_codes(root, '', huffman_codes)
compressed_data = '' for byte in data: compressed_data += huffman_codes[byte]
padding = 8 - len(compressed_data) % 8 compressed_data += '0' * padding
with open(output_file, 'wb') as f: f.write(bytes([len(frequencies)])) for byte, freq in frequencies.items(): f.write(bytes([byte, (freq >> 24) & 0xFF, (freq >> 16) & 0xFF, (freq >> 8) & 0xFF, freq & 0xFF]))
for i in range(0, len(compressed_data), 8): byte = compressed_data[i:i+8] f.write(bytes([int(byte, 2)]))
if __name__ == "__main__": input_file = 'input.txt' compressed_file = 'compressed.bin' decompressed_file = 'decompressed.txt'
compress(input_file, compressed_file)
decompress(compressed_file, decompressed_file)
|