import math
from collections import Counter

# Layout of the stream written by Q_Fanoencoder and parsed by Q_decoder
# (every "bit" is stored as one '0'/'1' character):
#   [16-bit table length][table entries ...][payload]
# where each table entry is [8-bit character code][8-bit code length][code].
_HEADER_BITS = 16
_CHAR_BITS = 8
_CODE_LENGTH_BITS = 8


def _read_text(path):
    """Return the content of *path* decoded as latin-1, newlines untouched."""
    # newline='' disables universal-newline translation so that '\r' and
    # '\r\n' survive the encode/decode round trip unchanged.
    with open(path, 'r', encoding='latin-1', newline='') as file:
        return file.read()


def _statistics(text):
    """Compute per-character statistics of *text*.

    Returns a tuple ``(freq, prob, info_content, entropy)`` where the first
    three are dicts keyed by character (in order of first occurrence) and
    ``entropy`` is the sum of ``prob * info_content`` in bits per character.
    For empty text all dicts are empty and the entropy is 0.
    """
    freq = dict(Counter(text))
    total_chars = sum(freq.values())

    prob = {}
    info_content = {}
    entropy = 0
    for char, count in freq.items():
        p = count / total_chars
        prob[char] = p
        info_content[char] = -math.log2(p)
        entropy += p * info_content[char]
    return freq, prob, info_content, entropy


def Q_statistik(path='Text.txt'):
    """Read *path* (latin-1) and return its character statistics.

    Args:
        path: File to analyse; defaults to ``'Text.txt'``.

    Returns:
        ``(freq, prob, info_content, entropy)``: absolute frequencies,
        relative frequencies, information content ``-log2(p)`` per character,
        and the entropy of the text.
    """
    return _statistics(_read_text(path))


def fano_encoder(probabilities):
    """Build a Fano code table for the given symbol probabilities.

    Symbols are sorted by descending probability (ties keep their input
    order) and recursively split at the first position where the accumulated
    probability reaches half of the group's total; the left group is
    prefixed with '0', the right group with '1'.

    Args:
        probabilities: Mapping ``symbol -> probability``.

    Returns:
        Dict ``symbol -> code`` where each code is a string of '0'/'1'.
        An empty mapping yields an empty dict. A single symbol is assigned
        the code ``'0'``.
    """
    sorted_probs = sorted(
        probabilities.items(), key=lambda item: item[1], reverse=True
    )

    if not sorted_probs:
        # Nothing to encode; recursing on an empty list would never terminate.
        return {}
    if len(sorted_probs) == 1:
        # A lone symbol must still occupy one bit per occurrence, otherwise
        # the payload is empty and the text cannot be decoded.
        return {sorted_probs[0][0]: '0'}

    def fano_recursive(symbols):
        if len(symbols) == 1:
            return {symbols[0][0]: ''}

        total = sum(weight for _, weight in symbols)
        acc = 0
        split_index = 0
        for i, (_, weight) in enumerate(symbols):
            acc += weight
            if acc >= total / 2:
                split_index = i
                break
        # Guard: both halves must be non-empty or the recursion cannot end.
        split_index = min(split_index, len(symbols) - 2)

        left_codes = fano_recursive(symbols[:split_index + 1])
        right_codes = fano_recursive(symbols[split_index + 1:])

        codes = {symbol: '0' + code for symbol, code in left_codes.items()}
        codes.update(
            (symbol, '1' + code) for symbol, code in right_codes.items()
        )
        return codes

    return fano_recursive(sorted_probs)


def Q_Fanoencoder(input_path='Text.txt', output_path='EncodedText.txt'):
    """Fano-encode *input_path* and write the bit string to *output_path*.

    The output is a text file of '0'/'1' characters: a 16-bit table length,
    the code table (8-bit character code, 8-bit code length and the code
    itself for each symbol), followed by the encoded text.

    Args:
        input_path: Source text file (latin-1); defaults to ``'Text.txt'``.
        output_path: Destination file; defaults to ``'EncodedText.txt'``.

    Returns:
        ``(text, encoded_text, fano_codes)``: the original text, the encoded
        payload without header and table, and the code table used.
    """
    text = _read_text(input_path)
    _, probabilities, _, _ = _statistics(text)
    fano_codes = fano_encoder(probabilities)

    encoded_text = ''.join(fano_codes[char] for char in text)

    coding_table_bin = ''.join(
        format(ord(char), f'0{_CHAR_BITS}b')
        + format(len(code), f'0{_CODE_LENGTH_BITS}b')
        + code
        for char, code in fano_codes.items()
    )
    # Fixed-width length prefix so the decoder knows where the table ends.
    header = format(len(coding_table_bin), f'0{_HEADER_BITS}b')

    with open(output_path, 'w', encoding='latin-1') as file:
        file.write(header + coding_table_bin + encoded_text)

    return text, encoded_text, fano_codes


def _decode_bitstream(bitstream):
    """Decode a '0'/'1' string produced by Q_Fanoencoder back into text."""
    if len(bitstream) < _HEADER_BITS:
        raise ValueError(
            'encoded stream is shorter than the table-length header'
        )

    coding_table_length = int(bitstream[:_HEADER_BITS], 2)
    table_end = _HEADER_BITS + coding_table_length
    coding_table_bin = bitstream[_HEADER_BITS:table_end]
    payload = bitstream[table_end:]

    # Code book: maps each bit pattern back to its character.
    code_to_char = {}
    pos = 0
    while pos < len(coding_table_bin):
        char = chr(int(coding_table_bin[pos:pos + _CHAR_BITS], 2))
        pos += _CHAR_BITS
        code_length = int(
            coding_table_bin[pos:pos + _CODE_LENGTH_BITS], 2
        )
        pos += _CODE_LENGTH_BITS
        code_to_char[coding_table_bin[pos:pos + code_length]] = char
        pos += code_length

    # Text: the codes are prefix-free, so the first match is the symbol.
    decoded_chars = []
    current_code = ''
    for bit in payload:
        current_code += bit
        char = code_to_char.get(current_code)
        if char is not None:
            decoded_chars.append(char)
            current_code = ''
    # Trailing bits that do not complete a code are ignored.
    return ''.join(decoded_chars)


def Q_decoder(input_path='EncodedText.txt', output_path='DecodedText.txt'):
    """Decode *input_path* and write the recovered text to *output_path*.

    Args:
        input_path: File written by ``Q_Fanoencoder``; defaults to
            ``'EncodedText.txt'``.
        output_path: Destination for the decoded text (latin-1); defaults to
            ``'DecodedText.txt'``.

    Returns:
        The decoded text.

    Raises:
        ValueError: If the stream is shorter than the 16-bit header or
            contains a malformed binary field.
    """
    with open(input_path, 'r', encoding='latin-1') as file:
        bitstream = file.read()

    decoded_text = _decode_bitstream(bitstream)

    # newline='' writes the characters exactly as decoded, mirroring the
    # untranslated read of the source text.
    with open(output_path, 'w', encoding='latin-1', newline='') as file:
        file.write(decoded_text)
    return decoded_text


def calculate_compression_rate(original_text, encoded_text, fano_codes):
    """Compare the original size with the encoded size including the table.

    The original is counted at 8 bits per character. The encoded size is the
    payload length plus the overhead written by ``Q_Fanoencoder``: a 16-bit
    header and, per symbol, 8 bits for the character, 8 bits for the code
    length and the code bits themselves.

    Args:
        original_text: The unencoded text.
        encoded_text: The encoded payload as a '0'/'1' string.
        fano_codes: Mapping ``symbol -> code`` used for the encoding.

    Returns:
        ``(original_length_bits, total_encoded_bits, compression_rate)``
        where the rate is the saving in percent (negative if the encoded
        form is larger). For empty original text the rate is ``0.0``.
    """
    original_length_bits = len(original_text) * 8
    code_table_bits = _HEADER_BITS + sum(
        _CHAR_BITS + _CODE_LENGTH_BITS + len(code)
        for code in fano_codes.values()
    )
    total_encoded_bits = len(encoded_text) + code_table_bits

    if original_length_bits == 0:
        # No input: a ratio is undefined, report no saving instead of
        # dividing by zero.
        return original_length_bits, total_encoded_bits, 0.0

    compression_rate = (
        (original_length_bits - total_encoded_bits)
        / original_length_bits * 100
    )
    return original_length_bits, total_encoded_bits, compression_rate


def main():
    """Print statistics for Text.txt, encode it, report the rate, decode."""
    frequencies, probabilities, information_content, entropy = Q_statistik()

    print("Häufigkeiten:", frequencies)
    print("Wahrscheinlichkeiten:", probabilities)
    print("Informationsgehalt:", information_content)
    print("Entropie:", entropy)

    original_text, encoded_text, fano_codes = Q_Fanoencoder()

    original_length_bits, total_encoded_bits, compression_rate = (
        calculate_compression_rate(original_text, encoded_text, fano_codes)
    )
    print(f"Originale Länge in Bits: {original_length_bits}")
    print(f"Gesamte codierte Länge in Bits (inkl. Codetabelle): {total_encoded_bits}")
    print(f"Kompressionsrate: {compression_rate:.2f}%")

    Q_decoder()


if __name__ == '__main__':
    main()