152 lines
4.5 KiB
Python
152 lines
4.5 KiB
Python
import math
|
|
|
|
|
|
def Q_statistik():
    """Compute per-character statistics for the contents of ``Text.txt``.

    The file is read as Latin-1 text from the current working directory.

    Returns:
        A 4-tuple ``(freq, prob, info_content, entropy)`` where

        * ``freq`` maps each character to its number of occurrences,
        * ``prob`` maps each character to its relative frequency,
        * ``info_content`` maps each character to ``-log2(prob)``,
        * ``entropy`` is the sum of ``prob * info_content`` over all
          characters (the integer ``0`` when the file is empty).
    """
    with open('Text.txt', 'r', encoding='latin-1') as source:
        content = source.read()

    # Count occurrences; keys appear in order of first occurrence.
    freq = {}
    for symbol in content:
        freq[symbol] = freq.get(symbol, 0) + 1

    total_chars = sum(freq.values())

    prob = {symbol: occurrences / total_chars
            for symbol, occurrences in freq.items()}
    info_content = {symbol: -math.log2(p) for symbol, p in prob.items()}

    # Accumulate in dictionary order so the floating-point result is stable.
    entropy = 0
    for symbol, p in prob.items():
        entropy += p * info_content[symbol]

    return freq, prob, info_content, entropy
|
|
|
|
|
|
def fano_encoder(probabilities):
    """Build a Fano-style binary prefix code from symbol probabilities.

    Symbols are sorted by descending probability and the sorted list is
    split recursively: the left part ends at the first symbol where the
    running sum reaches at least half of the part's total weight. The left
    part gets a ``'0'`` appended to its code prefix, the right part a ``'1'``.

    Args:
        probabilities: Mapping of symbol -> weight (probability or count).

    Returns:
        Dict mapping each symbol to its code word (a string of ``'0'`` and
        ``'1'``), in descending-probability order. An empty mapping yields
        an empty dict; a single symbol is assigned the code ``'0'``.
    """
    if not probabilities:
        # Without this guard the recursion would be entered with an empty
        # list and never terminate.
        return {}

    sorted_probs = sorted(
        probabilities.items(), key=lambda item: item[1], reverse=True
    )

    if len(sorted_probs) == 1:
        # A lone symbol still needs a non-empty code word; with an empty
        # code the encoded text would contain no bits at all and could not
        # be decoded again.
        return {sorted_probs[0][0]: '0'}

    codes = {}

    def fano_recursive(symbols, prefix):
        """Assign code words starting with *prefix* to every symbol."""
        if len(symbols) == 1:
            codes[symbols[0][0]] = prefix
            return

        total = sum(weight for _, weight in symbols)
        acc = 0
        split_index = 0
        for i, (_, weight) in enumerate(symbols):
            acc += weight
            if acc >= total / 2:
                split_index = i
                break

        # Both halves must be non-empty, otherwise the recursion on the
        # empty half would never reach the single-symbol base case.
        split_index = min(split_index, len(symbols) - 2)

        fano_recursive(symbols[:split_index + 1], prefix + '0')
        fano_recursive(symbols[split_index + 1:], prefix + '1')

    fano_recursive(sorted_probs, '')
    return codes
|
|
|
|
|
|
def Q_Fanoencoder():
    """Encode ``Text.txt`` with a Fano code and write ``EncodedText.txt``.

    The output file is a text file made of the characters ``'0'`` and
    ``'1'`` with this layout:

    * 16 bits: length of the coding table in bits,
    * the coding table: for every symbol 8 bits character code,
      8 bits code-word length, then the code word itself,
    * the encoded text.

    Returns:
        A 3-tuple ``(text, encoded_text, fano_codes)``: the original text,
        its encoded bit string (without header and table) and the
        symbol -> code-word mapping.
    """
    probabilities = Q_statistik()[1]
    fano_codes = fano_encoder(probabilities)

    with open('Text.txt', 'r', encoding='latin-1') as source:
        text = source.read()

    encoded_text = ''.join(fano_codes[symbol] for symbol in text)

    # One table entry per symbol: character, code length, code word.
    table_entries = []
    for symbol, code in fano_codes.items():
        table_entries.append(f'{ord(symbol):08b}{len(code):08b}{code}')
    coding_table_bin = ''.join(table_entries)

    # The table length is stored as a 16-bit binary number.
    header = format(len(coding_table_bin), '016b')

    with open('EncodedText.txt', 'w', encoding='latin-1') as target:
        target.write(header + coding_table_bin + encoded_text)

    return text, encoded_text, fano_codes
|
|
|
|
|
|
def Q_decoder():
    """Decode ``EncodedText.txt`` and write the result to ``DecodedText.txt``.

    The input is expected to be a string of ``'0'``/``'1'`` characters:
    a 16-bit table length, the coding table (per entry: 8 bits character
    code, 8 bits code-word length, the code word) and then the encoded text.

    Trailing bits that do not complete a code word are ignored.

    Raises:
        ValueError: If a header or table field cannot be parsed as a
            binary number.
    """
    with open('EncodedText.txt', 'r', encoding='latin-1') as file:
        bitstream = file.read()

    header_bits = 16
    coding_table_length = int(bitstream[:header_bits], 2)
    table_end = header_bits + coding_table_length
    coding_table_bin = bitstream[header_bits:table_end]
    payload = bitstream[table_end:]

    # Code book: rebuild the code-word -> character mapping from the table.
    fano_codes = {}
    i = 0
    while i < len(coding_table_bin):
        char = chr(int(coding_table_bin[i:i + 8], 2))
        code_length = int(coding_table_bin[i + 8:i + 16], 2)
        code_start = i + 16
        code = coding_table_bin[code_start:code_start + code_length]
        fano_codes[code] = char
        i = code_start + code_length

    # Text: collect decoded characters in a list and join once at the end
    # instead of growing a string with += for every decoded character.
    decoded_chars = []
    current_code = ''
    for bit in payload:
        current_code += bit
        if current_code in fano_codes:
            decoded_chars.append(fano_codes[current_code])
            current_code = ''

    with open('DecodedText.txt', 'w', encoding='latin-1') as file:
        file.write(''.join(decoded_chars))
|
|
|
|
|
|
def calculate_compression_rate(original_text, encoded_text, fano_codes):
    """Compare the size of the original text with its encoded form.

    The encoded size follows the layout written by ``Q_Fanoencoder``:
    a 16-bit header, then per code-book entry 8 bits for the character,
    8 bits for the code-word length and the code word itself, then the
    encoded text.

    Args:
        original_text: The unencoded text; counted as 8 bits per character.
        encoded_text: Bit string of ``'0'``/``'1'`` characters; counted as
            one bit per character.
        fano_codes: Mapping of symbol -> code word.

    Returns:
        A 3-tuple ``(original_length_bits, total_encoded_bits,
        compression_rate)``. The rate is the saving in percent relative to
        the original size (negative if the encoded form is larger) and
        ``0.0`` when the original text is empty.
    """
    header_bits = 16       # table-length field at the start of the file
    entry_fixed_bits = 16  # 8 bits character + 8 bits code-word length

    original_length_bits = len(original_text) * 8
    encoded_length_bits = len(encoded_text)
    code_table_bits = sum(
        entry_fixed_bits + len(code) for code in fano_codes.values()
    )
    total_encoded_bits = header_bits + code_table_bits + encoded_length_bits

    if original_length_bits == 0:
        # Avoid dividing by zero for an empty input text.
        compression_rate = 0.0
    else:
        compression_rate = (
            (original_length_bits - total_encoded_bits)
            / original_length_bits * 100
        )

    return original_length_bits, total_encoded_bits, compression_rate
|
|
|
|
|
|
def main():
    """Run the full pipeline: statistics, encoding, size report, decoding.

    Prints the character statistics of ``Text.txt``, encodes the text,
    prints the original and encoded sizes with the resulting compression
    rate, and finally decodes the encoded file again.
    """
    report_labels = (
        "Häufigkeiten:",
        "Wahrscheinlichkeiten:",
        "Informationsgehalt:",
        "Entropie:",
    )
    # Q_statistik returns its four results in the same order as the labels.
    for label, value in zip(report_labels, Q_statistik()):
        print(label, value)

    source_text, bitstring, codebook = Q_Fanoencoder()
    bits_before, bits_after, rate = calculate_compression_rate(
        source_text, bitstring, codebook
    )
    print(f"Originale Länge in Bits: {bits_before}")
    print(f"Gesamte codierte Länge in Bits (inkl. Codetabelle): {bits_after}")
    print(f"Kompressionsrate: {rate:.2f}%")

    Q_decoder()
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|