encoder and compare
This commit is contained in:
68
main.py
68
main.py
@@ -26,13 +26,79 @@ def Q_statistik():
|
||||
return freq, prob, info_content, entropy
|
||||
|
||||
|
||||
def fano_encoder(probabilities):
    """Build a Fano-style prefix code for the given symbol probabilities.

    Symbols are ranked by descending probability and the ranked list is
    split recursively: the left part ends at the first symbol whose running
    sum reaches half of the part's total weight. Codes in the left part get
    a leading '0', codes in the right part a leading '1'.

    Args:
        probabilities: Mapping of symbol -> probability (any non-negative
            weight works, the values do not have to sum to 1).

    Returns:
        dict mapping each symbol to its code string made of '0'/'1'
        characters, inserted in order of descending probability. An empty
        mapping yields an empty dict; a single symbol is assigned "0".
    """
    # Sort by the dictionary value (the probability), most likely first.
    ranked = sorted(probabilities.items(), key=lambda item: item[1], reverse=True)

    # Nothing to encode: without this guard the recursion would never end.
    if not ranked:
        return {}

    # A one-symbol alphabet still needs one bit per symbol, otherwise the
    # encoded text would be empty and its length could not be recovered.
    if len(ranked) == 1:
        return {ranked[0][0]: "0"}

    codes = {}

    def fano_recursive(symbols, prefix):
        # A single symbol cannot be split any further: its code is complete.
        if len(symbols) == 1:
            codes[symbols[0][0]] = prefix
            return

        total = sum(weight for _, weight in symbols)

        # Default to the last admissible split so that the right part is
        # never empty, even if rounding keeps the running sum below half.
        split_index = len(symbols) - 2
        acc = 0
        for i, (_, weight) in enumerate(symbols[:-1]):
            acc += weight
            if acc >= total / 2:
                split_index = i
                break

        fano_recursive(symbols[:split_index + 1], prefix + '0')
        fano_recursive(symbols[split_index + 1:], prefix + '1')

    fano_recursive(ranked, "")
    return codes
|
||||
|
||||
|
||||
def Q_Fanoencoder(input_path='Text.txt', output_path='EncodedText.txt'):
    """Encode a text file with Fano codes and write the resulting bit string.

    The code table is derived from the probabilities returned by
    Q_statistik(). The text read from ``input_path`` is translated symbol
    by symbol and the concatenated codes are written to ``output_path``.

    NOTE(review): Q_statistik() is called without ``input_path``; presumably
    it analyses the same default file - confirm before passing another path.

    Args:
        input_path: Path of the UTF-8 text file to encode.
        output_path: Path the string of '0'/'1' characters is written to.

    Returns:
        Tuple ``(text, encoded_text, fano_codes)``.

    Raises:
        KeyError: If the text contains a character without a code.
    """
    statistics = Q_statistik()
    # Only the second entry (the probabilities) is needed for the code table.
    fano_codes = fano_encoder(statistics[1])

    with open(input_path, 'r', encoding='utf-8') as source:
        text = source.read()

    # Look up every character's code and concatenate them in text order.
    encoded_text = ''.join(fano_codes[symbol] for symbol in text)

    with open(output_path, 'w', encoding='utf-8') as target:
        target.write(encoded_text)

    return text, encoded_text, fano_codes
|
||||
|
||||
|
||||
def calculate_compression_rate(original_text, encoded_text, fano_codes):
    """Compare the size of the original text with its Fano-encoded form.

    The original is counted with 8 bits per character. The encoded size is
    the number of code characters in ``encoded_text`` plus the cost of the
    code table, which is counted as 8 bits per code character and 8 bits
    per symbol.

    Args:
        original_text: The unencoded text.
        encoded_text: String of '0'/'1' characters, one character per bit.
        fano_codes: Mapping of symbol -> code string.

    Returns:
        Tuple ``(original_length_bits, total_encoded_bits, compression_rate)``
        where ``compression_rate`` is the saving in percent (negative when
        the encoded form is larger). For an empty original text the rate is
        reported as 0.0 instead of dividing by zero.
    """
    original_length_bits = len(original_text) * 8
    encoded_length_bits = len(encoded_text)
    code_table_bits = (
        sum(len(code) for code in fano_codes.values()) * 8 + len(fano_codes) * 8
    )
    total_encoded_bits = encoded_length_bits + code_table_bits

    # An empty original has no size to relate the saving to.
    if original_length_bits == 0:
        return original_length_bits, total_encoded_bits, 0.0

    compression_rate = (
        (original_length_bits - total_encoded_bits) / original_length_bits * 100
    )
    return original_length_bits, total_encoded_bits, compression_rate
|
||||
|
||||
|
||||
def main():
    """Print the source statistics, run the Fano encoder and report the result."""
    frequencies, probabilities, information_content, entropy = Q_statistik()

    # Label/value pairs are printed in this fixed order.
    report = (
        ("Häufigkeiten:", frequencies),
        ("Wahrscheinlichkeiten:", probabilities),
        ("Informationsgehalt:", information_content),
        ("Entropie:", entropy),
    )
    for label, value in report:
        print(label, value)

    original_text, encoded_text, fano_codes = Q_Fanoencoder()
    result = calculate_compression_rate(original_text, encoded_text, fano_codes)
    original_length_bits, total_encoded_bits, compression_rate = result

    print(f"Originale Länge in Bits: {original_length_bits}")
    print(f"Gesamte codierte Länge in Bits (inkl. Codetabelle): {total_encoded_bits}")
    print(f"Kompressionsrate: {compression_rate:.2f}%")
|
||||
|
||||
|
||||
# Run the program only when the file is executed as a script, and only once.
if __name__ == '__main__':
    main()
|
||||
|
||||
Reference in New Issue
Block a user