I'm trying to write code that will collect statistics on the sequence of characters in the text and generate new text based on it. This works when collecting statistics with one character, but as soon as I try to increase the number of characters, I get KeyError:
import zipfile
from random import randint
from pprint import pprint
# file_zip = 'voyna-i-mir.txt.zip'
# zip = zipfile.ZipFile(file_zip, 'r')
# for file in zip.namelist():
# zip.extract(file)
origin = 'voyna-i-mir.txt'
statistic = {}
chain = ' '
with open(origin, 'r', encoding='cp1251') as file:
for sting in file:
#print(sting)
for symbol in sting:
if chain in statistic:
if symbol in statistic[chain]:
statistic[chain][symbol] += 1
else:
statistic[chain][symbol] = 1
else:
statistic[chain] = {symbol: 1}
chain = chain[1:] + symbol
dictionary = {}
stat_generator = {}
for chain, symbol_stat in statistic.items():
dictionary[chain] = 0
stat_generator[chain] = []
for symbol, count in symbol_stat.items():
dictionary[chain] += count
stat_generator[chain].append([count, symbol])
stat_generator[chain].sort(reverse=True)
gen = 1000
was_print = 0
chain = ' '
while was_print < gen:
symbol_stat = stat_generator[chain]
total = dictionary[chain]
random = randint(1, total)
position = 0
for count, symbol in symbol_stat:
position += count
if random <= position:
break
print(symbol, end='')
was_print += 1
chain = chain[1:] + symbol
The error I received
Traceback (most recent call last):
File "C:\Users\roman\Desktop\skillbox\[Skillbox] Профессия Python-
разработчик\9. Работа с файлами и форматированный
вывод-20210102T191733Z-001\9. Работа с файлами и форматированный
вывод\lesson_009\python_snippets\test.py", line 46, in <module>
symbol_stat = stat_generator[chain]
~~~~~~~~~~~~~~^^^^^^^
KeyError: ' -–'