-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_counter.py
77 lines (64 loc) · 3.02 KB
/
main_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import json
import os
import string # Contains ASCII characters
from collections import Counter
import matplotlib.pyplot as plt
# Select matplotlib's "fivethirtyeight" style sheet. This runs at import time,
# so it applies to every figure drawn after this module is loaded.
plt.style.use("fivethirtyeight")
class LetterFrequency:
    """
    Count how often each ASCII letter occurs in the text files of a directory.

    Typical usage is ``count_letters()`` followed by ``calculate_percentage()``
    and, optionally, ``plot()``.

    Instance attributes:
        path: directory that holds the text files.
        img: file name the plot is saved to, or ``None`` to skip saving.
        files: directory entries found in ``path`` at construction time.
        frequency: ``Counter`` mapping each uppercase letter to its count.
        percentage: mapping of each letter to its share of all counted
            letters (empty until ``calculate_percentage`` has run).
        total_letters: number of letters counted so far.
    """

    # Letters that are counted. Every character is converted to uppercase
    # before it is checked, so only the uppercase alphabet is listed here.
    letters = list(string.ascii_uppercase)
    # Class-level default; `count_letters` stores the real total on the instance.
    total_letters = 0

    def __init__(self, img=None, path="books"):
        """
        The constructor of the class `LetterFrequency`:
        :param img: name of the file the plot is saved to; default is None,
            which means the plot is not saved.
        :param path: directory containing the text files to analyse,
            default is "books".
        :raises OSError: if `path` cannot be listed (e.g. it does not exist).
        """
        self.path = path
        self.img = img
        self.files = os.listdir(path)
        # The counters live on the instance. Keeping them as class attributes
        # would make every instance share (and reset) the same Counter.
        # Seeding with zeros keeps the keys in A..Z order for the plot.
        self.frequency = Counter(dict.fromkeys(self.letters, 0))
        self.percentage = dict()

    def count_letters(self):
        """
        Counts the letters in the text files found in `self.path`.

        Entries of `self.files` that are not regular files (for example
        sub-directories) are skipped. Counts are added to `self.frequency`,
        so calling this method again accumulates on top of earlier results.
        `self.total_letters` is refreshed once all files are processed.
        """
        # Build the lookup set once; testing against a list would rescan
        # the alphabet for every single character.
        allowed = frozenset(self.letters)
        for fname in self.files:
            full_path = os.path.join(self.path, fname)
            if not os.path.isfile(full_path):
                # open() would fail on a directory, so skip it.
                continue
            print("READING:", fname)  # Prints the name of the file
            with open(full_path, encoding="utf-8") as f:
                text = f.read()
            for char in text:
                # Uppercase each character on its own: characters whose
                # uppercase form is more than one letter (e.g. "ß" -> "SS")
                # are then not in `allowed` and stay uncounted.
                upper = char.upper()
                if upper in allowed:
                    self.frequency[upper] += 1
        self.total_letters = sum(self.frequency.values())
        print("Total letters:", self.total_letters)

    def calculate_percentage(self):
        """
        Calculates the percentage of each letter based on `self.frequency`.

        The result is stored in `self.percentage`, rounded to three decimals
        for readability, and printed. When no letters have been counted,
        every percentage is 0.0 instead of raising `ZeroDivisionError`.
        """
        total = self.total_letters
        # Copying the Counter keeps the key order and the printed format.
        self.percentage = self.frequency.copy()
        for letter in self.percentage:
            if total:
                share = round(self.percentage[letter] * 100 / total, 3)
            else:
                share = 0.0
            self.percentage[letter] = share
        print(self.percentage)

    def plot(self):
        """
        Plots `self.percentage` as a bar chart.

        The figure is saved to `self.img` when it is not None, and is then
        shown on screen.
        """
        plt.bar(list(self.percentage.keys()), list(self.percentage.values()))
        plt.title("English Letters Frequency")
        plt.xlabel("Letters")
        plt.ylabel("Percentage")
        plt.grid(True)
        plt.tight_layout()
        if self.img is not None:
            plt.savefig(self.img)
        plt.show()