python_examples/freq.py

29 lines
895 B
Python
Raw Normal View History

2023-08-23 11:11:51 -04:00
# Most frequently used words in a text
from collections import Counter
from string import ascii_letters
2023-08-23 12:02:52 -04:00
2023-08-23 11:11:51 -04:00
def top_3_words(text):
    """Return up to three of the most frequent words in *text*.

    A word is a run of ASCII letters and apostrophes that contains at least
    one letter. Counting is case-insensitive and the returned words are
    lowercase. Every other character (punctuation, digits, tabs, newlines,
    ...) acts as a word separator.

    Args:
        text: The string to analyse.

    Returns:
        A list of at most three lowercase words, most frequent first. Words
        with equal counts keep the order in which they first appear. The
        list is shorter than three when the text has fewer distinct words,
        and empty when it has none.
    """
    word_chars = set(ascii_letters)
    word_chars.add("'")

    # Turn every non-word character into a space instead of deleting it, so
    # that "hello,world" or "a\nb" yields two words rather than one fused
    # token. Membership is tested before lowercasing so only ASCII letters
    # ever reach the output.
    normalized = "".join(
        ch.lower() if ch in word_chars else " " for ch in text
    )

    # Tokens consist solely of letters and apostrophes at this point; a token
    # that is empty after stripping apostrophes has no letter and is not a
    # word (e.g. "'" or "'''"). Words such as "rock'n'roll" are kept.
    counts = Counter(
        token for token in normalized.split() if token.strip("'")
    )

    return [word for word, _ in counts.most_common(3)]
2023-08-23 12:02:52 -04:00
2023-08-23 11:11:51 -04:00
# Demo: print the top-three result for a handful of sample inputs,
# including inputs that contain no real words at all.
for _sample in (
    "a a a b c c d d d d e e e e e",
    " //wont won't won't ",
    "e e e e DDD ddd DdD: ddd ddd aa aA Aa, bb cc cC e e e",
    " ' ",
    " ''' ",
):
    print(top_3_words(_sample))