Add adjectives file and script
This commit is contained in:
parent
15238e1aa0
commit
03c2769f3e
3 changed files with 18072 additions and 0 deletions
14045
all_english_adjectives.txt
Normal file
14045
all_english_adjectives.txt
Normal file
File diff suppressed because it is too large
Load diff
4001
english_adjectives.txt
Normal file
4001
english_adjectives.txt
Normal file
File diff suppressed because it is too large
Load diff
26
get_adj.py
Normal file
26
get_adj.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
import nltk
|
||||||
|
from nltk.corpus import wordnet
|
||||||
|
from nltk.corpus import stopwords
|
||||||
|
from nltk import FreqDist
|
||||||
|
|
||||||
|
# NLTK resources that must be available locally before the corpora are read.
REQUIRED_RESOURCES = ('wordnet', 'stopwords')

# Number of words written to the output file.  The original script requested
# ``4000 + 1`` entries although its comment spoke of 4000; the value is kept
# unchanged.  NOTE(review): confirm whether the extra entry is intentional.
ADJECTIVE_LIMIT = 4000 + 1

# File the selected adjectives are written to, one word per line.
OUTPUT_PATH = 'english_adjectives.txt'


def rank_adjectives(counts, excluded=(), limit=None):
    """Return the words of *counts* ordered from most to least frequent.

    Ties are broken alphabetically so the result is the same on every run
    (plain set/dict iteration order of strings is not stable between
    interpreter runs).

    Args:
        counts: Mapping of word -> numeric frequency.
        excluded: Iterable of words to leave out of the result.
        limit: Maximum number of words to return; ``None`` returns all.

    Returns:
        A list of words, most frequent first.

    Raises:
        ValueError: If *limit* is negative.
    """
    if limit is not None and limit < 0:
        raise ValueError('limit must be non-negative')

    excluded = set(excluded)
    candidates = [
        (word, count) for word, count in counts.items() if word not in excluded
    ]
    # Highest count first, then alphabetical order for equal counts.
    candidates.sort(key=lambda item: (-item[1], item[0]))
    ranked = [word for word, _ in candidates]
    return ranked if limit is None else ranked[:limit]


def count_wordnet_adjectives():
    """Build a frequency distribution of WordNet adjectives.

    The first lemma of every adjective synset is taken as that synset's
    word, and the usage count WordNet reports for that lemma
    (``Lemma.count()``) is added to the word's total.  Words whose lemmas
    all report a count of zero are still present, with a total of 0.

    The previous implementation fed a de-duplicated collection of words to
    ``FreqDist``, which gave every word a count of exactly 1 and made
    ``most_common`` pick an arbitrary subset.

    Returns:
        A ``FreqDist`` mapping adjective -> summed lemma count.
    """
    fdist = FreqDist()
    for synset in wordnet.all_synsets(wordnet.ADJ):
        lemma = synset.lemmas()[0]
        # ``+=`` stores the key even when the count is 0, so rarely used
        # adjectives remain available to fill the list.
        fdist[lemma.name()] += lemma.count()
    return fdist


def main():
    """Download the required corpora and write the adjective list to disk."""
    for resource in REQUIRED_RESOURCES:
        nltk.download(resource)

    stop_words = stopwords.words('english')
    adjectives = rank_adjectives(
        count_wordnet_adjectives(), stop_words, ADJECTIVE_LIMIT
    )

    # One adjective per line, no trailing newline (same layout as before).
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as file:
        file.write('\n'.join(adjectives))


if __name__ == '__main__':
    main()
|
||||||
|
|
Loading…
Reference in a new issue