data-files/get_adj.py

import nltk
from nltk.corpus import wordnet
from nltk.corpus import stopwords
from nltk import FreqDist

# Build a frequency-ranked list of English adjectives from WordNet and write
# it to 'english_adjectives.txt', one adjective per line.

# Download necessary resources
nltk.download('wordnet')
nltk.download('stopwords')

# Number of entries written to the output file.
# NOTE(review): the request size has always been `4000+1` even though the
# accompanying comment said "4000"; the size is kept unchanged here --
# confirm whether 4000 or 4001 entries are actually wanted.
ADJECTIVE_LIMIT = 4000 + 1

# Take the first lemma name of every adjective synset.  Duplicates are kept
# on purpose: a word that is the first lemma of several synsets has to be
# counted once per synset, otherwise every frequency below would be 1 and
# the "most common" ranking would carry no information.
adjectives = [
    synset.lemmas()[0].name()
    for synset in wordnet.all_synsets(wordnet.ADJ)
]

# Filter out stopwords
stop_words = set(stopwords.words('english'))
adjectives = [adjective for adjective in adjectives if adjective not in stop_words]

# Count how many adjective synsets each name is the first lemma of.
fdist = FreqDist(adjectives)

# Rank by descending count and break ties alphabetically, so that repeated
# runs over the same corpus always select and order the same adjectives.
ranked_adjectives = sorted(fdist.items(), key=lambda item: (-item[1], item[0]))
most_common_adjectives = ranked_adjectives[:ADJECTIVE_LIMIT]

# Output the adjectives to a text file (explicit encoding so the result does
# not depend on the platform's default locale).
with open('english_adjectives.txt', 'w', encoding='utf-8') as out_file:
    out_file.write('\n'.join(adjective for adjective, _count in most_common_adjectives))
Add adjectives file and script 2024-11-11 19:10:33 -06:00
			`import nltk`
			`from nltk.corpus import wordnet`
			`from nltk.corpus import stopwords`
			`from nltk import FreqDist`

			`# Download necessary resources`
			`nltk.download('wordnet')`
			`nltk.download('stopwords')`

			`# Load all the adjectives from WordNet`
			`adjectives = set([synset.lemmas()[0].name() for synset in wordnet.all_synsets(wordnet.ADJ)])`

			`# Filter out stopwords`
			`stop_words = set(stopwords.words('english'))`
			`adjectives = [adjective for adjective in adjectives if adjective not in stop_words]`

			`# Calculate the frequency distribution of adjectives`
			`fdist = FreqDist(adjectives)`

			`# Get the 4000 most common adjectives`
			`most_common_adjectives = fdist.most_common(4000+1)`

			`# Output the adjectives to a text file`
			`with open('english_adjectives.txt', 'w') as file:`
			`    file.write('\n'.join([adjective for adjective, count in most_common_adjectives]))`