Skip to content

Commit

Permalink
add nltk requirement
Browse files Browse the repository at this point in the history
  • Loading branch information
senderle committed Nov 3, 2020
1 parent 728b6d4 commit 2c71434
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
22 changes: 21 additions & 1 deletion lexpart/vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,25 @@

import numpy
from pyhash import city_64
from nltk.corpus import wordnet

from .util import docs_tokens, temp_test_corpus

import nltk


def get_wordnet():
    """
    Return the NLTK WordNet corpus reader, downloading its data if needed.

    ``from nltk.corpus import wordnet`` yields a lazy loader: the import
    itself succeeds even when the corpus data is not installed, and the
    missing-data error (``LookupError``) is only raised on first use. So
    the corpus is explicitly loaded here to detect that case, and a
    download is attempted once before giving up.

    Returns
    -------
    The loaded ``wordnet`` corpus object, or ``None`` when nltk cannot be
    imported or the corpus data is unavailable and could not be downloaded.
    """
    try:
        import nltk
        from nltk.corpus import wordnet
    except ImportError:
        return None

    try:
        # Force the lazy loader to locate the corpus data now.
        wordnet.ensure_loaded()
        return wordnet
    except LookupError:
        pass

    # Corpus data is missing; try to fetch it, then retry the load.
    nltk.download('wordnet')
    try:
        wordnet.ensure_loaded()
    except LookupError:
        return None
    return wordnet


def stable_random_matrix(words, dimension, _hashfunc=city_64(0)):
"""
Expand Down Expand Up @@ -134,6 +149,11 @@ def from_corpus(cls, docpath, vocab_max, min_count,

word, count = map(numpy.array, zip(*ct.most_common(vocab_max)))
if synset_potential:
wordnet = get_wordnet()
if wordnet is None:
print("The synset_potential option requires the nltk "
"wordnet corpus, but it could not be downloaded."
"Falling back to the default.")
synset_lens = numpy.array([len(wordnet.synsets(w)) for w in word])
potential = numpy.log10(synset_lens + 1) * 0.3 + 1
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
# your project is installed. For an analysis of "install_requires" vs pip's
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=['numpy', 'numba', 'pyhash'],
install_requires=['numpy', 'numba', 'pyhash', 'nltk'],

# List additional groups of dependencies here (e.g. development
# dependencies). You can install these using the following syntax,
Expand Down

0 comments on commit 2c71434

Please sign in to comment.