Posterior a observar en diferentes repositorios y sitios webs al final hallamos la resolución que te enseñaremos ahora.
Solución:
Este es más un enfoque heurístico. Lo acabo de codificar para disculparme por el estilo. Utiliza derivationally_related_forms () de wordnet. He implementado nounify. Supongo que verbify funciona de manera análoga. Por lo que he probado funciona bastante bien:
from nltk.corpus import wordnet as wn
def nounify(verb_word):
""" Transform a verb to the closest noun: die -> death """
verb_synsets = wn.synsets(verb_word, pos="v")
# Word not found
if not verb_synsets:
return []
# Get all verb lemmas of the word
verb_lemmas = [l for s in verb_synsets
for l in s.lemmas if s.name.split('.')[1] == 'v']
# Get related forms
derivationally_related_forms = [(l, l.derivationally_related_forms())
for l in verb_lemmas]
# filter only the nouns
related_noun_lemmas = [l for drf in derivationally_related_forms
for l in drf[1] if l.synset.name.split('.')[1] == 'n']
# Extract the words from the lemmas
words = [l.name for l in related_noun_lemmas]
len_words = len(words)
# Build the result in the form of a list containing tuples (word, probability)
result = [(w, float(words.count(w))/len_words) for w in set(words)]
result.sort(key=lambda w: -w[1])
# return all the possibilities sorted by probability
return result
Aquí hay una función que en teoría es capaz de convertir palabras entre sustantivo / verbo / adjetivo / adverbio que actualicé desde aquí (originalmente escrito por pantanos, creo) para que sea compatible con nltk 3.2.5 ahora que synset.lemmas
y sysnset.name
son funciones.
from nltk.corpus import wordnet as wn
# Just to make it a bit more readable
WN_NOUN = 'n'
WN_VERB = 'v'
WN_ADJECTIVE = 'a'
WN_ADJECTIVE_SATELLITE = 's'
WN_ADVERB = 'r'
def convert(word, from_pos, to_pos):
""" Transform words given from/to POS tags """
synsets = wn.synsets(word, pos=from_pos)
# Word not found
if not synsets:
return []
# Get all lemmas of the word (consider 'a'and 's' equivalent)
lemmas = []
for s in synsets:
for l in s.lemmas():
if s.name().split('.')[1] == from_pos or from_pos in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE) and s.name().split('.')[1] in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE):
lemmas += [l]
# Get related forms
derivationally_related_forms = [(l, l.derivationally_related_forms()) for l in lemmas]
# filter only the desired pos (consider 'a' and 's' equivalent)
related_noun_lemmas = []
for drf in derivationally_related_forms:
for l in drf[1]:
if l.synset().name().split('.')[1] == to_pos or to_pos in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE) and l.synset().name().split('.')[1] in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE):
related_noun_lemmas += [l]
# Extract the words from the lemmas
words = [l.name() for l in related_noun_lemmas]
len_words = len(words)
# Build the result in the form of a list containing tuples (word, probability)
result = [(w, float(words.count(w)) / len_words) for w in set(words)]
result.sort(key=lambda w:-w[1])
# return all the possibilities sorted by probability
return result
convert('direct', 'a', 'r')
convert('direct', 'a', 'n')
convert('quick', 'a', 'r')
convert('quickly', 'r', 'a')
convert('hunger', 'n', 'v')
convert('run', 'v', 'a')
convert('tired', 'a', 'r')
convert('tired', 'a', 'v')
convert('tired', 'a', 'n')
convert('tired', 'a', 's')
convert('wonder', 'v', 'n')
convert('wonder', 'n', 'a')
Como puede ver a continuación, no funciona tan bien. No puede cambiar entre adjetivo y adverbio (mi objetivo específico), pero da algunos resultados interesantes en otros casos.
>>> convert('direct', 'a', 'r')
[]
>>> convert('direct', 'a', 'n')
[('directness', 0.6666666666666666), ('line', 0.3333333333333333)]
>>> convert('quick', 'a', 'r')
[]
>>> convert('quickly', 'r', 'a')
[]
>>> convert('hunger', 'n', 'v')
[('hunger', 0.75), ('thirst', 0.25)]
>>> convert('run', 'v', 'a')
[('persistent', 0.16666666666666666), ('executive', 0.16666666666666666), ('operative', 0.16666666666666666), ('prevalent', 0.16666666666666666), ('meltable', 0.16666666666666666), ('operant', 0.16666666666666666)]
>>> convert('tired', 'a', 'r')
[]
>>> convert('tired', 'a', 'v')
[]
>>> convert('tired', 'a', 'n')
[('triteness', 0.25), ('banality', 0.25), ('tiredness', 0.25), ('commonplace', 0.25)]
>>> convert('tired', 'a', 's')
[]
>>> convert('wonder', 'v', 'n')
[('wonder', 0.3333333333333333), ('wonderer', 0.2222222222222222), ('marveller', 0.1111111111111111), ('marvel', 0.1111111111111111), ('wonderment', 0.1111111111111111), ('question', 0.1111111111111111)]
>>> convert('wonder', 'n', 'a')
[('curious', 0.4), ('wondrous', 0.2), ('marvelous', 0.2), ('marvellous', 0.2)]
Espero que esto pueda salvar a alguien de un pequeño problema.
Entiendo que esto no responde toda su pregunta, pero responde una gran parte. Vería http://nodebox.net/code/index.php/Linguistics#verb_conjugation Esta biblioteca de Python es capaz de conjugar verbos y reconocer si una palabra es un verbo, sustantivo o adjetivo.
CÓDIGO DE EJEMPLO
print en.verb.present("gave")
print en.verb.present("gave", person=3, negate=False)
>>> give
>>> gives
También puede categorizar palabras.
print en.is_noun("banana")
>>> True
La descarga está en la parte superior del enlace.