-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtag_senses_bootstrap.py
More file actions
executable file
·43 lines (36 loc) · 1.32 KB
/
tag_senses_bootstrap.py
File metadata and controls
executable file
·43 lines (36 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/python
import sys
import wsd
import nltk
from nltk.corpus.reader.plaintext import PlaintextCorpusReader
from nltk.text import *
# Bootstrap pass of the word-sense tagger.
#
# Usage: tag_senses_bootstrap.py word sense1 sense2
#
# Loads the decision list stored in "senses_bootstrap_<word>.csv", walks every
# occurrence of <word> in the corpus under 'outcorpus/', tags the occurrences
# whose collocations the decision list can already resolve, feeds those tagged
# collocations back into the decision list, and finally writes the list back
# to the same CSV file.
#
# print calls use a single pre-formatted argument so the output is the same
# under the Python 2 print statement and the Python 3 print function.
if len(sys.argv) != 4:
    print("Usage: %s word sense1 sense2" % sys.argv[0])
    # sys.exit rather than the bare exit() helper, which only exists when the
    # site module has been loaded.
    sys.exit(-1)

focal_word = sys.argv[1]
senses = [sys.argv[2], sys.argv[3]]
# Example values: focal_word = "plant", senses = ["manufacturing", "life"]

corpus = PlaintextCorpusReader('outcorpus/', '.*')

# One extractor per collocation type. The second argument is later read back
# as `collocation.index` when querying the decision list.
# NOTE(review): [2, 10] is presumably the token window for BigramScope --
# confirm against the wsd module.
collocations = [
    wsd.BigramLeft(senses, 0),
    wsd.BigramRight(senses, 1),
    wsd.BigramScope(senses, 2, [2, 10]),
]

# The same file is used as both the input and the output of this pass.
decision_list_path = "senses_bootstrap_" + focal_word + ".csv"
decision_list = wsd.DecisionList()
decision_list.load(decision_list_path)

# Query the file list once; it is needed for both ordering and the total.
fileids = corpus.fileids()
total_files = len(fileids)

for file_number, infile in enumerate(sorted(fileids)):
    # Progress indicator: "<files already processed> / <total files>".
    print("%d / %d" % (file_number, total_files))

    text = Text(corpus.words(infile))
    concordance = nltk.ConcordanceIndex(text.tokens)

    for offset in concordance.offsets(focal_word):
        for collocation in collocations:
            tokens = collocation.get_collocation(text, offset)
            if tokens is None:
                continue

            sense = decision_list.get_sense(tokens, collocation.index)
            if sense is None:
                # The decision list has no rule for this collocation yet.
                continue

            # NOTE(review): the nesting of the next three statements was
            # reconstructed; they are placed in the innermost loop because
            # `sense` is only bound here. Confirm that the decision list is
            # meant to be refreshed after every tagged occurrence.
            collocation.add_collocation(text, offset, sense)
            collocation.update_decision_list(decision_list)
            print(sense)

decision_list.save(decision_list_path)