1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
// disambiguateWordsEnhanced chooses one sense for every word of sentence that
// has an entry in meanings and returns a map from word to the chosen sense.
//
// The sentence is lower-cased, split on whitespace and stripped of punctuation,
// so keys of meanings are matched against lower-case, punctuation-free tokens.
// Words that are absent from meanings, or whose sense list is empty, are left
// out of the result. A word with a single sense is mapped to that sense.
//
// For words with several senses, each sense is scored against the sentence:
//   - +3.0 for every word of the sense that is a keyword of the sentence's
//     detected domain,
//   - +2.0 for every (sense word, distinct sentence word) pair that is equal,
//   - +0.8 for every such pair where one word is a substring of the other.
//
// The highest score wins; on equal scores the sense listed first is kept.
//
// The domain is the one whose keywords occur most often among the distinct
// sentence words. If no keyword occurs at all, no domain bonus is applied.
// If several domains tie, the one declared first below is used, which makes
// the result reproducible from call to call.
func disambiguateWordsEnhanced(sentence string, meanings map[string][]string) map[string]string {
	// A slice (rather than a map) fixes the iteration order, so ties between
	// domains are always resolved the same way.
	domains := []struct {
		name     string
		keywords string
	}{
		{"financial", "money bank deposit withdraw account credit loan"},
		{"natural", "river lake water mud shore flow stream"},
		{"technology", "computer click screen mouse keyboard software"},
		{"music", "play band guitar song note instrument sound"},
		{"sports", "ball throw pitch catch game team player"},
	}

	// tokenize lower-cases text, splits it on whitespace and removes every
	// punctuation rune; tokens that end up empty are dropped.
	tokenize := func(text string) []string {
		fields := strings.Fields(strings.ToLower(text))
		tokens := make([]string, 0, len(fields))
		for _, field := range fields {
			cleaned := strings.Map(func(r rune) rune {
				if unicode.IsPunct(r) {
					return -1
				}
				return r
			}, field)
			if cleaned != "" {
				tokens = append(tokens, cleaned)
			}
		}
		return tokens
	}

	// Tokenize each domain's keywords once instead of on every scoring call.
	keywordSets := make([]map[string]struct{}, len(domains))
	for i, d := range domains {
		set := make(map[string]struct{})
		for _, kw := range tokenize(d.keywords) {
			set[kw] = struct{}{}
		}
		keywordSets[i] = set
	}

	// The context is the list of distinct sentence words, kept in order of
	// first appearance so that floating-point sums are evaluated in a fixed
	// order.
	words := tokenize(sentence)
	seen := make(map[string]struct{}, len(words))
	context := make([]string, 0, len(words))
	for _, w := range words {
		if _, dup := seen[w]; dup {
			continue
		}
		seen[w] = struct{}{}
		context = append(context, w)
	}

	// Pick the domain with the most keyword hits. domainSet stays nil when no
	// keyword matches; lookups in a nil map simply report "not found", so no
	// domain bonus is granted in that case.
	var domainSet map[string]struct{}
	bestHits := 0
	for i := range domains {
		hits := 0
		for _, w := range context {
			if _, ok := keywordSets[i][w]; ok {
				hits++
			}
		}
		if hits > bestHits { // strict: the earlier domain wins a tie
			bestHits = hits
			domainSet = keywordSets[i]
		}
	}

	// similarity scores one sense description against the domain and context.
	similarity := func(meaning string) float64 {
		meaningWords := tokenize(meaning)
		score := 0.0
		// Domain keyword bonus.
		for _, mw := range meaningWords {
			if _, ok := domainSet[mw]; ok {
				score += 3.0
			}
		}
		// Context overlap: exact matches count more than substring matches.
		for _, mw := range meaningWords {
			for _, cw := range context {
				switch {
				case mw == cw:
					score += 2.0
				case strings.Contains(mw, cw) || strings.Contains(cw, mw):
					score += 0.8
				}
			}
		}
		return score
	}

	result := make(map[string]string)
	for _, w := range words {
		if _, done := result[w]; done {
			continue // repeated word: the outcome would be identical
		}
		senses, ok := meanings[w]
		if !ok || len(senses) == 0 {
			continue // unknown word, or nothing to choose from
		}
		best := senses[0]
		if len(senses) > 1 {
			bestScore := similarity(best)
			for _, sense := range senses[1:] {
				if s := similarity(sense); s > bestScore {
					bestScore = s
					best = sense
				}
			}
		}
		result[w] = best
	}
	return result
}
|