6 from utils.error_handling
import Warning
7 from utils.io
import ENCODING
10 """! "Lexicon is a class containing all the lexical entries of a given language within the entire resource." (LMF)
13 """! @brief Constructor.
14 Lexicon instances are owned by LexicalResource.
15 @return A Lexicon instance.
32 """! @brief Destructor.
33 Release LexicalEntry instances.
40 """! @brief Set lexicon identifier.
41 @param id The identifier to set.
42 @return Lexicon instance.
48 """! @brief Get identifier.
49 @return Lexicon attribute 'id'.
54 """! @brief Set lexicon entry source.
55 @param entry_source The entry source to set.
56 @return Lexicon instance.
62 """! @brief Get entry source.
63 @return Lexicon attribute 'entrySource'.
68 """! @brief Set lexicon language.
69 @param language The language to set.
70 @return Lexicon instance.
76 """! @brief Get language.
77 @return Lexicon attribute 'language'.
82 """! @brief Set lexicon language script.
83 @param language_script The language script to set.
84 @return Lexicon instance.
90 """! @brief Get language script.
91 @return Lexicon attribute 'languageScript'.
96 """! @brief Set lexicon label.
97 @param label The label to set.
98 @return Lexicon instance.
104 """! @brief Get label.
105 @return Lexicon attribute 'label'.
110 """! @brief Set lexicon type.
111 @param lexicon_type The lexicon type to set.
112 @return Lexicon instance.
118 """! @brief Get lexicon type.
119 @return Lexicon attribute 'lexiconType'.
124 raise NotImplementedError
127 raise NotImplementedError
130 """! @brief Set lexicon local path.
131 @param local_path The absolute path to audio files to set.
132 @return Lexicon instance.
138 """! @brief Get lexicon local path.
139 @return Lexicon attribute 'localPath'.
144 """! @brief Get all lexical entries maintained by the lexicon.
145 @return A Python list of lexical entries.
150 """! @brief Add a lexical entry to the lexicon.
151 @param lexical_entry A LexicalEntry instance to add to the Lexicon.
152 @return Lexicon instance.
154 self.lexical_entry.append(lexical_entry)
158 """! @brief Remove a lexical entry from the lexicon.
159 @param lexical_entry The LexicalEntry instance to remove from the Lexicon.
160 @return Lexicon instance.
162 self.lexical_entry.remove(lexical_entry)
166 """! @brief Count number of lexical entries of the lexicon.
167 @return The number of lexical entries without duplicates maintained by the lexicon.
171 def sort_homonym_numbers(self, items=lambda lexical_entry: lexical_entry.get_lexeme(), condition=
lambda lexical_entry:
True):
172 """! @brief Sort similar given items of lexical entries contained in the lexicon according to their homonym number.
173 @param items Lambda function giving the item to sort. Default value is 'lambda lexical_entry: lexical_entry.get_lexeme()', which means that the items to sort are lexemes.
174 @param condition Lambda function giving a condition to apply classification.
175 @return The sorted Python list of lexical entries.
178 """Compare 2 elements between each other.
180 if items(x) == items(y)
and condition(x):
182 nb_x = x.get_homonymNumber()
185 nb_y = y.get_homonymNumber()
195 print Warning(
"Several lexical entries '%s' exist. Please solve this issue by precising the homonym number." % items(x).encode(ENCODING))
198 self.lexical_entry.sort(cmp=compare)
201 def sort_lexical_entries(self, items=lambda lexical_entry: lexical_entry.get_lexeme(), sort_order=
None, comparison=
None):
202 """! @brief Sort given items of lexical entries contained in the lexicon according to a certain order.
203 @param items Lambda function giving the item to sort. Default value is 'lambda lexical_entry: lexical_entry.get_lexeme()', which means that the items to sort are lexemes.
204 @param sort_order Default value is 'None', which means that the lexicographical ordering uses the ASCII ordering.
205 @param comparison Function to compare items. If 'None', a default function to compare character by character is provided.
206 @return The sorted Python list of lexical entries.
209 from pylmflib
import options
212 """Compare 2 elements between each other.
215 x = x.replace(
u"\u0301",
'').replace(
u"\u0302",
'')
216 y = y.replace(
u"\u0301",
'').replace(
u"\u0302",
'')
217 for i
in range(min(len(x), len(y))):
219 if type(sort_order)
is not type(dict()):
220 if sort_order(x[i]) == sort_order(y[i]):
223 if sort_order(x[i]) < sort_order(y[i]):
226 elif sort_order(x[i]) > sort_order(y[i]):
229 if sort_order[x[i]] == sort_order[y[i]]:
232 if sort_order[x[i]] < sort_order[y[i]]:
235 elif sort_order[x[i]] > sort_order[y[i]]:
240 print Warning(
"Cannot compare " + x[i].encode(ENCODING) +
" and " + y[i].encode(ENCODING))
250 elif len(x) > len(y):
255 items_and_entries = [(items(lexical_entry), lexical_entry)
for lexical_entry
in self.
lexical_entry]
256 if sort_order
is None:
258 items_and_entries.sort()
262 if comparison
is None:
264 items_and_entries.sort(cmp=comparison, key=
lambda l: l[0])
266 sorted_entries = [item_and_entry[1]
for item_and_entry
in items_and_entries]
273 """! @brief Find all lexical entries whose characteristics meet the given condition.
274 @param filter Function or lambda function taking a lexical entry as argument, and returning True or False; for instance 'lambda lexical_entry: lexical_entry.get_lexeme() == "Hello"'.
275 @return A Python list of LexicalEntry instances.
279 if filter(lexical_entry):
280 lexical_entries.append(lexical_entry)
281 return lexical_entries
284 """! @brief Check all cross-references in the lexicon.
285 Fill the private attribute '__lexicalEntry' of each RelatedForm instance for all lexical entries.
286 @return Lexicon instance.
289 from string
import digits
295 for related_form
in lexical_entry.get_related_forms():
297 related_lexeme = related_form.get_lexeme()
299 related_homonym_number =
None
300 if related_lexeme[-1]
in digits:
301 related_homonym_number = related_lexeme[-1]
302 related_lexeme = related_lexeme[:-1]
303 found_entry = self.
find_lexical_entries(
lambda lexical_entry: lexical_entry.get_lexeme() == related_lexeme)
305 if len(found_entry) == 2:
306 if found_entry[0].is_subentry()
and not found_entry[1].is_subentry():
308 found_entry = found_entry[:1]
309 elif not found_entry[0].is_subentry()
and found_entry[1].is_subentry():
311 found_entry = found_entry[1:]
312 if len(found_entry) < 1:
314 print Warning(
"Lexical entry '%s' does not exist. Please solve this issue by checking the related form of lexical entry '%s'." % (related_lexeme.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))
315 elif len(found_entry) > 1:
318 if related_homonym_number
is not None:
319 for related_entry
in found_entry:
320 if related_entry.get_homonymNumber() == related_homonym_number:
321 related_homonym.append(related_entry)
322 if len(related_homonym) != 1:
323 print Warning(
"Several lexical entries '%s' exist. Please solve this issue by renaming lexical entries correctly or by precising the homonym number." % related_lexeme.encode(ENCODING))
326 related_form.set_lexical_entry(related_homonym[0])
329 related_form.set_lexical_entry(found_entry[0])
330 for component
in lexical_entry.get_components():
332 found_entries = self.
find_lexical_entries(
lambda lexical_entry: lexical_entry.get_lexeme() == component.get_lexeme())
333 for found_entry
in found_entries:
335 if found_entry.is_component():
337 component.set_lexical_entry(found_entry)
342 """! @brief Reset boolean to be able to check all cross-references in the lexicon again.
343 Reset the private attribute '__checked'.
344 @return Lexicon instance.
350 """This method converts the lexicon into LaTeX format.
def get_id
Get identifier.
def remove_lexical_entry
Remove a lexical entry from the lexicon.
def set_language
Set lexicon language.
def sort_homonym_numbers
Sort similar given items of lexical entries contained in the lexicon according to their homonym numbe...
def set_lexiconType
Set lexicon type.
def get_languageScript
Get language script.
def set_languageScript
Set lexicon language script.
def set_entrySource
Set lexicon entry source.
def get_localPath
Get lexicon local path.
def check_cross_references
Check all cross-references in the lexicon.
def sort_lexical_entries
Sort given items of lexical entries contained in the lexicon according to a certain order...
def reset_check
Reset boolean to be able to check all cross-references in the lexicon again.
def get_lexical_entries
Get all lexical entries maintained by the lexicon.
def add_lexical_entry
Add a lexical entry to the lexicon.
def set_id
Set lexicon identifier.
def get_language
Get language.
def count_lexical_entries
Count number of lexical entries of the lexicon.
"Lexicon is a class containing all the lexical entries of a given language within the entire resource...
def set_localPath
Set lexicon local path.
def set_label
Set lexicon label.
def get_entrySource
Get entry source.
def get_lexiconType
Get lexicon type.
def find_lexical_entries
Find all lexical entries whose characteristics meet the given condition.
lexical_entry
All LexicalEntry instances are maintained by Lexicon. There is one or more LexicalEntry instances per ...