Python LMF library
 All Classes Namespaces Files Functions Variables
lexicon.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 """! @package core
4 """
5 
6 from utils.error_handling import Warning
7 from utils.io import ENCODING
8 
class Lexicon():
    """! "Lexicon is a class containing all the lexical entries of a given language within the entire resource." (LMF)
    """
    def __init__(self, id=None):
        """! @brief Constructor.
        Lexicon instances are owned by LexicalResource.
        @param id The lexicon identifier. Default value is 'None'.
        @return A Lexicon instance.
        """
        self.set_id(id)
        self.language = None
        self.languageScript = None
        self.label = None
        self.lexiconType = None
        self.entrySource = None
        self.vowelHarmony = None
        self.localPath = None
        ## All LexicalEntry instances are maintained by Lexicon
        # There is one or more LexicalEntry instances per Lexicon
        self.lexical_entry = []
        # To know if cross references have already been verified or not
        self.__checked = False

    def __del__(self):
        """! @brief Destructor.
        Release LexicalEntry instances by emptying the list of lexical entries in place.
        """
        # Clearing the list drops the references that the lexicon holds on its entries
        del self.lexical_entry[:]

    def set_id(self, id):
        """! @brief Set lexicon identifier.
        @param id The identifier to set.
        @return Lexicon instance.
        """
        self.id = id
        return self

    def get_id(self):
        """! @brief Get identifier.
        @return Lexicon attribute 'id'.
        """
        return self.id

    def set_entrySource(self, entry_source):
        """! @brief Set lexicon entry source.
        @param entry_source The entry source to set.
        @return Lexicon instance.
        """
        self.entrySource = entry_source
        return self

    def get_entrySource(self):
        """! @brief Get entry source.
        @return Lexicon attribute 'entrySource'.
        """
        return self.entrySource

    def set_language(self, language):
        """! @brief Set lexicon language.
        @param language The language to set.
        @return Lexicon instance.
        """
        self.language = language
        return self

    def get_language(self):
        """! @brief Get language.
        @return Lexicon attribute 'language'.
        """
        return self.language

    def set_languageScript(self, language_script):
        """! @brief Set lexicon language script.
        @param language_script The language script to set.
        @return Lexicon instance.
        """
        self.languageScript = language_script
        return self

    def get_languageScript(self):
        """! @brief Get language script.
        @return Lexicon attribute 'languageScript'.
        """
        return self.languageScript

    def set_label(self, label):
        """! @brief Set lexicon label.
        @param label The label to set.
        @return Lexicon instance.
        """
        self.label = label
        return self

    def get_label(self):
        """! @brief Get label.
        @return Lexicon attribute 'label'.
        """
        return self.label

    def set_lexiconType(self, lexicon_type):
        """! @brief Set lexicon type.
        @param lexicon_type The lexicon type to set.
        @return Lexicon instance.
        """
        self.lexiconType = lexicon_type
        return self

    def get_lexiconType(self):
        """! @brief Get lexicon type.
        @return Lexicon attribute 'lexiconType'.
        """
        return self.lexiconType

    def set_vowelHarmony(self, vowel_harmony):
        """! @brief Set lexicon vowel harmony (not implemented).
        @param vowel_harmony The vowel harmony to set.
        @exception NotImplementedError Always raised.
        """
        raise NotImplementedError

    def get_vowelHarmony(self):
        """! @brief Get vowel harmony (not implemented).
        @exception NotImplementedError Always raised.
        """
        raise NotImplementedError

    def set_localPath(self, local_path):
        """! @brief Set lexicon local path.
        @param local_path The absolute path to audio files to set.
        @return Lexicon instance.
        """
        self.localPath = local_path
        return self

    def get_localPath(self):
        """! @brief Get lexicon local path.
        @return Lexicon attribute 'localPath'.
        """
        return self.localPath

    def get_lexical_entries(self):
        """! @brief Get all lexical entries maintained by the lexicon.
        @return The Python list of lexical entries held by the lexicon (the list itself, not a copy).
        """
        return self.lexical_entry

    def add_lexical_entry(self, lexical_entry):
        """! @brief Add a lexical entry to the lexicon.
        @param lexical_entry A LexicalEntry instance to add to the Lexicon.
        @return Lexicon instance.
        """
        self.lexical_entry.append(lexical_entry)
        return self

    def remove_lexical_entry(self, lexical_entry):
        """! @brief Remove a lexical entry from the lexicon.
        @param lexical_entry The LexicalEntry instance to remove from the Lexicon.
        @return Lexicon instance.
        @exception ValueError Raised by list.remove() if the lexical entry is not in the lexicon.
        """
        self.lexical_entry.remove(lexical_entry)
        return self

    def count_lexical_entries(self):
        """! @brief Count number of lexical entries of the lexicon.
        @return The length of the list of lexical entries maintained by the lexicon (an entry added several times is counted several times).
        """
        return len(self.get_lexical_entries())

    def sort_homonym_numbers(self, items=lambda lexical_entry: lexical_entry.get_lexeme(), condition=lambda lexical_entry: True):
        """! @brief Sort similar given items of lexical entries contained in the lexicon according to their homonym number.
        Entries whose items differ compare as equal, so their relative order is kept by the (stable) sort.
        @param items Lambda function giving the item to sort. Default value is 'lambda lexical_entry: lexical_entry.get_lexeme()', which means that the items to sort are lexemes.
        @param condition Lambda function giving a condition to apply classification. It is evaluated on the first of the two compared entries.
        @return The sorted Python list of lexical entries.
        """
        # Local import, like the other imports done inside methods of this class
        # cmp_to_key wraps a cmp-style function so that it works with the 'key' argument of list.sort() (Python 2.7 and 3)
        from functools import cmp_to_key
        def compare(x, y):
            """Compare 2 lexical entries between each other.
            """
            if not (items(x) == items(y) and condition(x)):
                # Entries are not similar, or classification is not wanted => do nothing
                return 0
            # Classify similar entries by homonym number; a missing homonym number counts as 0
            nb_x = x.get_homonymNumber()
            if nb_x is None:
                nb_x = 0
            nb_y = y.get_homonymNumber()
            if nb_y is None:
                nb_y = 0
            # If the 1st one is lower than the 2nd one, its rank is decremented
            if nb_x < nb_y:
                return -1
            # If the 1st one is greater than the 2nd one, its rank is incremented
            if nb_x > nb_y:
                return 1
            # Same item and same homonym number: report it, and leave both entries where they are
            print(Warning("Several lexical entries '%s' exist. Please solve this issue by precising the homonym number." % items(x).encode(ENCODING)))
            return 0
        self.lexical_entry.sort(key=cmp_to_key(compare))
        return self.lexical_entry

    def sort_lexical_entries(self, items=lambda lexical_entry: lexical_entry.get_lexeme(), sort_order=None, comparison=None):
        """! @brief Sort given items of lexical entries contained in the lexicon according to a certain order.
        The list of lexical entries is reordered in place; entries with equal items keep their relative order.
        @param items Lambda function giving the item to sort. Default value is 'lambda lexical_entry: lexical_entry.get_lexeme()', which means that the items to sort are lexemes.
        @param sort_order Either a Python dictionary giving the rank of each character, or a function taking a character and returning its rank. Default value is 'None', which means that items are sorted with the default Python ordering.
        @param comparison Function to compare 2 items, returning a negative, null or positive number. If 'None', a default function to compare character by character is provided. It is only used if 'sort_order' is not 'None'.
        @return The sorted Python list of lexical entries.
        """
        # To access options: the 'global' statement has to come before the import that binds the name
        global options
        from pylmflib import options
        from functools import cmp_to_key
        order_is_table = isinstance(sort_order, dict)
        def rank(character):
            """Give the rank of a character according to the sort order.
            """
            if order_is_table:
                return sort_order[character]
            return sort_order(character)
        def compare(x, y):
            """Compare 2 items between each other, character by character.
            """
            # Before comparing, remove combining acute (U+0301) and circumflex (U+0302) accents from strings if any
            x = x.replace(u"\u0301", '').replace(u"\u0302", '')
            y = y.replace(u"\u0301", '').replace(u"\u0302", '')
            for char_x, char_y in zip(x, y):
                try:
                    left, right = rank(char_x), rank(char_y)
                # Handle other characters: fall back to comparing the characters themselves
                except KeyError:
                    if options.verbose:
                        print(Warning("Cannot compare %s and %s" % (char_x.encode(ENCODING), char_y.encode(ENCODING))))
                    left, right = char_x, char_y
                # If the 1st one is lower than the 2nd one, its rank is decremented
                if left < right:
                    return -1
                # If the 1st one is greater than the 2nd one, its rank is incremented
                if left > right:
                    return 1
                # Otherwise, go on with next characters
            # If both strings do not have the same length, they do not equal => the smallest string is the shortest one
            if len(x) < len(y):
                return -1
            if len(x) > len(y):
                return 1
            # If all characters match, both equal => do nothing
            return 0
        # Create a list of tuples associating items and their lexical entries: [(item1, entry1), (item2, entry2), ...]
        items_and_entries = [(items(lexical_entry), lexical_entry) for lexical_entry in self.lexical_entry]
        if sort_order is None:
            # Sort given items with the default ordering; sorting on the item only avoids comparing lexical entries themselves
            items_and_entries.sort(key=lambda item_and_entry: item_and_entry[0])
        else:
            if comparison is None:
                comparison = compare
            item_key = cmp_to_key(comparison)
            items_and_entries.sort(key=lambda item_and_entry: item_key(item_and_entry[0]))
        # Replace the content of the list in place, so that lists previously returned by get_lexical_entries() stay valid
        self.lexical_entry[:] = [item_and_entry[1] for item_and_entry in items_and_entries]
        return self.lexical_entry

    def find_lexical_entries(self, filter):
        """! @brief Find all lexical entries which characteristics meet the given condition.
        @param filter Function or lambda function taking a lexical entry as argument, and returning True or False; for instance 'lambda lexical_entry: lexical_entry.get_lexeme() == "Hello"'.
        @return A new Python list of the LexicalEntry instances accepted by the filter, in lexicon order.
        """
        return [lexical_entry for lexical_entry in self.get_lexical_entries() if filter(lexical_entry)]

    def check_cross_references(self):
        """! @brief Check all cross-references in the lexicon.
        For each related form and each component of all lexical entries, look for the lexical entry having the same lexeme, and give it to the related form or component through 'set_lexical_entry()'.
        The check is performed only once, until 'reset_check()' is called.
        @return Lexicon instance.
        """
        from string import digits
        if self.__checked:
            return self
        # Verify cross references only once
        self.__checked = True
        for lexical_entry in self.get_lexical_entries():
            for related_form in lexical_entry.get_related_forms():
                # From RelatedForm targets attribute, retrieve the pointed LexicalEntry instance
                related_lexeme = related_form.get_lexeme()
                # Check if there is an homonym number at the end of the related lexeme (only the last character is considered)
                related_homonym_number = None
                if related_lexeme and related_lexeme[-1] in digits:
                    related_homonym_number = related_lexeme[-1]
                    related_lexeme = related_lexeme[:-1]
                found_entry = self.find_lexical_entries(lambda entry: entry.get_lexeme() == related_lexeme)
                # Remove duplicate subentries from check if any
                if len(found_entry) == 2:
                    if found_entry[0].is_subentry() and not found_entry[1].is_subentry():
                        # Keep only the first one, which is the subentry
                        found_entry = found_entry[:1]
                    elif not found_entry[0].is_subentry() and found_entry[1].is_subentry():
                        # Keep only the second one, which is the subentry
                        found_entry = found_entry[1:]
                if len(found_entry) < 1:
                    # No lexical entry with this lexeme exists
                    print(Warning("Lexical entry '%s' does not exist. Please solve this issue by checking the related form of lexical entry '%s'." % (related_lexeme.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING))))
                elif len(found_entry) > 1:
                    # Several lexical entries with this lexeme exist => consider homonym number if any
                    related_homonym = []
                    if related_homonym_number is not None:
                        # NOTE(review): the homonym number is a one-character string here; this assumes that get_homonymNumber() returns a string too -- to be confirmed
                        related_homonym = [related_entry for related_entry in found_entry if related_entry.get_homonymNumber() == related_homonym_number]
                    if len(related_homonym) != 1:
                        print(Warning("Several lexical entries '%s' exist. Please solve this issue by renaming lexical entries correctly or by precising the homonym number." % related_lexeme.encode(ENCODING)))
                    else:
                        # Save the found lexical entry
                        related_form.set_lexical_entry(related_homonym[0])
                else:
                    # Save the found lexical entry
                    related_form.set_lexical_entry(found_entry[0])
            for component in lexical_entry.get_components():
                # From Component targets attribute, retrieve the pointed LexicalEntry instance
                component_lexeme = component.get_lexeme()
                found_entries = self.find_lexical_entries(lambda entry: entry.get_lexeme() == component_lexeme)
                for found_entry in found_entries:
                    # Check that the found entry is a component
                    if found_entry.is_component():
                        # Save the first matching lexical entry only
                        component.set_lexical_entry(found_entry)
                        break
        return self

    def reset_check(self):
        """! @brief Reset boolean to be able to check all cross-references in the lexicon again.
        Reset the private attribute '__checked'.
        @return Lexicon instance.
        """
        self.__checked = False
        return self

    def convert_to_latex(self):
        """This method converts the lexicon into LaTeX format.
        It currently does nothing.
        """
        pass
def remove_lexical_entry
Remove a lexical entry from the lexicon.
Definition: lexicon.py:157
def set_language
Set lexicon language.
Definition: lexicon.py:67
def sort_homonym_numbers
Sort similar given items of lexical entries contained in the lexicon according to their homonym number.
Definition: lexicon.py:171
def set_lexiconType
Set lexicon type.
Definition: lexicon.py:109
def get_languageScript
Get language script.
Definition: lexicon.py:89
def set_languageScript
Set lexicon language script.
Definition: lexicon.py:81
def set_entrySource
Set lexicon entry source.
Definition: lexicon.py:53
def get_localPath
Get lexicon local path.
Definition: lexicon.py:137
def check_cross_references
Check all cross-references in the lexicon.
Definition: lexicon.py:283
def sort_lexical_entries
Sort given items of lexical entries contained in the lexicon according to a certain order.
Definition: lexicon.py:201
def reset_check
Reset boolean to be able to check all cross-references in the lexicon again.
Definition: lexicon.py:341
def get_lexical_entries
Get all lexical entries maintained by the lexicon.
Definition: lexicon.py:143
def add_lexical_entry
Add a lexical entry to the lexicon.
Definition: lexicon.py:149
def set_id
Set lexicon identifier.
Definition: lexicon.py:39
def count_lexical_entries
Count number of lexical entries of the lexicon.
Definition: lexicon.py:165
"Lexicon is a class containing all the lexical entries of a given language within the entire resource." (LMF)
Definition: lexicon.py:9
def set_localPath
Set lexicon local path.
Definition: lexicon.py:129
def set_label
Set lexicon label.
Definition: lexicon.py:95
def get_entrySource
Get entry source.
Definition: lexicon.py:61
def get_lexiconType
Get lexicon type.
Definition: lexicon.py:117
def find_lexical_entries
Find all lexical entries which characteristics meet the given condition.
Definition: lexicon.py:272
lexical_entry
All LexicalEntry instances are maintained by Lexicon. There is one or more LexicalEntry instances per Lexicon.
Definition: lexicon.py:27