Python LMF library
doc.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""! @package config
"""

from config.mdf import pd_grammaticalNumber, pd_person, pd_anymacy, pd_clusivity
from utils.error_handling import OutputError
from utils.io import ENCODING, EOL

## To define languages and fonts
import config

## Function giving the order in which information must be written in docx, and the mapping between the LMF representation and the docx output
def lmf_to_doc(lexicon, document, items=lambda lexical_entry: lexical_entry.get_lexeme(), sort_order=None, paradigms=False, reverse=False):
    """! @brief Function to convert LMF lexical entry information to be written into docx commands.
    @param lexicon The Lexicon LMF instance to display.
    @param document The docx document to fill in.
    @param items Lambda function giving the item to sort. Default value is 'lambda lexical_entry: lexical_entry.get_lexeme()', which means that the items to sort are lexemes.
    @param sort_order A Python dictionary or lambda function giving the sort order. Default value is 'None', which means that the document output is alphabetically ordered.
    @param paradigms A boolean value indicating whether paradigms must be included in the document.
    @param reverse A boolean value indicating whether a reverse dictionary is wanted.
    """
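    # Illustrative note (assumption, not from the original source): sort_order may be
    # either a dict mapping one- or two-character prefixes to integer ranks, e.g.
    # {'a': 1, 'b': 2, 'ch': 3}, or a callable such as lambda c: c.lower() that is
    # applied to the current initial character to decide section grouping.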
    # Lexicon is already ordered
    level = 0
    previous_character = ''
    current_character = ''
    for lexical_entry in lexicon.get_lexical_entries():
        if type(sort_order) is type(dict()) or type(sort_order) is type(lambda l: l):
            # Check if current element is a lexeme starting with a different character than previous lexeme
            try:
                current_character = items(lexical_entry)[0]
                if sort_order[items(lexical_entry)[0:1]]:
                    current_character = items(lexical_entry)[0:1]
                if sort_order[items(lexical_entry)[0:2]]:
                    current_character = items(lexical_entry)[0:2]
            except IndexError:
                pass
            except KeyError:
                pass
            except TypeError:
                pass
            try:
                if ( (type(sort_order) is not type(dict())) and ((previous_character == '') or (sort_order(current_character) != sort_order(previous_character))) ) \
                   or ( (type(sort_order) is type(dict())) and (int(sort_order[current_character]) != int(sort_order[previous_character])) ):
                    # Do not consider special characters
                    if previous_character != '':
                        document.add_page_break()
                    previous_character = current_character
                    title = ''
                    if type(sort_order) is not type(dict()):
                        title += ' ' + current_character
                    else:
                        for key, value in sorted(sort_order.items(), key=lambda x: x[1]):
                            if int(value) == int(sort_order[current_character]):
                                title += ' ' + key
                    document.add_heading("-" + title + " -".decode(ENCODING), level=level+1)
            except KeyError:
                print Warning("Cannot sort item %s" % items(lexical_entry).encode(ENCODING))
            except IndexError:
                # Item is an empty string
                pass
        else:
            raise OutputError(object, "Sort order must be a dictionary.")
        if not reverse:
            # Lexeme
            lexeme = lexical_entry.get_lexeme()
            if lexical_entry.get_homonymNumber() is not None:
                # Add homonym number to lexeme
                lexeme += " (" + str(lexical_entry.get_homonymNumber()) + ")"
            # Add morphology if any
            morph = ""
            for morphology in lexical_entry.get_morphologies():
                morph += " " + morphology
            # Add dialect if any
            dialect = ""
            for sense in lexical_entry.get_senses():
                for usage_note in sense.find_usage_notes(language=config.xml.vernacular):
                    dialect += " [" + usage_note + "]"
            p = document.add_paragraph()
            p.add_run(lexeme).bold = True
            if morph != "":
                p.add_run(" Morph. :").italic = True
                p.add_run(morph)
            p.add_run(dialect)
            # Dialectal variants
            write_title = True
            for repr in lexical_entry.get_form_representations():
                if repr.get_geographicalVariant() is not None:
                    if write_title:
                        p.add_run(" Var. : ")
                        write_title = False
                    else:
                        p.add_run(" ; ")
                    p.add_run(repr.get_geographicalVariant()).bold = True
                    if repr.get_dialect() is not None:
                        p.add_run(" [" + repr.get_dialect() + "]")
            # Part of speech in italic
            if lexical_entry.get_partOfSpeech() is not None:
                p.add_run(". ")
                p.add_run(lexical_entry.get_partOfSpeech()).italic = True
                p.add_run(".")
            # Grammatical note
            if len(lexical_entry.find_notes(type="grammar")) != 0:
                p = document.add_paragraph()
                p.add_run(" ")
                p.add_run("[Note grammaticale :")
                for note in lexical_entry.find_notes(type="grammar", language=config.xml.regional):
                    p.add_run(" ")
                    p.add_run(note).bold = True
                try:
                    for note in lexical_entry.find_notes(type="grammar", language=config.xml.French):
                        p.add_run(" ")
                        p.add_run(note)
                except AttributeError:
                    for note in lexical_entry.find_notes(type="grammar", language=config.xml.English):
                        p.add_run(" ")
                        p.add_run(note)
                for note in lexical_entry.find_notes(type="grammar", language=config.xml.vernacular):
                    p.add_run(" ")
                    p.add_run(note)
                for note in lexical_entry.find_notes(type="grammar", language=config.xml.national):
                    p.add_run(" ")
                    p.add_run(note)
                # Italic
                for note in lexical_entry.find_notes(type="grammar"):
                    p.add_run(" ")
                    p.add_run(note).italic = True
                p.add_run("].")
            for sense in lexical_entry.get_senses():
                # Glosses
                glosses = ""
                if sense.get_senseNumber() is not None:
                    p = document.add_paragraph()
                    p.add_run(" " + sense.get_senseNumber() + ")")
                for gloss in sense.find_glosses(language=config.xml.vernacular):
                    glosses += " " + gloss + "."
                if glosses == "":
                    glosses = glosses.rstrip(".")
                try:
                    for gloss in sense.find_glosses(language=config.xml.French):
                        glosses += " " + gloss + "."
                except AttributeError:
                    for gloss in sense.find_glosses(language=config.xml.English):
                        glosses += " " + gloss + "."
                glosses = glosses.rstrip(".")
                if glosses != "" and glosses[-1] != '.' and glosses[-1] != '!' and glosses[-1] != '?':
                    glosses += "."
                p.add_run(glosses)
                # Scientific name
                if lexical_entry.get_scientific_name() is not None:
                    p.add_run(" ")
                    p.add_run(lexical_entry.get_scientific_name()).italic = True
                    p.add_run(".")
                # Examples
                for context in sense.get_contexts():
                    p = document.add_paragraph()
                    examples = ""
                    vernacular_forms = context.find_written_forms(language=config.xml.vernacular)
                    for example in vernacular_forms:
                        p.add_run(" ")
                        p.add_run(example.split('[')[0]).bold = True
                        for element in example.split('[')[1:]:
                            p.add_run('[' + element)
                    try:
                        fra_forms = context.find_written_forms(language=config.xml.French)
                        if len(vernacular_forms) != 0 and len(fra_forms) != 0:
                            p.add_run(" ")
                        for example in fra_forms:
                            p.add_run(example)
                        if len(fra_forms) != 0 and fra_forms[0][-1] != '!' and fra_forms[0][-1] != '?':
                            p.add_run(".")
                    except AttributeError:
                        pass
            # Links
            if len(lexical_entry.get_related_forms("simple link")) != 0:
                p = document.add_paragraph()
                p.add_run(" Voir :").italic = True
                for related_form in lexical_entry.get_related_forms("simple link"):
                    if related_form.get_lexical_entry() is not None:
                        # TODO : hyperlink
                        pass
                    p.add_run(" ")
                    p.add_run(related_form.get_lexeme().split('[')[0]).bold = True
                    for element in related_form.get_lexeme().split('[')[1:]:
                        p.add_run('[' + element)
                    try:
                        for written_form in related_form.find_written_forms(language=config.xml.French):
                            p.add_run(" " + written_form)
                    except AttributeError:
                        for written_form in related_form.find_written_forms(language=config.xml.English):
                            p.add_run(" " + written_form)
                p.add_run(".")
            # Notes
            if len(lexical_entry.find_notes(type="general")) != 0:
                p = document.add_paragraph()
                p.add_run(" ")
                p.add_run("[Note :")
                for note in lexical_entry.find_notes(type="general"):
                    p.add_run(" ")
                    p.add_run(note)
                p.add_run("].")
            # Phonological note
            if len(lexical_entry.find_notes(type="phonology")) != 0:
                p = document.add_paragraph()
                p.add_run(" ")
                p.add_run("[Note phonologique :")
                for note in lexical_entry.find_notes(type="phonology"):
                    p.add_run(" ")
                    p.add_run(note)
                p.add_run("].")
            # Anthropological note
            if len(lexical_entry.find_notes(type="anthropology")) != 0:
                p = document.add_paragraph()
                p.add_run(" ")
                p.add_run("[Note anthropologique :")
                for note in lexical_entry.find_notes(type="anthropology"):
                    p.add_run(" ")
                    p.add_run(note)
                p.add_run("].")
            if paradigms:
                if len(lexical_entry.get_word_forms()) != 0:
                    # Intense quote
                    document.add_paragraph('Paradigms', style='IntenseQuote')
                    # Table
                    table = document.add_table(rows=1, cols=2)
                    hdr_cells = table.rows[0].cells
                    hdr_cells[0].text = 'Paradigm'
                    hdr_cells[1].text = 'Form'
                    for item in lexical_entry.find_paradigms(grammatical_number=pd_grammaticalNumber["sg"]):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "sg"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(grammatical_number=pd_grammaticalNumber["pl"]):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "pl"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[1], grammatical_number=pd_grammaticalNumber['s']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "1s"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[2], grammatical_number=pd_grammaticalNumber['s']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "2s"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[3], grammatical_number=pd_grammaticalNumber['s']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "3s"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(anymacy=pd_anymacy[4], grammatical_number=pd_grammaticalNumber['s']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "4s"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[1], grammatical_number=pd_grammaticalNumber['d']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "1d"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[2], grammatical_number=pd_grammaticalNumber['d']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "2d"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[3], grammatical_number=pd_grammaticalNumber['d']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "3d"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(anymacy=pd_anymacy[4], grammatical_number=pd_grammaticalNumber['d']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "4d"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[1], grammatical_number=pd_grammaticalNumber['p']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "1p"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[1], grammatical_number=pd_grammaticalNumber['p'], clusivity=pd_clusivity['e']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "1e"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[1], grammatical_number=pd_grammaticalNumber['p'], clusivity=pd_clusivity['i']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "1i"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[2], grammatical_number=pd_grammaticalNumber['p']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "2p"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(person=pd_person[3], grammatical_number=pd_grammaticalNumber['p']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "3p"
                        row_cells[1].text = item
                    for item in lexical_entry.find_paradigms(anymacy=pd_anymacy[4], grammatical_number=pd_grammaticalNumber['p']):
                        row_cells = table.add_row().cells
                        row_cells[0].text = "4p"
                        row_cells[1].text = item
                if len(lexical_entry.get_word_forms()) != 0:
                    p = document.add_paragraph()
            # Handle subentries
            for related_form in lexical_entry.get_related_forms("subentry"):
                if related_form.get_lexical_entry() is not None:
                    p = document.add_paragraph()
                    p.add_run(" ")
                    p.add_run(related_form.get_lexeme().split('[')[0]).bold = True
                    for element in related_form.get_lexeme().split('[')[1:]:
                        p.add_run('[' + element.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE"))
                    for sense in related_form.get_lexical_entry().get_senses():
                        glosses = ""
                        for gloss in sense.find_glosses(language=config.xml.vernacular):
                            glosses += " " + gloss + "."
                        if glosses == "":
                            glosses = glosses.rstrip(".")
                        try:
                            for gloss in sense.find_glosses(language=config.xml.French):
                                glosses += " " + gloss + "."
                        except AttributeError:
                            for gloss in sense.find_glosses(language=config.xml.English):
                                glosses += " " + gloss + "."
                        if glosses == "":
                            glosses = glosses.rstrip(".")
                        p.add_run(glosses)
                    p.add_run(EOL)
        else: # reverse
            # English gloss
            is_gloss = False
            for sense in lexical_entry.get_senses():
                for gloss in sense.find_glosses(language=config.xml.English):
                    if not is_gloss:
                        # Paragraph
                        p = document.add_paragraph()
                    # Write gloss in bold, except characters that are between brackets or square brackets
                    brackets = 0
                    bold = True
                    for c in gloss:
                        if c == '(' or c == '[':
                            # Write following characters in non-bold
                            brackets += 1
                            if brackets > 0:
                                bold = False
                            else:
                                bold = True
                            p.add_run(c).bold = bold
                        elif c == ')' or c == ']':
                            # Write following characters in bold
                            p.add_run(c).bold = bold
                            brackets -= 1
                            if brackets > 0:
                                bold = False
                            else:
                                bold = True
                        else:
                            p.add_run(c).bold = bold
                    if gloss[-1] != '?' and gloss[-1] != '!' and gloss[-1] != '.':
                        p.add_run(".")
                    p.add_run(" ")
                    is_gloss = True
            if is_gloss:
                # Scientific name
                if lexical_entry.get_scientific_name() is not None:
                    p.add_run(lexical_entry.get_scientific_name()).italic = True
                    p.add_run(". ")
                # Lexeme
                p.add_run(lexical_entry.get_lexeme())
                if lexical_entry.get_lexeme()[-1] != '?' and lexical_entry.get_lexeme()[-1] != '!' and lexical_entry.get_lexeme()[-1] != '.':
                    p.add_run(".")
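A minimal usage sketch (not part of the original file): it assumes `lexicon` is an already populated Lexicon instance obtained elsewhere in the library, that python-docx is installed, and that the language settings in `config.xml` have been configured. The sort_order dictionary shown here is purely illustrative.

from docx import Document

document = Document()
# Illustrative ranking of initial characters; a real project supplies its own mapping
# or a callable, since lmf_to_doc raises OutputError for other sort_order types.
sort_order = {'a': 1, 'b': 2, 'c': 3}
lmf_to_doc(lexicon, document, sort_order=sort_order, paradigms=True)
document.save("dictionary.docx")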