Python LMF library
 All Classes Namespaces Files Functions Variables
uid.py
Go to the documentation of this file.
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""! @package utils.uid
Command-line filter that adds unique identifiers (UID) to an MDF file.

Usage: uid.py -i INPUT -o OUTPUT

The headword of every 'lx' or 'se' field is converted with uni2sampa() and
combined with the homonym number of the following 'hm' field to build the UID.
The entry is then rewritten with an '<id="UID">' tag and an extra 'sf' field
naming the matching '.wav' file. All other lines are copied unchanged.
"""

import os
import re
import sys
from optparse import OptionParser

# End of line appended to generated fields. Both files are opened in text
# mode, which already translates '\n' to the platform convention on writing;
# emitting '\r\n' explicitly would produce '\r\r\n' on Windows.
EOL = '\n'

# An MDF field: backslash, 2- or 3-character marker, optional space, value.
pattern = r"^\\(\w{2,3}) ?(.*)$"
_FIELD_RE = re.compile(pattern)


def _native(text):
    """! @brief Return text as the native 'str' type of the running interpreter.
    @param text Native string, or a Python 2 'unicode' object.
    @return 'text' unchanged when it is already a 'str', else its UTF-8 encoding.
    """
    # Python 3: 'str' is text and the file object does the encoding, so calling
    # encode() here would yield bytes that cannot be concatenated to 'str'.
    # Python 2: files are byte streams, so 'unicode' values must be encoded.
    if isinstance(text, str):
        return text
    return text.encode('utf-8')


def _open_text(path, mode):
    """! @brief Open a text file, as UTF-8 whenever the interpreter allows it.
    @param path Path of the file to open.
    @param mode Mode passed to open(), i.e. "r" or "w".
    @return The opened file object.
    """
    # Python 2's built-in open() has no 'encoding' argument. Test the version
    # explicitly rather than catching TypeError, which would also swallow
    # unrelated errors such as an invalid path type.
    if sys.version_info[0] >= 3:
        return open(path, mode, encoding='utf-8')
    return open(path, mode)


def add_uids(lines, to_sampa):
    """! @brief Rewrite MDF lines, inserting a UID into each 'lx'/'se' entry.
    @param lines Iterable of input lines, each including its end of line.
    @param to_sampa Callable converting a headword into the string the UID is built from.
    @return Generator yielding the output lines.
    """
    # To generate UID, we need to keep values of 'lx' and 'hm'
    lx = ""
    mkr = "lx"
    sf = []
    hm = ""
    for line in lines:
        result = _FIELD_RE.search(line)
        if not result:
            # Not an MDF field: copy as is
            yield line
            continue
        marker, value = result.group(1), result.group(2)
        if marker in ("lx", "se"):
            # Hold the headword back until its 'hm' field is read.
            # NOTE(review): an 'lx'/'se' that is never followed by 'hm' is
            # therefore not written to the output - confirm input always has 'hm'.
            lx = value
            # Always track the marker that supplied the headword, so that an
            # earlier pending 'se' cannot leak into an 'lx' entry.
            mkr = marker
        elif marker == "sf":
            sf.append(value)
        elif marker == "hm":
            hm = value
            if lx != "":
                # Generate UID and remove spaces around separation character
                uid = to_sampa(lx).replace(" | ", "|")
                # Concatenate homonym number if any, otherwise add '1'
                uid += hm if hm != "" else "1"
                yield "\\" + mkr + " <id=\"" + _native(uid) + "\"> " + lx + EOL
                # Sound file name: replace the characters '|', '?' and '*'
                wav = uid.replace('|', u"€").replace('?', 'Q').replace('*', 'F')
                yield "\\sf " + _native(wav) + ".wav" + EOL
                for sound in sf:
                    yield "\\sf " + sound + EOL
                yield "\\hm " + hm + EOL
                # Reset loop variables
                lx = ""
                mkr = "lx"
                sf = []
                hm = ""
            else:
                # 'hm' without a pending headword: copy as is
                yield line
        else:
            # Any other field: copy as is
            yield line


def main(argv=None):
    """! @brief Parse the command line and convert the input file.
    @param argv List of command line arguments; defaults to sys.argv[1:].
    """
    # Get command line arguments
    parser = OptionParser()
    parser.add_option("-i", "--input", dest="input", action="store", help="input MDF file")
    parser.add_option("-o", "--output", dest="output", action="store", help="output MDF file")
    options = parser.parse_args(argv)[0]
    if not options.input or not options.output:
        # Fail with a usage message instead of an obscure error from open()
        parser.error("both --input and --output are required")

    # Import the converter before touching any file, so that a failed import
    # cannot leave a truncated output file behind
    sys.path.append('./pylmflib/utils/ipa2sampa')
    from ipa2sampa import uni2sampa

    # Open input and output files; 'with' closes them even if conversion fails
    with _open_text(options.input, "r") as in_file:
        with _open_text(options.output, "w") as out_file:
            out_file.writelines(add_uids(in_file, uni2sampa))


if __name__ == "__main__":
    main()