Python LMF library
 All Classes Namespaces Files Functions Variables
ipa2sampa.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 # author : Mattis List
4 # email : mattis.list@lingpy.org
5 # created : 2015-01-20 15:26
6 # modified : 2015-01-20 15:50
7 
8 """! @package utils.ipa2sampa
9 """
10 
11 from __future__ import print_function,unicode_literals
12 import unicodedata
13 import codecs
14 
# Load the tab-separated conversion table and build the module-level lookup
# ``sota`` that uni2sampa() consults.
# NOTE(review): the path is relative to the current working directory, so the
# module can only be imported from the directory that contains ``pylmflib``.
with codecs.open('./pylmflib/utils/ipa2sampa/sampa.csv', 'r', 'utf-8') as data:
    # (first column, second column) pairs, in file order.
    sota = []

    for line in data:
        # Skip blank lines and lines whose first character is '#'.
        if not line.strip() or line.startswith('#'):
            continue

        # Decompose to NFD before splitting so that the table keys have the
        # same normal form as the text uni2sampa() looks up.
        so, ta = unicodedata.normalize("NFD", line.strip()).split('\t')
        try:
            # Evaluate the second column as a triple-quoted string literal so
            # that backslash escapes written in the file are interpreted.
            # SECURITY NOTE: eval() executes whatever the data file contains;
            # this is only acceptable while sampa.csv is a trusted file
            # shipped with the package. Do not point this at external input.
            ta = eval('"""' + ta + '"""')
        except Exception:
            # Best effort: if the field is not a valid literal body (for
            # example it contains an unbalanced quote), keep the raw text.
            pass
        sota.append((so, ta))

# Invert the pairs: the second column becomes the key and the first column
# the value. When a key occurs more than once, the last row wins.
sota = {target: source for source, target in sota}
32 
def uni2sampa(sequence):
    """
    Convert sequence in unicode-ipa to ascii-sampa.

    The input is decomposed with Unicode NFD normalization and each
    resulting code point is looked up in the module-level ``sota`` table.
    Code points that have no entry in the table are copied to the output
    unchanged.

    Parameters
    ----------
    sequence
        Text to convert. A byte string is decoded as UTF-8 first.

    Returns
    -------
    The converted text, built by concatenating the per-code-point
    replacements. An empty input yields an empty string.

    Raises
    ------
    UnicodeDecodeError
        If a byte string is passed that is not valid UTF-8.

    Notes
    -----
    Forked from LingPy's version for ipa2sampa, which is based on code
    taken from Peter Kleiweg
    (http://www.let.rug.nl/~kleiweg/L04/devel/python/xsampa.html).
    """
    # ``bytes`` is the same type as ``str`` on Python 2 and the dedicated
    # byte-string type on Python 3, so this check works on both without
    # relying on the Python-2-only ``unicode`` builtin.
    if isinstance(sequence, bytes):
        sequence = sequence.decode('utf-8')

    decomposed = unicodedata.normalize('NFD', sequence)

    # One dictionary lookup per code point; unknown code points pass through.
    return ''.join(sota.get(segment, segment) for segment in decomposed)
57 
if __name__ == '__main__':
    # Script mode: read the tokens.test file (path relative to the current
    # working directory) and, for every line, print the stripped token next
    # to its converted form.
    with codecs.open('./pylmflib/utils/ipa2sampa/tokens.test', 'r', 'utf-8') as handle:
        for raw in handle:
            token = raw.strip()
            converted = uni2sampa(token)
            print(token, '\t', converted)