#!/usr/bin/env python
# -*- encoding: japanese.euc_jp -*-

try:
  import cdb
except ImportError:
  import pycdb as cdb
import sys, sexpr
from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer


##  ConnectReader
##
class ConnectReader(AbstractConsumer):
  """Consumer that turns parsed connection rules into a cdb cost matrix.

  Typical use is a fluent chain: read(fname).close().export().

  Attributes:
    matdb:  cdb.cdbmake writer for the matrix file given as `matname`
            (built through the temporary file `matname + ".tmp"`).
    tab:    list of (priority, symbol) pairs; symbol is a unicode string
            of four comma-separated fields.
    syms:   dict keyed by symbol. Values are the placeholder 1 until
            close() replaces them with the symbol's final numeric id.
    mat:    dict mapping (left symbol, right symbol) to an integer cost.
    encin:  encoding used to decode the fields of incoming rules.
    encout: encoding used for cdb keys and for the exported table.
    fname:  name of the file passed to read(), or None before read().
  """

  def __init__(self, matname, encin="japanese.euc_jp", encout="japanese.euc_jp"):
    """Open the cdb writer and register the two predefined symbols."""
    AbstractConsumer.__init__(self)
    self.matdb = cdb.cdbmake(matname, matname+".tmp")
    # Predefined symbols. Their priorities (0 and 1) are lower than any
    # priority _intern() can assign (>= 2), so close() gives them ids 0 and 1.
    # NOTE(review): presumably sentence-begin / sentence-end markers - confirm.
    self.tab = [ (0, u"ʸƬ,*,*,*"), (1, u"ʸ,*,*,*") ] # predefined
    self.syms = dict.fromkeys([sym for (_, sym) in self.tab], 1)
    (self.encin, self.encout) = (encin, encout)
    self.mat = {}
    # Defined up front so that export() works even if read() was never called.
    self.fname = None
    return

  def _intern(self, w):
    """Return the symbol for one parsed pattern, registering it if new.

    Args:
      w: list whose first element is a sequence of strings (joined with
         "-") followed by up to three more string fields. Missing
         trailing fields are treated as "*". The list is not modified.

    Returns:
      The pattern as a unicode string "pos,ktype,kform,base".

    Raises:
      ValueError: if a not-yet-registered pattern has more than four fields.
    """
    # Work on a copy: the caller's parsed list must not be altered.
    fields = ["-".join(w[0])] + list(w[1:])
    fields += ["*"] * (4 - len(fields))
    sym = unicode(",".join(fields), self.encin)
    if sym not in self.syms:
      (pos, ktype, kform, base) = fields
      # vague patterns are given higher numbers.
      prio = 2
      if pos == "*": prio += 1
      if ktype == "*": prio += 1
      if kform == "*": prio += 1
      if base == "*": prio += 3
      self.tab.append((prio, sym))
      self.syms[sym] = 1
    return sym

  def feed(self, x):
    """Record the cost of one connection rule.

    Args:
      x: pair (seq, cost). Only rules whose `seq` has exactly two
         elements are recorded; each of the two elements must contain
         exactly one pattern. Rules of any other length are ignored.

    Raises:
      AssertionError: if either side of a two-element rule does not
        hold exactly one pattern.
      ValueError: if `cost` cannot be converted to an integer.
    """
    (seq, cost) = x
    if len(seq) != 2:
      return
    assert len(seq[0]) == 1 and len(seq[1]) == 1, \
           "each side of a rule must hold exactly one pattern"
    # Convert the cost first so that a malformed cost registers no symbols.
    value = int(cost)
    left = self._intern(seq[0][0])
    right = self._intern(seq[1][0])
    self.mat[(left, right)] = value
    return

  def read(self, fname):
    """Parse the rule file `fname`, feeding every rule to this consumer.

    The file is closed even when parsing fails.

    Returns:
      self, to allow call chaining.
    """
    self.fname = fname
    f = open(fname)
    try:
      parser = sexpr.SExprReader(self)
      for line in f:
        parser.feed(line)
    finally:
      f.close()
    return self

  def close(self):
    """Assign final symbol ids and write the cost matrix to the cdb.

    Symbols are numbered in ascending priority order (the sort is
    stable, so equal priorities keep their registration order). Every
    recorded cost is stored under the key "<left id>-<right id>".

    Returns:
      self, to allow call chaining.
    """
    self.tab.sort(key=lambda entry: entry[0])
    for (i, (_, sym)) in enumerate(self.tab):
      self.syms[sym] = i
    for ((s1, s2), v) in self.mat.iteritems():
      k = "%d-%d" % (self.syms[s1], self.syms[s2])
      self.matdb.add(k.encode(self.encout), str(v))
    self.matdb.finish()
    return self

  def export(self, header=1, out=sys.stdout):
    """Write the symbol table as Python source defining `TABLE`.

    Call close() first: before that every symbol still maps to the
    placeholder value 1.

    Args:
      header: when true, emit a shebang line, an encoding declaration
              and a "generated from" comment before the table.
      out:    writable file-like object receiving the output.

    Returns:
      self, to allow call chaining.
    """
    def enc(s):
      # NOTE(review): quotes/backslashes in a symbol are not escaped.
      return 'u"%s"' % s.encode(self.encout)
    if header:
      out.write("#!/usr/bin/env python\n")
      out.write("# -*- encoding: %s -*-\n\n" % self.encout)
      out.write("# generated from %s\n\n" % self.fname)

    out.write("TABLE = {\n")
    # more specific pattern first
    for (_, sym) in self.tab:
      out.write("  %s: %d,\n" % (enc(sym), self.syms[sym]))
    out.write("}\n")
    out.flush()
    return self


# main
if __name__ == "__main__":
  # Expect exactly two arguments: the output cdb and the input rule file.
  args = sys.argv[1:]
  if len(args) != 2:
    print >>sys.stderr, "usage: make_matrix.py matrix.cdb connect.cha"
    sys.exit(2)
  (matname, rulefile) = args
  # Parse the rules, write the matrix, then dump the symbol table to stdout.
  reader = ConnectReader(matname)
  reader.read(rulefile)
  reader.close()
  reader.export()
