
import re
import pyrvapi_ext as API
from . import regex_tree as RT
import time, sys
import math

class generic_parser(object):

  def flush(self):
    """Flush the report, sleeping first when a pause is configured.

    If ``self.pause`` is positive, a notice is written to stdout and the
    call sleeps for that many seconds before ``API.flush()`` is invoked.
    """
    delay = self.pause
    if delay > 0:
      sys.stdout.write('sleepping\n')
      time.sleep(delay)

    API.flush()

  def __init__(
    self,
    rvapi_grid,
    show_progs = False,
    hide_refs = False,
    pause = 0,
    summary = None,
    graph_tables = True,
    **kwargs
  ):
    """Set up the log-parsing state machine and the report state.

    Args:
      rvapi_grid: container that report widgets are attached to; it is also
        the initial current section.
      show_progs: when true, ``banner_finished`` opens a new section for
        each parsed banner.
      hide_refs: when true, text items whose title matches a
        ``:Reference...:`` pattern (case-insensitive) are not shown.
      pause: seconds to sleep before each flush (no sleep unless positive).
      summary: optional dict filled in by ``evaluation_data``; ``None``
        disables that extraction.
      graph_tables: when true, ``show_table`` also renders the table itself.
      **kwargs: accepted and ignored.
    """
    self.show_progs = show_progs
    self.prog_name = ''
    self.pause = pause
    self.summary = summary
    self.graph_tables = graph_tables

    # Graph limits spec "xmin|xmax x ymin|ymax". The decimal point is escaped
    # (an unescaped '.' matches any character) and an optional exponent is
    # accepted so that values such as 1e5 are still recognised.
    re_number = r' *([+-]?[0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?) *'
    re_numbers = (re_number,) * 4
    self.rec_limits = re.compile(r'%s\|%sx%s\|%s' % re_numbers)

    # One graph definition: ":title:limits:col,col[,col...]:".
    re_graphs = r'\s*:([^:]+):([^:]+):([0-9]+(?:,[0-9]+)+):\s*'
    self.rec_graphs = re.compile(re_graphs)

    if hide_refs:
      # The case-insensitive flag is passed explicitly: an inline "(?i)" that
      # is not at the start of the pattern is an error on Python 3.11+.
      self.rec_refs = re.compile(r'\A\s*:\s*Reference.*:\s*\Z', re.I)

    else:
      # End-of-string followed by a literal 'z' can never match.
      self.rec_refs = re.compile(r'\Zz')

    dd = r'^\$\$'
    notd = r'(^[^\$]+)'
    skip_1 = r'^#CCP4I.*'
    skip = r'^[^#\$]+'
    bf = r'^#+'
    bs = r'^[\s\n]+'
    bc = r'^(\s*CCP4[^#]+)'

    # Nodes of the state machine; each one wraps a pattern.
    ignored_1 = RT.LogDataLine(skip_1)                              # logs from i1
    ignored = RT.LogDataLine(skip)
    banner_f1 = RT.LogDataLine(bf)
    banner_s1 = RT.LogDataLine(bs)
    banner_f2 = RT.LogDataLine(bf)
    banner_content = RT.LogDataLine(bc)
    banner_f3 = RT.LogDataLine(bf)
    banner_s4 = RT.LogDataLine(bs)
    banner_f4 = RT.LogDataLine(bf)
    item_started = RT.LogDataLine(r'^\$(TABLE|TEXT|SUMMARY)')
    item_title = RT.LogDataLine(notd)
    item_graphs_started = RT.LogDataLine(r'^\$(GRAPHS|SCATTER)')
    item_graphs = RT.LogDataLine(notd)
    item_dd1 = RT.LogDataLine(dd)
    item_header = RT.LogDataLine(notd)
    item_dd2 = RT.LogDataLine(dd)
    item_message = RT.LogDataLine(notd)
    item_dd3 = RT.LogDataLine(dd)
    item_body = RT.LogDataLine(notd)
    item_dd4 = RT.LogDataLine(dd)
    ignored_dd = RT.LogDataLine(dd)                                 # refmac bug

    # Transitions. Lines marked "refmac bug" carry extra transitions
    # (ignored -> ignored_dd, item_body -> item_started) that the original
    # author added as a workaround.
    ignored_1.add_next(ignored_1, ignored, item_started, banner_f1) # logs from i1
    ignored.add_next(ignored, item_started, banner_f1, ignored_dd)  # refmac bug
    ignored_dd.add_next(ignored, item_started)                      # refmac bug
    banner_f1.add_next(banner_s1, ignored)
    banner_s1.add_next(banner_s1, banner_f2, ignored)
    banner_f2.add_next(banner_content, ignored)
    banner_content.add_next(banner_f3, ignored)
    banner_f3.add_next(banner_s4, ignored)
    banner_s4.add_next(banner_s4, banner_f4, ignored)
    banner_f4.add_next(ignored)
    item_started.add_next(item_title)
    item_title.add_next(item_title, item_graphs_started, item_dd2)
    item_graphs_started.add_next(item_graphs)
    item_graphs.add_next(item_graphs, item_dd1)
    item_dd1.add_next(item_header, item_graphs_started)
    item_header.add_next(item_header, item_dd2, item_graphs_started)
    item_dd2.add_next(item_message)
    item_message.add_next(item_message, item_dd3)
    item_dd3.add_next(item_body)
    item_body.add_next(item_body, item_dd4, item_started)           # refmac bug
    item_dd4.add_next(ignored)

    # Accumulators for the pieces of the item being parsed. The bound
    # ``extend`` methods are registered as actions below, so these lists must
    # only ever be cleared in place (see item_reset), never rebound.
    self.item_title_parts = list()
    self.item_graphs_parts = list()
    self.item_header_parts = list()
    self.item_message_parts = list()
    self.item_body_parts = list()

    banner_content.add_action(self.banner_content)
    banner_f4.add_action(self.banner_finished)
    item_started.add_action(self.item_started)
    item_title.add_action(self.item_title_parts.extend)
    item_graphs_started.add_action(self.graphs_started)
    item_graphs.add_action(self.item_graphs_parts.extend)
    item_header.add_action(self.item_header_parts.extend)
    item_message.add_action(self.item_message_parts.extend)
    item_body.add_action(self.item_body_parts.extend)
    item_dd4.add_action(self.item_finished)

    self.parser = RT.LogDataParser()
    self.parser.add_next(ignored_1, ignored)
    self.parser.add_recovery_next(ignored)
    self.parser.add_recovery_action(self.item_reset)

    # Report state; the per-section part of it is reset again in
    # banner_finished when a new section is opened.
    self.banner_text = None
    self.item_kind = None
    self.graph_kind = None
    self.grid = rvapi_grid
    self.vpos = 2
    self.sect = rvapi_grid
    self.text_panel = None
    self.widget = None
    self.table_panel = None
    self.text_cou = -1
    self.table_cou = -1
    self.flush()

  def parse_stream(self, istream, ostream=None, verbose=False, pause=0, patches=None):
    """Feed ``istream`` through the parser, then update the current section.

    All arguments are forwarded unchanged to ``self.parser.parse_stream``.
    Afterwards, if the current section is not the top-level grid,
    ``set_state(False)`` is called on it.
    """
    self.parser.parse_stream(istream, ostream, verbose, pause, patches)
    current = self.sect
    if current is self.grid:
      return

    current.set_state(False)

  def banner_content(self, groups):
    """Remember the banner text captured by the banner-content pattern.

    ``groups`` must contain exactly one item (otherwise the unpacking raises
    ``ValueError``); that item becomes ``self.banner_text``.
    """
    (text,) = groups
    self.banner_text = text

  def banner_finished(self, groups):
    """Parse the stored banner text and, optionally, open a new section.

    The banner is split as ``<version>:<name>:<date>``; when the second
    colon is missing, the remainder is split at its first space instead.
    The whitespace-normalised name is stored in ``self.prog_name`` and the
    stored banner text is cleared. When ``self.show_progs`` is set, a new
    section titled ``"<name> (<date>)"`` is created one row further down,
    the previous section (if it is not the grid) gets ``set_state(False)``,
    and the per-section panels and counters are reset.
    """
    assert not groups
    _version, _, remainder = self.banner_text.partition(':')
    name, found, date = remainder.partition(':')
    if not found:
      name, found, date = remainder.lstrip().partition(' ')

    self.prog_name = ' '.join(name.split())
    sect_title = '%s (%s)' % (self.prog_name, ' '.join(date.split()))
    self.banner_text = None
    if not self.show_progs:
      return

    self.vpos += 1
    previous = self.sect
    if previous is not self.grid:
      previous.set_state(False)

    self.sect = API.pyrvapi_section(self.grid, sect_title, self.vpos, 0, 1, 1)
    self.text_panel = self.widget = self.table_panel = None
    self.text_cou = self.table_cou = -1
    self.flush()

  def item_reset(self):
    """Discard everything collected for the current item.

    The part lists are emptied in place rather than replaced, because their
    bound ``extend`` methods are registered as parser actions in __init__.
    """
    for attr in (
      'item_title_parts',
      'item_graphs_parts',
      'item_header_parts',
      'item_message_parts',
      'item_body_parts',
    ):
      del getattr(self, attr)[:]

    self.item_kind = None
    self.graph_kind = None

  def item_started(self, groups):
    """Begin a new item: clear leftovers, then record the item kind.

    The reset happens here (rather than after the previous item finished) as
    part of the workaround marked "refmac bug" in this file. ``groups`` must
    contain exactly one item, which becomes ``self.item_kind``.
    """
    self.item_reset()                                               # refmac bug
    (kind,) = groups
    self.item_kind = kind

  def graphs_started(self, groups):
    """Record the graph keyword captured by the graphs-started pattern.

    ``groups`` must contain exactly one item, which becomes
    ``self.graph_kind``.
    """
    (kind,) = groups
    self.graph_kind = kind

  def item_finished(self, groups):
    assert not groups
    title = ''.join(self.item_title_parts)
    graphs = ''.join(self.item_graphs_parts)
    header = ''.join(self.item_header_parts)
    message = ''.join(self.item_message_parts)
    body = ''.join(self.item_body_parts)
    if body.strip():
      if self.item_kind == 'TABLE':
        msg = message.strip().replace('loggraph', '') # phaser; is this right syntax?
#       assert not msg # msg field is actually for short column titles (see acorn log)
        self.show_table(title, graphs, msg, header, body)

      elif not self.rec_refs.match(title):
        assert self.item_kind in ('TEXT', 'SUMMARY')
        assert not (graphs or header)
        self.show_text(title, message, body)
        try:
          self.evaluation_data(title, message, body)

        except:
          pass

    self.flush()
#   self.item_reset()                                               # refmac bug

  def show_table(self, title, graphs, msg, header, body):
    """Render one parsed table and the graphs defined on it.

    Args:
      title: raw item title; the text between the first two colons is used.
      graphs: raw graph definitions, scanned with ``self.rec_graphs``; each
        match yields (plot title, limits spec, comma-separated 1-based
        column numbers with the x column first).
      msg: whitespace-separated short column titles; may be empty, in which
        case the header names are used instead.
      header: whitespace-separated column names.
      body: table rows, one per line, whitespace-separated cells.

    Raises:
      AssertionError: if the title is 100 characters or longer, or the
        numbers of columns in header, message and body disagree.
      IndexError: if ``title`` contains no colon.
    """
    title_line = title.split(':')[1].strip()
    assert len(title_line) < 100
    column_nick_list = [
      nick.replace('<', '&lt;').replace('>', '&gt;') for nick in msg.split()
    ]

    column_name_list = []
    for column_name in header.split():
      column_name = column_name.replace('<', '&lt;').replace('>', '&gt;')
      if column_nick_list:
        column_name = column_name.replace('_', ' ')

      column_name_list.append(column_name)

    # Transpose the rows into columns.
    rows = [line.split() for line in body.split('\n') if line.strip()]
    column_data_list = list(zip(*rows))
    assert len(column_data_list) == len(column_name_list)
    if column_nick_list:
      assert len(column_name_list) == len(column_nick_list)

    else:
      column_nick_list.extend(column_name_list)

    self.table_cou += 1
    if self.graph_tables:
      if self.table_panel is None:
        self.table_panel = API.panel(self.sect, 2, 0, 1, 1)

      table = API.pyrvapi_table(self.table_panel, self.table_cou, 0, 1, 1, title_line, -1)
      for i, column_nick in enumerate(column_nick_list):
        table.col_title(i, column_nick)
        for j, cell in enumerate(column_data_list[i]):
          table.body_cell(j, i, cell)

    self.flush()
    graph_meta_list = self.rec_graphs.findall(graphs)
    if not graph_meta_list:
      return

    # Create the graph widget on first use. Keying this on the table counter
    # would leave the widget unset when the first table of a section has no
    # graphs but a later one does.
    if self.widget is None:
      self.widget = API.loggraph(self.sect, 1, 0, 1, 1)

    column_list = list()
    # One entry per table column, so that it can be indexed by column number;
    # None marks a column that is not entirely numeric.
    column_numbers_list = list()
    gdtobj = API.graph_data(self.widget, title_line)
    for column_name, column_data in zip(column_name_list, column_data_list):
      isvalid = True
      isint = True
      column_numbers = list()
      for datum in column_data:
        try:
          number = int(datum)

        except ValueError:
          isint = False
          try:
            number = float(datum)

          except ValueError:
            isvalid = False
            break

        column_numbers.append(number)

      if isvalid and not isint:
        column_numbers = [float(number) for number in column_numbers]

      column_numbers_list.append(column_numbers if isvalid else None)
      column = API.graph_dataset(gdtobj, column_name, '', isint)
      column_list.append(column)
      # For a non-numeric column only the values parsed before the first
      # bad cell are added.
      for number in column_numbers:
        column.add_datum(number)

    # Column titles for which the x axis gets dedicated tick labels below.
    rec_sq_reso_names = '1/d^2', 'M(4SSQ/LL)', '&lt;4SSQ/LL&gt;', '1/resol^2', '4(S/L)**2'
    for item in graph_meta_list:
      axes = [int(a) - 1 for a in item[2].split(',')]
      axisx = axes[0]
      # Keep the original spaces and only correct for phaser's newlines.
      plot_title = re.sub(' *[\t\n\r\f\v]+ *', ' ', item[0])
      limits = item[1]
      is_nought = 'NOUGHT'.startswith(limits)
      is_auto = 'AUTO'.startswith(limits)
      x_is_reso = column_nick_list[axisx].strip() in rec_sq_reso_names
      column_name_x = column_name_list[axisx].strip()
      if x_is_reso:
        # UTF-8 bytes of the A-ring letter (code point 197), used in place of
        # the Angstrom sign (code point 8491).
        encoded = b'\xc3\x85'
        decoded = str(encoded) if sys.getdefaultencoding() == 'ascii' else encoded.decode()
        column_name_x = 'Resolution (' + decoded + ')'

      pltobj = API.graph_plot(self.widget, plot_title, column_name_x, '')
      if x_is_reso:
        xtick_n = 5
        xmax = None
        if not is_nought and not is_auto:
          data = self.rec_limits.match(limits)
          if data:
            xmax = float(data.group(2))

        if xmax is None:
          x_numbers = column_numbers_list[axisx]
          if x_numbers:
            xmax = max(x_numbers) * 1.025

        # Tick labels are 1/sqrt(x), which needs a positive range; otherwise
        # the default ticks are left in place.
        if xmax is not None and xmax > 0:
          xtick_delta = xmax / xtick_n
          pltobj.reset_xticks()
          pltobj.add_xtick(0.0, 'Infty')
          for cou in range(1, 1 + xtick_n):
            value = xtick_delta * cou
            pltobj.add_xtick(value, "%4.2f" % (1 / math.sqrt(value)))

        # NOTE(review): this was guarded by "if 'NOUGHT':", which is always
        # true, so the lower y limit has always been forced to 0 here. The
        # behaviour is kept; confirm whether a real NOUGHT test was intended.
        pltobj.set_ymin(0)

      elif is_nought:
        pltobj.set_xmin(0)
        pltobj.set_ymin(0)

      elif not is_auto:
        data = self.rec_limits.match(limits)
        if data:
          xmin, xmax, ymin, ymax = [float(lim) for lim in data.groups()]
          pltobj.set_xrange(xmin, xmax)
          pltobj.set_yrange(ymin, ymax)

      for axisy in axes[1:]:
        pltline = API.plot_line(pltobj, gdtobj, column_list[axisx], column_list[axisy])
        if self.graph_kind == 'SCATTER':
          pltline.set_options(style=API.plot_line.RVAPI_LINE_Off)

        self.flush()

      self.flush()

  def show_text(self, title, message, body):
    """Render a text item as a one-column table in the text panel.

    Nothing is rendered for ``SUMMARY`` items. The text panel is created on
    first use. The table is created with state -1 when the title starts with
    ``Reference``, ``Script`` or ``MR Result`` and with state 1 otherwise.
    A non-empty message (whitespace-normalised, with ``Baubles Markup``
    removed) goes into the first cell; the body, with leading and trailing
    blank lines dropped, goes into the next one.

    Raises:
      IndexError: if ``title`` contains no colon.
    """
    if self.item_kind == 'SUMMARY': # hack for refmac
      return

    if self.text_cou < 0:
      assert self.text_panel is None
      self.text_panel = API.panel(self.sect, 0, 0, 1, 1)

    title_line = title.split(':')[1].strip()
    # Non-capturing group; this used to be spelled "(:?", i.e. an optional
    # colon, which matched the same titles only because title_line cannot
    # contain a colon.
    special = re.match(r'^(?:Reference|Script|MR Result)', title_line)
    state = -1 if special else 1
    style = 'vertical-align:top;text-align:left;'
    self.text_cou += 1
    table = API.pyrvapi_table(self.text_panel, self.text_cou, 0, 1, 1, title_line, state)
    row = 0
    msg = ' '.join(message.split()).replace('Baubles Markup', '')
    if msg:
      table.body_cell(row, 0, '<pre>' + msg + '</pre>', style)
      row += 1

    # Keep everything between the first and the last non-blank line.
    body_lines = body.split('\n')
    filled = [k for k, line in enumerate(body_lines) if line.strip()]
    kept_lines = body_lines[filled[0]:filled[-1] + 1] if filled else []
    body_text = '<pre>' + '<br>'.join(kept_lines) + '</pre>'
    table.body_cell(row, 0, body_text, style)
    self.flush()

  def evaluation_data(self, title, message, body):
    """Extract headline figures from a text item into ``self.summary``.

    Does nothing when ``self.summary`` is None. Otherwise the lower-cased
    ``self.prog_name`` prefix selects what is looked for:

    * ``refmac``: message "final results"; stores ``R_factor`` and
      ``R_free`` (the "Final" column).
    * ``cbuccaneer``: title "result"; stores ``percentage``.
    * ``phaser``: title "mr result"; stores ``tfz`` and ``llg`` taken from
      the first ``SOLU SET`` line ('0' when absent) and ``count``, the number
      of such lines.
    * ``aimless``: title "result"; stores ``R_meas_ano``, ``R_meas_all``,
      ``Half_set_CC``, ``Completeness`` (the "Overall" column) and
      ``Space_group``.

    All values are stored as strings, under ``self.summary[<program>]``, and
    only when the corresponding pattern matches.

    Raises:
      IndexError: if the title is inspected and contains no colon.
    """
    if self.summary is None:
      return

    prog = self.prog_name.lower()
    if prog.startswith('refmac'):
      if ' '.join(message.split()).lower() == "final results":
        # Label, skipped "Initial" value, captured "Final" value.
        re_fmt = r'%s\s+[0-9.+-]+\s+([0-9.+-]+)\s+'
        re_extract = r'\s+Initial\s+Final\s+'
        re_extract += re_fmt % r'R +factor'
        re_extract += re_fmt % r'R +free'
        data = re.match(re_extract, body)
        if data:
          r_factor, r_free = data.groups()
          self.summary['refmac'] = dict()
          self.summary['refmac']['R_factor'] = r_factor
          self.summary['refmac']['R_free'] = r_free

    elif prog.startswith('cbuccaneer'):
      if title.split(':')[1].strip().lower() == 'result':
        re_extract = r'.+\s+Completeness +by +residues +built: +([0-9.]+)'
        data = re.match(re_extract, body, re.S)
        if data:
          self.summary['cbuccaneer'] = dict()
          self.summary['cbuccaneer']['percentage'], = data.groups()

    elif prog.startswith('phaser'):
      if title.split(':')[1].strip().lower() == 'mr result':
        lines = re.findall('\n +SOLU +SET +.+\n', body)
        if lines:
          line0 = lines[0]
          # The last TFZ / LLG value on the first solution line is reported.
          tfz_list = re.findall(r' +TFZ[^0-9.]+([0-9.]+)', line0)
          llg_list = re.findall(r' +LLG[^0-9.]+([0-9.]+)', line0)
          self.summary['phaser'] = dict()
          self.summary['phaser']['tfz'] = tfz_list[-1] if tfz_list else '0'
          self.summary['phaser']['llg'] = llg_list[-1] if llg_list else '0'
          self.summary['phaser']['count'] = str(len(lines))

    elif prog.startswith('aimless'):
      if title.split(':')[1].strip().lower() == 'result':
        # Label, captured first value, two further values that are skipped.
        re_fmt = r'%s\s+([0-9.+-]+)\s+[0-9.+-]+\s+[0-9.+-]+.*'
        re_extract = r'.*Overall +InnerShell +OuterShell.*'
        re_extract += re_fmt % r'Rmeas \(within I\+/I-\)'
        re_extract += re_fmt % r'Rmeas \(all I\+ & I-\)'
        re_extract += re_fmt % r'CC\(1/2\)'
        re_extract += re_fmt % r'Completeness'
        re_extract += r'Space group: +([A-Z](?: [0-9]{1,2}){1,3}).*'
        data = re.match(re_extract, body, re.S)
        if data:
          self.summary['aimless'] = dict()
          (
            self.summary['aimless']['R_meas_ano'],
            self.summary['aimless']['R_meas_all'],
            self.summary['aimless']['Half_set_CC'],
            self.summary['aimless']['Completeness'],
            self.summary['aimless']['Space_group'],
          ) = data.groups()



