/* ---------------------------------------------------------- 
%   (C)1995 Institute for New Generation Computer Technology 
%       (Read COPYRIGHT for detailed information.) 
----------------------------------------------------------- */
/*-----------------------------------------------------------------
    HMM node splitting
    1995.1.27
-----------------------------------------------------------------*/
#include "defs.h"
#include "e_struct.h"

/* #define DEBUG_split */
#define DEBUG_eval

/* ------------------------------------------------------------------ */
/* node_split: split one node of hmnet into two.                       */
/*                                                                     */
/*   node    index of the node to split                                */
/*   s_or_p  SERIAL12 : old node followed by the new node              */
/*           SERIAL21 : new node followed by the old node              */
/*           PARALLEL : new node placed beside the old node            */
/*                                                                     */
/* The new node takes index hmnet.nodenum (which is then incremented)  */
/* and receives a self-loop when the old node has one.  For any other  */
/* s_or_p value the node is still allocated but nothing is rewired or  */
/* initialised.                                                        */
/* NOTE(review): no capacity check is made on hmnet.nodenum or on      */
/* hmnet.initialState[] here -- confirm the callers guarantee room.    */
/* ------------------------------------------------------------------ */
node_split(node, s_or_p)
int node,s_or_p;
{
  int k, slot, fresh;
  int handled = 1;

  /* allocate the next free node index */
  fresh = hmnet.nodenum++;

  /* reproduce the self-loop of the old node on the new one */
  if(hmnet.arc[node][node].link == LINK) {
    hmnet.arc[fresh][fresh].link = LINK;
  }

  if(s_or_p == SERIAL12) {
    /* Serial split, old -> new: outgoing arcs move to the new node */
    for(k = 0; k < fresh; k++) {
      if(k != node && hmnet.arc[node][k].link == LINK) {
        hmnet.arc[node][k].link = NOLINK;
        hmnet.arc[fresh][k].link = LINK;
      }
    }
    /* chain the old node into the new one */
    hmnet.arc[node][fresh].link = LINK;
  }
  else if(s_or_p == SERIAL21) {
    /* Serial split, new -> old: incoming arcs move to the new node */
    for(k = 0; k < fresh; k++) {
      if(k != node && hmnet.arc[k][node].link == LINK) {
        hmnet.arc[k][node].link = NOLINK;
        hmnet.arc[k][fresh].link = LINK;
      }
    }
    /* chain the new node into the old one */
    hmnet.arc[fresh][node].link = LINK;

    /* if the old node was an initial state, the new node replaces it */
    for(slot = 0; slot < hmnet.initStateNumber; slot++) {
      if(hmnet.initialState[slot] != node) continue;
      hmnet.initialState[slot] = fresh;
      hmnet.initialProb[fresh] = hmnet.initialProb[node];
      hmnet.initialProb[node] = 0.0;
      break;
    }
  }
  else if(s_or_p == PARALLEL) {
    /* Parallel split: duplicate incoming and outgoing arcs.           */
    /* Reads only touch indices below `fresh`, writes only touch row   */
    /* or column `fresh`, so both copies can share one pass.           */
    for(k = 0; k < fresh; k++) {
      if(k == node) continue;
      if(hmnet.arc[k][node].link == LINK) {
        hmnet.arc[k][fresh].link = LINK;
      }
      if(hmnet.arc[node][k].link == LINK) {
        hmnet.arc[fresh][k].link = LINK;
      }
    }

    /* if the old node was an initial state, register the new node as  */
    /* an additional one and share the initial probability equally     */
    for(slot = 0; slot < hmnet.initStateNumber; slot++) {
      if(hmnet.initialState[slot] != node) continue;
      hmnet.initialProb[node] /= 2.0;
      hmnet.initialState[hmnet.initStateNumber] = fresh;
      hmnet.initialProb[fresh] = hmnet.initialProb[node];
      hmnet.initStateNumber++;
      break;
    }
  }
  else {
    /* unknown split mode: no rewiring and no initialisation */
    handled = 0;
  }

  /* set the starting output/transition statistics of the new node */
  if(handled) {
    if(tied_switch == ON) split_tied_node_init(node, fresh);
    else split_node_init(node, fresh);
  }
}

set_splitState()
{
  char subseq[SAMPLEMAX][TIMEMAX];
  int subseqLength[SAMPLEMAX];
  int temp_log[AMINOS];
  int entropy, maxEntropy;
  int maxProf,maxAmino,tempusage;
  int parallelEval,parallelEval1,parallelEval2;
  int parallelCount1,parallelCount2;
  int subtrelis_log[SAMPLE_LENGTH][2];
  int serialEval12,serialEval21,candidate1,candidate2;
  int i,a,s,t,it,temp;
  char c;

  /* ---------------------------------- */
  /* $B%(%s%H%m%T!<:G(B($BBg(B)$B$N%N!<%I$r5a$a$k(B */
  /* ---------------------------------- */
  splitState = TERMINALSTATE;
  for(maxEntropy=0,i=0;i<hmnet.nodenum;i++){
    if(hmnet.node[i].usage==0) continue;
    for(entropy=0,a=0;a<AMINOS;a++) {
      if(hmnet.node[i].profile[a]==0) continue;
      temp=minus_II(hmnet.node[i].profile[a],hmnet.node[i].usage);
      entropy += temp;
    }
    if(maxEntropy > entropy) {
      maxEntropy = entropy;
      splitState = i;
    }
  }
  if(splitState == TERMINALSTATE)
    printf("error in setting split state\n");
  if(maxEntropy == 0) 
    printf("inappropriate to split the node\n");

  /* -------------------------------------------- */
  /* $B%5%s%W%k$+$i$=$N%N!<%I$K4X$o$kItJ,$rH4$-=P$9(B */
  /* -------------------------------------------- */
  for(s=0;s<sample.number;s++) {
    for(it=0,t=0;t<sample.length[s];t++) {
      if(sample.beststates[s][t]==splitState)
	subseq[s][it++] = sample.seq[s][t];
    }
    subseqLength[s]=it;
  }
  
  /* ------------------------------------------------ */
  /* $B:GIQCM$r$[$\=|$$$?2>=PNOJ,I[$r$b$D2>%N!<%I$r:n@.(B */
  /* ------------------------------------------------ */
  for(maxProf=0,a=0;a<AMINOS;a++) {
    if(maxProf < hmnet.node[splitState].profile[a]) {
      maxProf = hmnet.node[splitState].profile[a];
      maxAmino = a;
    }
  }
  tempusage = hmnet.node[splitState].usage - maxProf +1;
  for(a=0;a<AMINOS;a++) {
    if(a==maxAmino) 
      temp_log[a] = minus_II(1,tempusage);
    else 
      temp_log[a] = minus_II(hmnet.node[splitState].profile[a],tempusage);
  }

  /* ---------------------------------------------- */
  /* $BJBNsG[CV$7$?$H$-$N%5%s%W%k$NJP$j$HI>2ACM$N;;=P(B */
  /* ---------------------------------------------- */
  parallelCount1 = 0;
  parallelCount2 = 0;
  parallelEval = 0;
  for(s=0;s<sample.number;s++) {
    parallelEval1 = 0;
    parallelEval2 = 0;
    for(it=0;it<subseqLength[s];it++) {
      parallelEval1 += work_log.output[splitState][splitState][subseq[s][it]];
      parallelEval2 += temp_log[subseq[s][it]];
    }
/*    printf("DBEVAL %d %d %d\n",s,parallelEval1,parallelEval2); */
    if(parallelEval1 > parallelEval2) {
      parallelEval += parallelEval1;
      parallelCount1++;
    } else {
      parallelEval += parallelEval2;
      parallelCount2++;
    }
  }
  if(parallelCount1 == 0 || parallelCount2 == 0) parallelEval = mRANGE;

  /* ---------------------------------------- */
  /* $BD>NsG[CV$7$?$H$-$NI>2ACM$N;;=P(B (old-new) */
  /* ---------------------------------------- */
  serialEval12 = 0;
  for(s=0;s<sample.number;s++) {
    for(it=0;it<subseqLength[s];it++){
      subtrelis_log[it][0] = mRANGE;
      subtrelis_log[it][1] = mRANGE;
    }
    subtrelis_log[0][0] = 0;
    for(it=1;it<subseqLength[s];it++){
      subtrelis_log[it][0] = 
	plus(subtrelis_log[it-1][0],
	     work_log.output[splitState][splitState][subseq[s][it-1]]);
      candidate1 = plus(subtrelis_log[it-1][0],temp_log[subseq[s][it-1]]);
      candidate2 = plus(subtrelis_log[it-1][1],temp_log[subseq[s][it-1]]);
      if(candidate1 > candidate2)
	subtrelis_log[it][1] = candidate1;
      else 
	subtrelis_log[it][1] = candidate2;
    }
    candidate1 = plus(subtrelis_log[subseqLength[s]-1][0],
		      temp_log[subseq[s][subseqLength[s]-1]]);
    candidate2 = plus(subtrelis_log[subseqLength[s]-1][1],
		      temp_log[subseq[s][subseqLength[s]-1]]);
    if(candidate1 > candidate2)
      subtrelis_log[subseqLength[s]][1] = candidate1;
    else 
      subtrelis_log[subseqLength[s]][1] = candidate2;

    serialEval12 += subtrelis_log[subseqLength[s]][1];
  }

  /* ---------------------------------------- */
  /* $BD>NsG[CV$7$?$H$-$NI>2ACM$N;;=P(B (new-old) */
  /* ---------------------------------------- */
  serialEval21 = 0;
  for(s=0;s<sample.number;s++) {
    for(it=0;it<subseqLength[s];it++){
      subtrelis_log[it][0] = mRANGE;
      subtrelis_log[it][1] = mRANGE;
    }
    subtrelis_log[0][0] = 0;
    for(it=1;it<subseqLength[s];it++){
      subtrelis_log[it][0] = 
	plus(subtrelis_log[it-1][0],temp_log[subseq[s][it-1]]);
      candidate1 = 
	plus(subtrelis_log[it-1][0],
	     work_log.output[splitState][splitState][subseq[s][it-1]]);
      candidate2 = 
	plus(subtrelis_log[it-1][1],
	     work_log.output[splitState][splitState][subseq[s][it-1]]);
      if(candidate1 > candidate2)
	subtrelis_log[it][1] = candidate1;
      else 
	subtrelis_log[it][1] = candidate2;
    }
    candidate1 = plus(subtrelis_log[subseqLength[s]-1][0],
      work_log.output[splitState][splitState][subseq[s][subseqLength[s]-1]]);
    candidate1 = plus(subtrelis_log[subseqLength[s]-1][1],
      work_log.output[splitState][splitState][subseq[s][subseqLength[s]-1]]);
    if(candidate1 > candidate2)
      subtrelis_log[subseqLength[s]][1] = candidate1;
    else 
      subtrelis_log[subseqLength[s]][1] = candidate2;

    serialEval21 += subtrelis_log[subseqLength[s]][1];
  }

#ifdef DEBUG_eval
  printf("DBEVAL stat:%d p:%d s12:%d s21:%d\n",
	 splitState,parallelEval,serialEval12,serialEval21);
#endif

  /* -------------- */
  /* $B:GE,J,3d$r7hDj(B */
  /* -------------- */
  if(parallelEval > serialEval12 && parallelEval > serialEval21)
    splitMode = PARALLEL;
  else if(serialEval21 > serialEval12 && serialEval21 > parallelEval)
    splitMode = SERIAL21;
  else 
    splitMode = SERIAL12;
}

/* ------------------------------------------------------------------ */
/* split_tied_node_init: starting statistics of a freshly split node   */
/* when tied_switch is ON (the profile is taken from the node, not     */
/* from individual arcs).                                              */
/*                                                                     */
/*   oldnode  node that was split; its profile/usage are read          */
/*   newnode  node that was created; its rows of work_log are written  */
/*                                                                     */
/* Only destinations j < newnode are visited, so the entries for the   */
/* new node's own self-loop are not written here.                      */
/* NOTE(review): confirm the self-loop entries are set elsewhere.      */
/* ------------------------------------------------------------------ */
split_tied_node_init(oldnode, newnode)
int oldnode, newnode;
{
  int j,a;
  int maxProf, maxAmino;
  int tempusage, arcsOutNum;

  /* -------------------------------------------------------------- */
  /* Initial output table: the old node's profile with the count of  */
  /* its most frequent symbol replaced by 1.                         */
  /* maxAmino stays -1 when no count is positive, so the comparison  */
  /* below never reads an indeterminate value.                       */
  /* -------------------------------------------------------------- */
  maxAmino = -1;
  for(maxProf=0,a=0;a<AMINOS;a++) {
    if(maxProf < hmnet.node[oldnode].profile[a]) {
      maxProf = hmnet.node[oldnode].profile[a];
      maxAmino = a;
    }
  }
  tempusage = hmnet.node[oldnode].usage - maxProf +1;

  /* the same table is written for every destination j < newnode */
  for(j=0;j<newnode;j++) {
    for(a=0;a<AMINOS;a++) {
      if(a==maxAmino) 
	work_log.output[newnode][j][a] = minus_II(1,tempusage);
      else 
	work_log.output[newnode][j][a] = 
	  minus_II(hmnet.node[oldnode].profile[a],tempusage);
    }
  }

  /* -------------------------------------------------------------- */
  /* Initial transition values: every linked outgoing arc gets the   */
  /* same value, derived from the number of such arcs.               */
  /* NOTE(review): presumably log(1/arcsOutNum), i.e. a uniform      */
  /* distribution -- confirm against log_near1.                      */
  /* -------------------------------------------------------------- */
  arcsOutNum =0;
  for(j=0;j<newnode;j++) {
    if(hmnet.arc[newnode][j].link == LINK) arcsOutNum ++;
  }
  for(j=0;j<newnode;j++) {
    if(hmnet.arc[newnode][j].link == LINK) 
      work_log.trans[newnode][j] = (-1)*log_near1((double)arcsOutNum);
  }

}

/* ------------------------------------------------------------------ */
/* split_node_init: starting statistics of a freshly split node when   */
/* tied_switch is not ON (profiles are kept per arc).                  */
/*                                                                     */
/*   oldnode  node that was split; the profile/usage of its self-loop  */
/*            arc are read                                             */
/*   newnode  node that was created; its rows of work_log are written  */
/*                                                                     */
/* The transition value of the new node's self-loop is not written     */
/* here, and the self-loop is not counted in arcsOutNum.               */
/* ------------------------------------------------------------------ */
split_node_init(oldnode, newnode)
int oldnode, newnode;
{
  int j,a;
  int maxProf, maxAmino;
  int tempusage, arcsOutNum;

  /* -------------------------------------------------------------- */
  /* Self-loop output table of the new node: the profile of the old  */
  /* node's self-loop with the count of its most frequent symbol     */
  /* replaced by 1.                                                  */
  /* maxAmino stays -1 when no count is positive (for example when   */
  /* the self-loop arc has no counts), so the comparison below       */
  /* never reads an indeterminate value.                             */
  /* -------------------------------------------------------------- */
  maxAmino = -1;
  for(maxProf=0,a=0;a<AMINOS;a++) {
    if(maxProf < hmnet.arc[oldnode][oldnode].profile[a]) {
      maxProf = hmnet.arc[oldnode][oldnode].profile[a];
      maxAmino = a;
    }
  }
  tempusage = hmnet.arc[oldnode][oldnode].usage - maxProf +1;

  for(a=0;a<AMINOS;a++) {
    if(a==maxAmino) 
      work_log.output[newnode][newnode][a] = 
	minus_II(1,tempusage);
    else 
      work_log.output[newnode][newnode][a] = 
	minus_II(hmnet.arc[oldnode][oldnode].profile[a],tempusage);
  }

  /* -------------------------------------------------------------- */
  /* Every other linked outgoing arc starts from a flat table.       */
  /* NOTE(review): the denominator 20 is hard-coded rather than      */
  /* AMINOS -- confirm the two are meant to agree.                   */
  /* -------------------------------------------------------------- */
  arcsOutNum =0;
  for(j=0;j<newnode;j++) {
    if(hmnet.arc[newnode][j].link == LINK) {
      arcsOutNum ++;
      for(a=0;a<AMINOS;a++) {
	work_log.output[newnode][j][a] = minus_II(1,20);
      }
    }
  }

  /* -------------------------------------------------------------- */
  /* Initial transition values: every linked outgoing arc gets the   */
  /* same value, derived from the number of such arcs.               */
  /* NOTE(review): presumably log(1/arcsOutNum), i.e. a uniform      */
  /* distribution -- confirm against log_near1.                      */
  /* -------------------------------------------------------------- */
  for(j=0;j<newnode;j++) {
    if(hmnet.arc[newnode][j].link == LINK) 
      work_log.trans[newnode][j] = (-1)*log_near1((double)arcsOutNum);
  }
}

/* end of file */
