#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hmm-gene.h"
#include "hmm.h"
#include "ga.h"


/*
 * Read one real number from fp with the "%f" conversion (single precision,
 * exactly as the model parser has always done) and store it, widened to
 * double, in *out.
 *
 * Returns 0 on success and -1 when no number could be converted.
 */
static int hmm_train_read_real(FILE *fp, double *out)
{
    float value;

    if(fscanf(fp, "%f", &value) != 1){
        return(-1);
    }
    *out = (double)value;
    return(0);
}


/*
 * hmm_train: train the HMM stored in file_name and overwrite file_name
 * with the trained model.
 *
 * Steps performed:
 *   1. count and load the training sequences from "./learn.seq";
 *   2. read the training conditions from "./hmm.cnd";
 *   3. parse the model from file_name (keywords "states=",
 *      "connectivity...", "trans.", "base", "init.");
 *   4. run baum_welch();
 *   5. average score[] over the sequences (the original comment calls this
 *      the average log-likelihood), count the free parameters, and compute
 *      aic and fitness = 1/aic;
 *   6. write connectivity, a_ij, b_ij, pi_i and the figures above back to
 *      file_name, and report real/CPU time on stderr.
 *
 * Parameters:
 *   file_name  path of the model file; it is read first and rewritten last.
 *
 * Returns 0 on success and -1 on failure.
 * NOTE(review): when file_name holds no usable "states=" entry the function
 * prints "can not read ..." but still returns 0, as the original code did;
 * confirm whether callers rely on that before changing it.
 *
 * NOTE(review): int2n(), timer_start() and timer_stop() are not declared in
 * this function; they are assumed to be declared by one of the project
 * headers - confirm.
 */
int hmm_train(char *file_name)
{
    int i, j, k;
    int num_learning_seq = 0;
    int length = 0;
    /* Starts as NULL so the error path after read_seq() never hands an
       indeterminate pointer to release_seq().
       NOTE(review): confirm release_seq() tolerates NULL. */
    SEQ *seq4learning = NULL;
    char buf[TINY_SIZE];
    char token_fmt[32];
    FILE *file_ptr = NULL;

    int count_num_seq(char *, int *);
    int read_seq(char *, int, SEQ **);
    int release_seq(int, SEQ *);
    int seq_length(int, SEQ *, int *);

    HMM_CND hmm_cnd;
    int num_state = -1;
    int *tuple = NULL;
    int **connect = NULL;
    double **a_ij = NULL;
    double *pi_i = NULL;
    TPL_TBL *b_ij = NULL;
    double *score = NULL;

    int num_param, cnt;
    double sum, likelihd, aic, fitness;

    int idum;
    int num_tuple;
    int *digits = NULL;        /* base-NUCL digits of a tuple index */
    int hmm_allocated = 0;     /* tuple/connect/parameters are live */
    int close_rc;
    int status = -1;

    /* Letter written for each base-4 digit value 0..3. */
    static const char nucleotide[4] = { 'A', 'T', 'C', 'G' };

    int is_almost_zero(double);
    int read_hmm_cnd(char *, HMM_CND *);
    int *ivector(long, long);
    int **imatrix(long, long, long, long);
    void free_ivector(int *, long, long);
    void free_imatrix(int **, long, long, long, long);
    int alloc_parameter1(int, int *, TPL_TBL **, double ***, double **,
                         double **, int);
    int free_parameter1(int, int *, TPL_TBL *, double **, double *,
                        double *, int);

    int baum_welch(int, SEQ *, int, int, int *, int **, HMM_CND,
                   double ***, double **, TPL_TBL **, double **);

    double real_time, cpu_time;
    double timer_getrtime(), timer_getutime(), timer_getstime();


    /* Count the training DNA sequences (file: ./learn.seq). */
#ifdef DEBUG
    fprintf(stderr, "--- counting number of sequences for learning...\n");
#endif
    if(count_num_seq("./learn.seq", &num_learning_seq) != 0){
        fprintf(stderr, "error occures in count_num_seq()...\n");
        return(-1);
    }
#ifdef DEBUG
    fprintf(stderr, "--- number of sequences for learning = %d\n",
            num_learning_seq);
#endif


    /* Load the training DNA sequences (file: ./learn.seq); read_seq()
       allocates the storage.  According to the original comment it also
       checks for characters other than A/T/C/G. */
#ifdef DEBUG
    fprintf(stderr, "--- reading sequences for learning...\n");
#endif
    if(read_seq("./learn.seq", num_learning_seq, &seq4learning) != 0){
        fprintf(stderr, "error occures in read_seq()...\n");
        goto cleanup;
    }
#ifdef DEBUG
    for(i = 1; i <= num_learning_seq; i++){
        fprintf(stderr, "--- name = %s\n", (seq4learning + i)->a_name);
        fprintf(stderr, "    seq. = %s\n", (seq4learning + i)->a_seq);
    }
#endif


    /* Determine the length of the training sequences. */
#ifdef DEBUG
    fprintf(stderr, "--- checking length of sequences for learning...\n");
#endif
    if(seq_length(num_learning_seq, seq4learning, &length) != 0){
        fprintf(stderr, "error occures in seq_length()...\n");
        goto cleanup;
    }
#ifdef DEBUG
    fprintf(stderr, "--- length of sequences for learning = %d\n", length);
#endif


    /* Read the HMM condition file. */
#ifdef DEBUG
    fprintf(stderr, "--- reading conditions of HMM...\n");
#endif
    if(read_hmm_cnd("./hmm.cnd", &hmm_cnd) != 0){
        fprintf(stderr, "error occures in read_hmm_cnd()...\n");
        goto cleanup;
    }
#ifdef DEBUG
    fprintf(stderr, "    tolerance = %f\n", hmm_cnd.eps);
    fprintf(stderr, "    max. iteration = %d\n", hmm_cnd.max_itr);
    fprintf(stderr, "    balancing factor = %e\n", hmm_cnd.aic_weight);
#endif

    /* Read the temporary model file. */
    if((file_ptr = fopen(file_name, "r")) == NULL){
        fprintf(stderr, "can not find %s...\n", file_name);
        goto cleanup;
    }

    /* Build "%<N>s" with N = sizeof buf - 1 so that a long token can never
       overrun buf.  The output is at most '%' + 20 digits + 's', so it
       always fits in token_fmt.  Assumes TINY_SIZE >= 2. */
    sprintf(token_fmt, "%%%lus", (unsigned long)(sizeof buf - 1));

    while(fscanf(file_ptr, token_fmt, buf) != EOF){
        if(strcmp(buf, "states=") == 0){

            /* A repeated "states=" entry replaces the previous model;
               release the earlier allocation instead of leaking it. */
            if(hmm_allocated){
                free_parameter1(num_state, tuple, b_ij, a_ij, pi_i,
                                score, num_learning_seq);
                free_ivector(tuple, (long)1, (long)num_state);
                free_imatrix(connect, (long)1, (long)num_state,
                             (long)1, (long)num_state);
                tuple = NULL;
                connect = NULL;
                b_ij = NULL;
                a_ij = NULL;
                pi_i = NULL;
                score = NULL;
                hmm_allocated = 0;
            }

            /* Number of states.  An unreadable or negative value is
               treated like a missing entry and reported after the loop. */
            if(fscanf(file_ptr, "%d", &num_state) != 1 || num_state < 0){
                num_state = -1;
                break;
            }

            /* Tuple length of every state.
               NOTE(review): the results of ivector()/imatrix() are used
               unchecked, as before; confirm they abort on failure. */
            tuple = ivector((long)1, (long)num_state);
            for(i = 1; i <= num_state; i++){
                tuple[i] = TPL_LEN;
            }

            /* Allocate the connectivity matrix and the HMM parameters.
               The flag is set before alloc_parameter1() because its
               failure path has always called free_parameter1() as well. */
            connect = imatrix((long)1, (long)num_state,
                              (long)1, (long)num_state);
            hmm_allocated = 1;
            if(alloc_parameter1(num_state, tuple, &b_ij, &a_ij, &pi_i,
                                &score, num_learning_seq) != 0){
                fprintf(stderr,
                        "error occures in alloc_parameter1()...\n");
                goto cleanup;
            }
        }
        /* Connectivity matrix. */
        else if(strcmp(buf, "connectivity...") == 0){
            for(i = 1; i <= num_state; i++){
                for(j = 1; j <= num_state; j++){
                    if(fscanf(file_ptr, "%d", &idum) != 1){
                        goto bad_model;
                    }
                    connect[i][j] = idum;
                }
            }
        }
        /* Transition probabilities. */
        else if(strcmp(buf, "trans.") == 0){
            /* Skip the word that follows the keyword. */
            fscanf(file_ptr, token_fmt, buf);
            for(i = 1; i <= num_state; i++){
                for(j = 1; j <= num_state; j++){
                    if(hmm_train_read_real(file_ptr, &a_ij[i][j]) != 0){
                        goto bad_model;
                    }
                }
            }
        }
        /* Output (emission) probabilities. */
        else if(strcmp(buf, "base") == 0){
            /* Skip the word that follows the keyword. */
            fscanf(file_ptr, token_fmt, buf);

            /* One scratch vector serves the whole section.  It used to be
               freed inside the tuple loop and then reused on the next
               iteration (use after free / double free). */
            digits = ivector(0, (long)(TPL_LEN-1));
            for(i = 1; i <= num_state; i++){
                num_tuple = (int) pow((double)NUCL, (double)(tuple[i]));
                for(j = 1; j <= num_tuple; j++){

                    /* Convert the integer (j-1) to base 4. */
                    if(int2n(j-1, TPL_LEN, NUCL, digits) != 0){
                        fprintf(stderr, "error occures in int2n()...\n");
                        goto cleanup;
                    }
                    for(k = 0; k < TPL_LEN; k++){
                        if(digits[k] < 0 || digits[k] > 3){
                            /* Unexpected digit: fail silently, as the
                               original code did. */
                            goto cleanup;
                        }
                        ((b_ij + i)->tuple)[j][k+1] = nucleotide[digits[k]];
                    }

                    if(hmm_train_read_real(file_ptr,
                                           &((b_ij + i)->frq)[j]) != 0){
                        goto bad_model;
                    }
                }
            }
            free_ivector(digits, 0, (long)(TPL_LEN-1));
            digits = NULL;
        }
        /* Initial state distribution. */
        else if(strcmp(buf, "init.") == 0){
            /* Skip the word that follows the keyword. */
            fscanf(file_ptr, token_fmt, buf);
            for(i = 1; i <= num_state; i++){
                if(hmm_train_read_real(file_ptr, &pi_i[i]) != 0){
                    goto bad_model;
                }
            }
        }
    }
    fclose(file_ptr);
    file_ptr = NULL;

    if(num_state < 0){
        /* Nothing was allocated on this path, so only the sequences are
           released during cleanup.  The return value stays 0 - see the
           NOTE(review) in the function header. */
        fprintf(stderr, "can not read %s...\n", file_name);
        fflush(stderr);
        status = 0;
        goto cleanup;
    }

#ifdef DEBUG
    fprintf(stderr, "--- initial state of HMM...\n");
    fprintf(stderr, "    connection = ...\n");
    for(i = 1; i <= num_state; i++){
        fprintf(stderr, "\t");
        for(j = 1; j <= num_state; j++){
            fprintf(stderr, "%d ", connect[i][j]);
        }
        fprintf(stderr, "\n");
    }
    fprintf(stderr, "    a_ij = ...\n");
    for(i = 1; i <= num_state; i++){
        fprintf(stderr, "\t");
        for(j = 1; j <= num_state; j++){
            fprintf(stderr, "%e ", a_ij[i][j]);
        }
        fprintf(stderr, "\n");
    }
    fprintf(stderr, "    b_ij = ...\n");
    for(i = 1; i <= num_state; i++){
        num_tuple = (int) pow((double)NUCL, (double)(tuple[i]));
        fprintf(stderr, "\tstate=%d  freq.=...\n", i);
        for(j = 1; j <= num_tuple; j++){
            fprintf(stderr, "\t");
            for(k = 1; k <= tuple[i]; k++){
                fprintf(stderr, "%c", ((b_ij + i)->tuple)[j][k]);
            }
            fprintf(stderr, "\t%e\n", ((b_ij + i)->frq)[j]);
        }
    }
    fprintf(stderr, "    pi_i = ...\n");
    fprintf(stderr, "\t");
    for(i = 1; i <= num_state; i++){
        fprintf(stderr, "%e ", pi_i[i]);
    }
    fprintf(stderr, "\n");
#endif


    /* Start the timer. */
    timer_start();


    /* Estimate the parameters by HMM training. */
#ifdef DEBUG
    fprintf(stderr, "--- performing Baum-Welch algorithm...\n");
#endif
    if(baum_welch(num_learning_seq, seq4learning, length,
                  num_state, tuple, connect,
                  hmm_cnd, &a_ij, &pi_i, &b_ij, &score) != 0){
        fprintf(stderr, "error occures in baum_welch()...\n");
        goto cleanup;
    }


    /* Average of the per-sequence scores (average log-likelihood
       according to the original comment). */
    sum = 0.0;
    for(i = 1; i <= num_learning_seq; i++){
        sum += score[i];
    }
    likelihd = sum / (double)num_learning_seq;


    /* Count the free parameters of the HMM.  An entry counts when
       is_almost_zero() reports FALSE for it (the original comment calls
       this a relaxed criterion); each distribution contributes one less
       than its number of counted entries, and never less than zero. */
    num_param = 0;

    /* Transition probabilities: one distribution per row of a_ij. */
    for(i = 1; i <= num_state; i++){
        cnt = 0;
        for(j = 1; j <= num_state; j++){
            if(is_almost_zero(a_ij[i][j]) == FALSE){
                cnt++;
            }
        }
        cnt--;
        if(cnt > 0){
            num_param += cnt;
        }
    }

    /* Emission frequencies: one distribution per state. */
    for(i = 1; i <= num_state; i++){
        cnt = 0;
        num_tuple = (int) pow((double)NUCL, (double)(tuple[i]));
        for(j = 1; j <= num_tuple; j++){
            if(is_almost_zero(((b_ij + i)->frq)[j]) == FALSE){
                cnt++;
            }
        }
        cnt--;
        if(cnt > 0){
            num_param += cnt;
        }
    }

    /* Initial state distribution.  Original note: pi fails to contribute
       free parameters only when the model always starts from S1. */
    cnt = 0;
    for(i = 1; i <= num_state; i++){
        if(is_almost_zero(pi_i[i]) == FALSE){
            cnt++;
        }
    }
    cnt--;
    if(cnt > 0){
        num_param += cnt;
    }


    /* Fitness, derived from the AIC. */
    aic = -2.0*likelihd + 2.0*(double)(num_param)*(hmm_cnd.aic_weight);
    fitness = 1.0 / aic;


    /* Output: the model file is overwritten with the trained model. */
    if((file_ptr = fopen(file_name, "w")) == NULL){
        fprintf(stderr, "can not find %s...\n", file_name);
        goto cleanup;
    }
    /* Header. */
    fprintf(file_ptr, "--- id= %s ...(after learning)\n", file_name);
    fprintf(file_ptr, "    num. states= %d\n", num_state);
    fflush(file_ptr);
    /* Connectivity matrix. */
    fprintf(file_ptr, "    connectivity...\n");
    for(i = 1; i <= num_state; i++){
        fprintf(file_ptr, "\t");
        for(j = 1; j <= num_state; j++){
            fprintf(file_ptr, "%d ", connect[i][j]);
        }
        fprintf(file_ptr, "\n");
    }
    fflush(file_ptr);
    /* Transition probabilities. */
    fprintf(file_ptr, "    a_ij = ...\n");
    for(i = 1; i <= num_state; i++){
        fprintf(file_ptr, "\t");
        for(j = 1; j <= num_state; j++){
            fprintf(file_ptr, "%e ", a_ij[i][j]);
        }
        fprintf(file_ptr, "\n");
    }
    fflush(file_ptr);
    /* Base (tuple) frequencies. */
    fprintf(file_ptr, "    b_ij = ...\n");
    for(i = 1; i <= num_state; i++){
        num_tuple = (int) pow((double)NUCL, (double)(tuple[i]));
        fprintf(file_ptr, "\tstate=%d  freq.=...\n", i);
        for(j = 1; j <= num_tuple; j++){
            fprintf(file_ptr, "\t");
            for(k = 1; k <= tuple[i]; k++){
                fprintf(file_ptr, "%c", ((b_ij + i)->tuple)[j][k]);
            }
            fprintf(file_ptr, "\t%e\n", ((b_ij + i)->frq)[j]);
        }
    }
    /* Initial state distribution. */
    fprintf(file_ptr, "    pi_i = ...\n");
    fprintf(file_ptr, "\t");
    for(i = 1; i <= num_state; i++){
        fprintf(file_ptr, "%e ", pi_i[i]);
    }
    fprintf(file_ptr, "\n");
    fflush(file_ptr);
    /* Fitness figures. */
    fprintf(file_ptr, "    likelihd= %e\tnum_p= %d\n", likelihd, num_param);
    fprintf(file_ptr, "    aic= %e\traw_fitness= %e\n", aic, fitness);
    fflush(file_ptr);

    /* fclose() performs the final flush; a failure means the trained
       model may not have reached the file. */
    close_rc = fclose(file_ptr);
    file_ptr = NULL;
    if(close_rc != 0){
        fprintf(stderr, "can not write %s...\n", file_name);
        goto cleanup;
    }


    /* Stop the timer and report the elapsed times. */
    timer_stop();
    real_time = timer_getrtime();
    cpu_time = timer_getutime() + timer_getstime();
    fprintf(stderr, "real time(sec) = %f\n", real_time);
    fprintf(stderr, "cpu time(sec) = %f\n", cpu_time);
    fflush(stderr);

#ifdef DEBUG
    fprintf(stderr, "--- normal end, see you...\n");
#endif

    status = 0;
    goto cleanup;

bad_model:
    /* A numeric field of the model file was missing or malformed. */
    fprintf(stderr, "invalid or truncated data in %s...\n", file_name);

cleanup:
    /* Single exit point: release whatever is still held. */
    if(file_ptr != NULL){
        fclose(file_ptr);
    }
    if(digits != NULL){
        free_ivector(digits, 0, (long)(TPL_LEN-1));
    }
    if(hmm_allocated){
        free_parameter1(num_state, tuple, b_ij, a_ij, pi_i, score,
                        num_learning_seq);
        free_ivector(tuple, (long)1, (long)num_state);
        free_imatrix(connect, (long)1, (long)num_state,
                     (long)1, (long)num_state);
    }
    release_seq(num_learning_seq, seq4learning);

    return(status);
}

