/* ---------------------------------------------------------- 
%   (C)1994 Institute for New Generation Computer Technology 
%       (Read COPYRIGHT for detailed information.) 
----------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "quant2.h"


/* Program entry point.
 *
 * usage: Quant2 data_file1 data_file2 [test_file]
 *
 * For data made up of two groups of samples (one group per data file),
 * computes the category weights according to quantification theory
 * type II, prints the weights, their per-position ranges, the
 * correlation ratio, the score of every training sequence and the
 * Mahalanobis distance reported by lda().  On request (answer "yes" on
 * stdin) positions are dropped in order of increasing range and the
 * discrimination is repeated.  When a third file is given its
 * sequences are scored and passed to lda() as test data.
 *
 * Progress messages go to stderr, results to stdout.  Exits with -1 on
 * any error, returns 0 on success.
 */
int main(int argc, char *argv[])
{
    float *vector(long, long);
    int *ivector(long, long);
    unsigned long *lvector(long, long);
    float **matrix(long, long, long, long);

    void free_vector(float *, long, long);
    void free_ivector(int *, long, long);
    void free_lvector(unsigned long *, long, long);
    void free_matrix(float **, long, long, long, long);


    /* number of samples (file1, file2, test file) */
    unsigned long n1 = 0;
    unsigned long n2 = 0;
    unsigned long n = 0;

    /* length of the sample sequences */
    unsigned long length;

    /* sequence data of the n1 / n2 / n samples, expressed as 0/1 */
    float **data1;
    float **data2;
    float **data;

    /* number of category weights */
    unsigned long num;

    /* category weights */
    float *X;
    float *vec_x, *dum_x;

    /* records the row exchanges made during partial pivoting */
    int *index;

    /* parity of the number of row exchanges (1: even, -1: odd) */
    float count;

    /* holds the sequence records */
    DATA_SET *data_set1;
    DATA_SET *data_set2;
    DATA_SET *data_set;

    /* vector h (see "Quantification", Asakura Shoten) */
    float *vec_h, *H, *dum_H;

    /* vector g (see "Quantification", Asakura Shoten) */
    float *vec_g1;
    float *vec_g2;

    /* matrix f (see "Quantification", Asakura Shoten) */
    float **mat_f, **F, **dum_F;

    /* maximum correlation ratio */
    float eta2 = 0.0;

    /* range */
    float *range;

    /* index used when sorting the ranges */
    unsigned long *range_index;

    /* net number of categories */
    int *item;
    int num_item;

    /* Mahalanobis distance */
    float *dist;

    /* F value of the test */
    float *f_value;

    /* base frequency at every position of the training sequences
       (both groups) */
    float **freq;

    int check_samples(char *, unsigned long *, unsigned long *);
    int print_matrix(float **, int, int);
    int print_vector(float *, int);
    int vector_plus(float *, float *, int, float *);
    int vector_times(float *, float, int, float *);
    int matrix_plus(float **, float **, int, int, float **);
    int matrix_times(float **, float, int, int, float **);
    int convert_data(float ***, DATA_SET *, unsigned long, unsigned long);
    int calc_g(float **, float **, unsigned long, unsigned long);
    int calc_h(float **, float *, float *, unsigned long, 
	       unsigned long, unsigned long);
    int calc_f(float ***, float **, float **, unsigned long, 
	       unsigned long, unsigned long);
    int evaluate_sequences(DATA_SET **, float **, float *, 
			   unsigned long, unsigned long);


    void ludcmp(float **a, int n, int *indx, float *d);
    void lubksb(float **a, int n, int *indx, float b[]);
    void mprove(float **a, float **alud, int n, int indx[], 
		float b[], float x[]);

    int lda(DATA_SET *, DATA_SET *, DATA_SET *, float *, 
	    unsigned long, unsigned long, unsigned long, unsigned long);
    void xindex(unsigned long n, float arr[], unsigned long indx[]);

    int struct_allocation(DATA_SET **, char *, int);
    int struct_free(DATA_SET *, int);
    int calc_freq(float ***, DATA_SET *, unsigned long, unsigned long);

    /* check_samples() only writes the length when it sees a sequence,
       so start from 0 to keep the comparisons below well defined */
    unsigned long dum_ii = 0, dum_jj = 0, dum_kk = 0;
    int i, j, k, ii, jj, kk;
    float fdum1, fdum2, fdum4, fdum5;
    float max, min, sigma;
    
    char key_in[32];


    /* check the arguments */
    if((argc != 3) && (argc != 4))
	{
	    fprintf(stderr, 
		    "usage:: Quant2 [data_file1] [data_file2] [test_file(option)]\n");
	    exit(-1);
	}

    /* find the sequence length and the number of records */
    if(check_samples(argv[1], &dum_ii, &n1) != 0)
	{
	    fprintf(stderr, "invalid sequences in %s.\n", argv[1]);
	    exit(-1);
	}
    if(check_samples(argv[2], &dum_jj, &n2) != 0)
	{
	    fprintf(stderr, "invalid sequences in %s.\n", argv[2]);
	    exit(-1);
	}
    if(dum_ii != dum_jj)
	{
	    fprintf(stderr, "invalid sequences.\n");
	    exit(-1);
	}
    length = dum_ii;
    num = FOUR * length;
    fprintf(stderr, "--- length of sequences = %lu...\n", length);
    fprintf(stderr, "--- number of sequences in data file1 = %lu...\n", n1);
    fprintf(stderr, "--- number of sequences in data file2 = %lu...\n", n2);

    /* allocate memory and store the data */
    fprintf(stderr, "--- read data file1...\n");
    if(struct_allocation(&data_set1, argv[1], n1) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function struct_allocation.\n");
	    exit(-1);
	}
    fprintf(stderr, "--- read data file2...\n");
    if(struct_allocation(&data_set2, argv[2], n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function struct_allocation.\n");
	    exit(-1);
	}

    /* allocate memory */
    mat_f = (float**) matrix((long) 1, (long) num, (long) 1, (long) num);
    data1 = (float**) matrix((long) 1, (long) n1, (long) 1, (long) num);
    data2 = (float**) matrix((long) 1, (long) n2, (long) 1, (long) num);
    vec_h = (float*) vector((long) 1, (long) num);
    vec_g1 = (float*) vector((long) 1, (long) num);
    vec_g2 = (float*) vector((long) 1, (long) num);
    vec_x = (float*) vector((long) 1, (long) num);
    dum_x = (float*) vector((long) 1, (long) num);
    range = (float*) vector((long) 1, (long) length);
    range_index = lvector((long) 1, (long) length);

    item = ivector((long) 1, (long) length);
    dist = (float*) vector((long) 1, (long) length);
    f_value = (float*) vector((long) 1, (long) length);


    /* convert the sequence data to 0/1 */
    fprintf(stderr, "--- converting sequences in data file1...\n");
    if(convert_data(&data1, data_set1, length, n1) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function convert_data.\n");
	    exit(-1);
	}
    fprintf(stderr, "--- converting sequences in data file2...\n");
    if(convert_data(&data2, data_set2, length, n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function convert_data.\n");
	    exit(-1);
	}


    /* compute vector g (see "Quantification", Asakura Shoten) */
    fprintf(stderr, "--- calculating vector g1...\n");
    if(calc_g(&vec_g1, data1, num, n1) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function calc_g.\n");
	    exit(-1);
	}
    fprintf(stderr, "--- calculating vector g2...\n");
    if(calc_g(&vec_g2, data2, num, n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function calc_g.\n");
	    exit(-1);
	}

    /* compute vector h (see "Quantification", Asakura Shoten) */
    fprintf(stderr, "--- calculating vector h...\n");
    if(calc_h(&vec_h, vec_g1, vec_g2, num, n1, n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function calc_h.\n");
	    exit(-1);
	}

    /* compute matrix f (see "Quantification", Asakura Shoten) */
    fprintf(stderr, "--- calculating matrix f...\n");
    if(calc_f(&mat_f, data1, data2, num, n1, n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function calc_f.\n");
	    exit(-1);
	}

    /* count the categories present at each item (position); one
       category per position is dropped, hence the decrements */
    fprintf(stderr, "--- calculating number of categorical weight at each position...\n");
    num_item = 0;
    for(i = 1; i <= length; i++)
	{
	    k = 0;
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    if((vec_g1[i * FOUR - j] > EPS) || 
		       (vec_g2[i * FOUR - j] > EPS))
			{
			    k++;
			    num_item++;
			}

		}
	    k--;
	    item[i] = k;
	    num_item--;
	}

    /* build F and H with the rank-deficient rows and columns removed:
       at every position the first observed category is skipped */
    fprintf(stderr, "--- calculating matrix F and vector H...\n");
    F = (float**) matrix((long) 1, (long) num_item, 
			 (long) 1, (long) num_item);
    dum_F = (float**) matrix((long) 1, (long) num_item, 
			     (long) 1, (long) num_item);
    H = (float*) vector((long) 1, (long) num_item);
    dum_H = (float*) vector((long) 1, (long) num_item);
    X = (float*) vector((long) 1, (long) num_item);
    k = 0;
    for(i = 1; i <= length; i++)
	{
	    int flag = 0;
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    if((vec_g1[i * FOUR - j] > EPS) || 
		       (vec_g2[i * FOUR - j] > EPS))
			{
			    if(flag != 0)
				{
				    k++;
				    H[k] = vec_h[i * FOUR - j];
				}
			    flag = 1;
			}
		}
	}
    k = 0;
    for(i = 1; i <= length; i++)
	{
	    int flag1 = 0;
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    if((vec_g1[i * FOUR - j] > EPS) || 
		       (vec_g2[i * FOUR - j] > EPS))
			{
			    if(flag1 != 0)
				{
				    k++;
				    kk = 0;
				    for(ii = 1; ii <= length; ii++)
					{
					    int flag2 = 0;
					    for(jj = FOUR - 1; jj >= 0; jj--)
						{
						    if((vec_g1[ii * FOUR - jj] > EPS) || 
						       (vec_g2[ii * FOUR - jj] > EPS))
							{
							    if(flag2 != 0)
								{
								    kk++;
								    F[k][kk] = mat_f[i * FOUR - j][ii * FOUR - jj];
								}
							    flag2 = 1;
							}
						}
					}
				}
			    flag1 = 1;
			}
		}
	}


    /* anything that is not float * or float ** is allocated by hand */
    if((index = (int *) calloc (num_item + 1, sizeof(int))) == NULL)
	{
	    fprintf(stderr, "not enought memory...\n");
	    exit(-1);
	}

    /* solve the linear system FX = cH
       (see "Quantification", Asakura Shoten) */
    fprintf(stderr, "--- solving equations...\n");
    fdum1 = ((float) (n1 * n2)) / ((float) (n1 + n2));
    vector_times(H, fdum1, num_item, dum_H);
    matrix_times(F, 1.0, num_item, num_item, dum_F);
    ludcmp(dum_F, num_item, index, &count);
    mprove(F, dum_F, num_item, index, dum_H, X);


    /* standardise the category weights and compute the ranges
       (see "Quantification", Asakura Shoten).
       The first category weight at every position is taken as 0.0 */
    fprintf(stderr, "--- normalizing categorical weight...\n");
    k = 0;
    for(i = 1; i <= num; i++)
	{
	    vec_x[i] = 0.0;
	}
    for(i = 1; i <= length; i++)
	{
	    int flag = 0;
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    if((vec_g1[i * FOUR - j] > EPS) || 
		       (vec_g2[i * FOUR - j] > EPS))
			{
			    if(flag != 0)
				{
				    k++;
				    vec_x[i * FOUR - j] = X[k];
				}
			    flag = 1;
			}
		    else
			{
			    vec_x[i * FOUR - j] = 0.0;
			}
		}
	}
    /* shift the weights so that the frequency-weighted mean at every
       position becomes 0.0 */
    freq = (float**) matrix((long) 1, (long) length, (long) 1, (long) FOUR);
    for(i = 1; i <= length; i++)
	{
	    for(j = 1; j <= FOUR; j++)
		{
		    freq[i][j] = 0.0;
		}
	}
    if(calc_freq(&freq, data_set1, length, n1) != 0)
	{
	    fprintf(stderr, "error occureds in function calc_freq.\n");
	    exit(-1);
	}
    if(calc_freq(&freq, data_set2, length, n2) != 0)
	{
	    fprintf(stderr, "error occureds in function calc_freq.\n");
	    exit(-1);
	}
    for(i = 1; i <= length; i++)
	{
	    for(j = 1; j <= FOUR; j++)
		{
		    freq[i][j] = freq[i][j] / ((float) (n1 + n2));
		}
	}
    for(i = 1; i <= length; i++)
	{
	    float alpha = 0.0;
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    alpha += freq[i][FOUR - j] * vec_x[i * FOUR - j];
		}
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    vec_x[i * FOUR - j] -= alpha;
		}
	}
    free_matrix(freq, (long) 1, (long) length, (long) 1, (long) FOUR);
    /* normalise so that the total variance (sigma) becomes 1.0.
       NOTE(review): the weights are divided by the variance itself,
       not by its square root, and a zero variance is not guarded
       against -- confirm against the intended normalisation. */
    if(evaluate_sequences(&data_set1, data1, vec_x, num, n1) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function evaluate_sequences.\n");
	    exit(-1);
	}
    if(evaluate_sequences(&data_set2, data2, vec_x, num, n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function evaluate_sequences.\n");
	    exit(-1);
	}
    fdum4 = fdum5 = 0.0;
    for(i = 1; i <= n1; i++)
	{
	    fdum4 += (data_set1 + i) -> score;
	    fdum5 += pow((data_set1 + i) -> score, 2.0);
	}
    for(i = 1; i <= n2; i++)
	{
	    fdum4 += (data_set2 + i) -> score;
	    fdum5 += pow((data_set2 + i) -> score, 2.0);
	}
    fdum4 = pow(fdum4 / ((float) (n1 + n2)), 2.0);
    fdum5 = fdum5 / ((float) (n1 + n2));
    sigma = fdum5 - fdum4;
    for(i = 1; i <= length; i++)
	{
	    for(j = FOUR - 1; j >= 0; j--)
		{
		    vec_x[i * FOUR - j] = vec_x[i * FOUR - j] / sigma;
		}
	}

    /* compute the range (max - min weight) of every position */
    fprintf(stderr, "--- calculating range...\n");
    for(i = 1; i <= length; i++)
	{
	    min = max = vec_x[i * FOUR - 3];
	    for(j = FOUR - 2; j >= 0; j--)
		{
		    if(min > vec_x[i * FOUR - j])
			{
			    min = vec_x[i * FOUR - j];
			}
		    if(max < vec_x[i * FOUR - j])
			{
			    max = vec_x[i * FOUR - j];
			}
		}
	    range[i] = max - min;
	}
    printf("# <<categorical weights & ranges >>\n");
    printf("#  i\t\tA\t\tT\t\tC\t\tG\t\tRange\n");
    for(i = 1; i <= length; i++)
	{
	    printf("%4d\t%13.7f\t%13.7f\t%13.7f\t%13.7f\t%13.7f\n", i, 
		   vec_x[i * FOUR - 3], vec_x[i * FOUR - 2], 
		   vec_x[i * FOUR - 1], vec_x[i * FOUR], range[i]);
	}

    /* compute the maximum correlation ratio
       (see "Quantification", Asakura Shoten) */
    fprintf(stderr, "--- calculating eta2...\n");
    for(i = 1; i <= num_item; i++)
	{
	    eta2 += H[i] * X[i];
	}
    printf("# correlation ratio (eta2) = %13.7f\n", eta2);

    /* compute the scores of the sequences in file1 and file2 */
    fprintf(stderr, "--- evaluating sequences in data file1...\n");
    if(evaluate_sequences(&data_set1, data1, vec_x, num, n1) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function evaluate_sequences.\n");
	    exit(-1);
	}
    printf("# sequence scores in %s\n", argv[1]);
    printf("# source\t\tscore\n");
    for(i = 1; i <= n1; i++)
	{
	    printf("%s:%s\t%13.7f\n", (data_set1 + i) -> source, 
		   (data_set1 + i) -> position, (data_set1 + i) -> score);
	}
    fprintf(stderr, "--- evaluating sequences in data file2...\n");
    if(evaluate_sequences(&data_set2, data2, vec_x, num, n2) != 0)
	{
	    fprintf(stderr, 
		    "error occureds in function evaluate_sequences.\n");
	    exit(-1);
	}
    printf("# sequence scores in %s\n", argv[2]);
    printf("# source\t\tscore\n");
    for(i = 1; i <= n2; i++)
	{
	    printf("%s:%s\t%13.7f\n", (data_set2 + i) -> source, 
		   (data_set2 + i) -> position, (data_set2 + i) -> score);
	}


    /* compute the Mahalanobis distance obtained when discriminating
       with all category weights */
    fprintf(stderr, "--- liniar discriminat analysis using all categorical weight ...\n");
    if(lda(data_set1, data_set2, NULL, &fdum2, n1, n2, n, (long) 1) != 0)
	{
	    fprintf(stderr, "error occureds in function lda.\n");
	    exit(-1);
	}
    dist[length] = fdum2;
    printf("# mahalanobis distance (Dp^2) = %13.7f\n", dist[length]);


    fprintf(stderr, "do you want to evaluate signal set...?\n");
    do
	{
	    fprintf(stderr, "please answer yes or no...\n");
	    /* the field width keeps the answer inside key_in[32] */
	    if(scanf("%31s", key_in) != 1)
		{
		    /* EOF or read error: no answer can arrive any more,
		       so behave as if "no" had been entered */
		    strcpy(key_in, "no");
		}
	}
    while((strcmp(key_in, "yes") != 0) && (strcmp(key_in, "no") != 0));


    if(strcmp(key_in, "yes") == 0)
	{
	    /* sort the ranges in ascending order */
	    fprintf(stderr, "--- sorting range...\n");
	    xindex(length, range, range_index);

	    /* starting from the smallest range, remove the category
	       weights of that position and discriminate again */
	    fprintf(stderr, "--- selecting important signal...\n");
	    dum_ii = num_item;
	    vector_times(vec_x, 1.0, num, dum_x);
	    for(i = 1; i < length; i++)
		{
		    float c1, c2, c3;

		    /* number of category weights still in use */
		    dum_ii = dum_ii - item[range_index[i]];

		    /* zero the category weights that are removed from
		       the discriminant function */
		    dum_x[range_index[i]*FOUR - 3] = 0.0;
		    dum_x[range_index[i]*FOUR - 2] = 0.0;
		    dum_x[range_index[i]*FOUR - 1] = 0.0;
		    dum_x[range_index[i]*FOUR] = 0.0;

		    /* compute the scores with dum_ii category weights;
		       the score members of the DATA_SET records are
		       overwritten */
		    evaluate_sequences(&data_set1, data1, dum_x, num, n1);
		    evaluate_sequences(&data_set2, data2, dum_x, num, n2);


		    /* compute the Mahalanobis distance */
		    lda(data_set1, data_set2, NULL, &fdum2, n1, n2, n, 
			(long) 1);
		    dist[length - i] = fdum2;

		    /* test statistic */
		    c1 = ((float) (n1 + n2 - num_item - 1)) / 
			((float) (num_item - dum_ii));
		    c2 = (float) (n1 * n2);
		    c3 = ((float) (n1 + n2)) * ((float) (n1 + n2 - 2));
		    f_value[length-i] = c1 * c2 * (dist[length] - fdum2)
			/ (c3 + c2 * fdum2);
				     
		    printf("# put away categorical weight of position...\n# ");
		    for(j = 1; j <= i; j++)
			printf("%lu ", range_index[j]);
		    printf("\n# so number of categorical weight = %lu\n", dum_ii);
		    printf("# mahalanobis distance (Dp^2) = %13.7f\n", 
			   dist[length - i]);
		    /* the degrees of freedom are unsigned long
		       expressions; print them as signed longs */
		    printf("# F( %ld , %ld ) = %13.7f\n#\n", 
			   (long) (num_item - dum_ii),
			   (long) (n1 + n2 - num_item - 1), 
			   f_value[length - i]);
	    
		}
	}


    /* evaluate the test sequences */
    if(argc == 4)
	{
	    /* restore the scores computed with all category weights */
	    evaluate_sequences(&data_set1, data1, vec_x, num, n1);
	    evaluate_sequences(&data_set2, data2, vec_x, num, n2);

	    /* check the format of the test sequences */
	    if(check_samples(argv[3], &dum_kk, &n) != 0)
		{
		    fprintf(stderr, "invalid sequences in %s.\n", argv[3]);
		    exit(-1);
		}
	    if(length != dum_kk)
		{
		    fprintf(stderr, "invalid sequences.\n");
		    exit(-1);
		}
	    fprintf(stderr, 
		    "--- number of sequences in data file3 = %lu...\n", n);

	    /* read the test sequences */
	    fprintf(stderr, "--- read data file3...\n");
	    if(struct_allocation(&data_set, argv[3], n) != 0)
		{
		    fprintf(stderr, 
			    "error occureds in function struct_allocation.\n");
		    exit(-1);
		}

	    /* convert the sequence data to 0/1 */
	    data = (float**) matrix((long) 1, (long) n, (long) 1, (long) num);
	    fprintf(stderr, "--- converting sequences in data file3...\n");
	    if(convert_data(&data, data_set, length, n) != 0)
		{
		    fprintf(stderr, 
			    "error occureds in function convert_data.\n");
		    exit(-1);
		}

	    /* score the test sequences with the category weights */
	    evaluate_sequences(&data_set, data, vec_x, num, n);

	    /* evaluate those scores with the discriminant function */
	    fprintf(stderr, "--- estimating test sequences...\n");
	    lda(data_set1, data_set2, data_set, &fdum2, n1, n2, n, 
		(long) 1);

	    /* release what this branch allocated */
	    free_matrix(data, (long) 1, (long) n, (long) 1, (long) num);
	    struct_free(data_set, n);
	}


    /* release memory */
    free_matrix(F, (long) 1, (long) num_item, (long) 1, (long) num_item);
    free_matrix(dum_F, (long) 1, (long) num_item, (long) 1, (long) num_item);
    free_vector(H, (long) 1, (long) num_item);
    free_vector(dum_H, (long) 1, (long) num_item);
    free_vector(X, (long) 1, (long) num_item);

    free_matrix(mat_f, (long) 1, (long) num, (long) 1, (long) num);
    free_matrix(data1, (long) 1, (long) n1, (long) 1, (long) num);
    free_matrix(data2, (long) 1, (long) n2, (long) 1, (long) num);
    free_vector(vec_h, (long) 1, (long) num);
    free_vector(vec_g1, (long) 1, (long) num);
    free_vector(vec_g2, (long) 1, (long) num);
    free_vector(vec_x, (long) 1, (long) num);
    free_vector(dum_x, (long) 1, (long) num);
    free_vector(range, (long) 1, (long) length);

    free_ivector(item, (long) 1, (long) length);
    free_vector(dist, (long) 1, (long) length);
    free_vector(f_value, (long) 1, (long) length);
    free_lvector(range_index, (long) 1, (long) length);


    /* anything that is not float * or float ** is released by hand */
    free(index);

    /* release the record arrays */
    struct_free(data_set1, n1);
    struct_free(data_set2, n2);


    return(0);
}


/* Examine the data file named by file_name and report the length of
   its sequences and the number of records.

   Lines beginning with '#' are comments.  Every other line must have
   the form  field1:field2:sequence ; the first two fields carry the
   sample ID and are ignored here.  All sequences must have the same
   length.

   *num is incremented once per record (the caller initialises it) and
   *length receives the length of the sequence that makes *num equal
   to 1.  Returns 0 on success, -1 when the file cannot be opened, a
   line is malformed or the lengths differ; a message is written to
   stderr in every failure case. */
int check_samples(char *file_name, unsigned long *length, unsigned long *num)
{
    FILE *file_ptr;
    char buf[256];		/* fgets() may store sizeof buf bytes */
    char *cdum;
    int status = 0;

    if((file_ptr = fopen(file_name, "r")) == NULL)
	{
	    fprintf(stderr, "can not read file %s.\n", file_name);
	    return(-1);
	}
    while(fgets(buf, sizeof buf, file_ptr) != NULL)
	{
	    /* only non-comment lines are records */
	    if(buf[0] == '#')
		{
		    continue;
		}
	    (*num)++;

	    /* skip the two ID fields, then take the sequence up to the
	       end of the line */
	    if((strtok(buf, ":") == NULL) || 
	       (strtok(NULL, ":") == NULL) || 
	       ((cdum = strtok(NULL, "\n")) == NULL))
		{
		    status = -1;
		    break;
		}

	    if(*num == 1)
		{
		    *length = strlen(cdum);
		}
	    else if(*length != (unsigned long) strlen(cdum))
		{
		    status = -1;
		    break;
		}
	}
    if(status != 0)
	{
	    fprintf(stderr, "invalid format in %s.\n", file_name);
	}
    /* single exit point so the stream is closed on errors as well */
    fclose(file_ptr);

    return(status);
}

/* i$@!_(Jj$@$N(J2$@<!85G[Ns(J**matrix$@$N=PNO$9$k!#(J */
int print_matrix(float **matrix, int i, int j)
{
    int dum_i, dum_j;

    for(dum_i = 1; dum_i <= i; dum_i++)
	{
	    for(dum_j = 1; dum_j <= j; dum_j++)
		{
		    /*
		      fprintf(stderr, "%13.7f\t", matrix[dum_i][dum_j]);
		      */
		    fprintf(stderr, "%1.0f ", matrix[dum_i][dum_j]);
		}
	    fprintf(stderr, "\n");
	}

    return(0);
}

/* $@D9$5(Ji$@$N%Y%/%H%k(J*vector$@$N=PNO$9$k!#(J */
int print_vector(float *vector, int i)
{
    int dum_i;

    for(dum_i = 1; dum_i <= i; dum_i++)
	{
	    /*
	    fprintf(stderr, "%13.7f\t", vector[dum_i]);
	    */
	    printf("%13.7f\t", vector[dum_i]);
	}
    /*
    fprintf(stderr, "\n");
    */
    printf("\n");

    return(0);
}

/* n$@<!85$N%Y%/%H%k(J*a$@$H(J*b$@$NOB$r$H$j!"%Y%/%H%k(J*ans$@$KBeF~$9$k!#(J */
int vector_plus(float *a, float *b, int n, float* ans)
{
    int i;

    for(i = 1; i <= n; i++)
	{
	    ans[i] = a[i] + b[i];
	}

    return(0);
}

/* n$@<!85$N%Y%/%H%k(J*a$@$KDj?t(Jc$@$r$+$1!"%Y%/%H%k(J*ans$@$KBeF~$9$k!#(J */
int vector_times(float *a, float c, int n, float* ans)
{
    int i;

    for(i = 1; i <= n; i++)
	{
	    ans[i] = a[i] * c;
	}

    return(0);
}

/* i$@!_(Jj$@$N9TNs(J**a$@$H9TNs(J**b$@$NOB$r$H$j!"9TNs(J**ans$@$KBeF~$9$k!#(J */
int matrix_plus(float **a, float **b, int i, int j, float **ans)
{
    int dum_i, dum_j;

    for(dum_i = 1; dum_i <= i; dum_i++)
	{
	    for(dum_j = 1; dum_j <= j; dum_j++)
		{
		    ans[dum_i][dum_j] = a[dum_i][dum_j] + b[dum_i][dum_j];
		}
	}

    return(0);
}

/* i$@!_(Jj$@$N9TNs(J**a$@$KDj?t(Jc$@$r$+$1!"9TNs(J**ans$@$KBeF~$9$k!#(J */
int matrix_times(float **a, float c, int i, int j, float **ans)
{
    int dum_i, dum_j;

    for(dum_i = 1; dum_i <= i; dum_i++)
	{
	    for(dum_j = 1; dum_j <= j; dum_j++)
		{
		    ans[dum_i][dum_j] = a[dum_i][dum_j] * c;
		}
	}

    return(0);
}

/* Return a newly allocated copy of token including its terminating
   NUL, or NULL when memory is exhausted. */
static char *dup_token(const char *token)
{
    char *copy = (char *) calloc (strlen(token) + 1, sizeof(char));

    if(copy != NULL)
	{
	    strcpy(copy, token);
	}
    return(copy);
}

/* Allocate the record array and fill it from the data file.

   *set receives an array of n + 1 zero-initialised DATA_SET records;
   records 1..n are used so that the indexing matches the other arrays
   of the program.  Every non-comment line (comment lines start with
   '#') must have the form  source:position:sequence ; each field is
   copied into freshly allocated storage of the matching member.

   Returns 0 on success.  Returns -1, after a message on stderr, when
   memory runs out, the file cannot be opened, a line is malformed or
   the file holds more than n records.  On failure *set and the fields
   stored so far are left allocated for the caller. */
int struct_allocation(DATA_SET **set, char *file_name, int n)
{
    FILE *file_ptr;
    char buf[256];		/* fgets() may store sizeof buf bytes */
    DATA_SET *entry;
    char *cdum;
    int i = 0;
    int status = 0;

    /* the record array runs from 1 to n (same as the other arrays) */
    if((*set = (DATA_SET *) calloc (n + 1, sizeof(DATA_SET) )) == NULL ){
	fprintf(stderr, "not enought memory...\n");
	return(-1);
    }

    if((file_ptr = fopen(file_name, "r")) == NULL)
	{
	    fprintf(stderr, "can not read file %s.\n", file_name);
	    return(-1);
	}
    while(fgets(buf, sizeof buf, file_ptr) != NULL)
	{
	    /* only non-comment lines are records */
	    if(buf[0] == '#')
		{
		    continue;
		}

	    /* never write past the n records that were allocated */
	    if(++i > n)
		{
		    fprintf(stderr, 
			    "too many sequences in %s.\n", file_name);
		    status = -1;
		    break;
		}
	    entry = *set + i;

	    if((cdum = strtok(buf, ":")) == NULL)
		{
		    fprintf(stderr, 
			    "invalid format in %s.\n", file_name);
		    status = -1;
		    break;
		}
	    if((entry -> source = dup_token(cdum)) == NULL)
		{
		    fprintf(stderr, "not enought memory...\n");
		    status = -1;
		    break;
		}

	    if((cdum = strtok(NULL, ":")) == NULL)
		{
		    fprintf(stderr, 
			    "invalid format in %s.\n", file_name);
		    status = -1;
		    break;
		}
	    if((entry -> position = dup_token(cdum)) == NULL)
		{
		    fprintf(stderr, "not enought memory...\n");
		    status = -1;
		    break;
		}

	    if((cdum = strtok(NULL, "\n")) == NULL)
		{
		    fprintf(stderr, 
			    "invalid format in %s.\n", file_name);
		    status = -1;
		    break;
		}
	    if((entry -> sequence = dup_token(cdum)) == NULL)
		{
		    fprintf(stderr, "not enought memory...\n");
		    status = -1;
		    break;
		}
	}
    /* single exit point so the stream is closed on errors as well */
    fclose(file_ptr);

    return(status);
}

/* Release the source, position and sequence members of records 1..n
   of the array set, then the array itself.  Always returns 0. */
int struct_free(DATA_SET *set, int n)
{
    int rec = 1;

    while(rec <= n)
	{
	    DATA_SET *entry = set + rec;

	    free(entry -> source);
	    free(entry -> position);
	    free(entry -> sequence);
	    rec++;
	}
    free(set);

    return(0);
}

/* Convert the sequences of records 1..n of data_set into 0/1
   indicator rows.

   For sequence i and position j (1..l) the four columns
   4*j-3 .. 4*j of (*data)[i] are set to 1.0 for the letter found at
   that position, in the order 'A', 'T', 'C', 'G', and to 0.0
   otherwise.

   Returns 0 on success.  Any other character, including the end of a
   sequence shorter than l, is reported on stderr and makes the
   function return -1; the four columns of that position have been
   zeroed by then.  The sequence is read in place, so no limit on its
   length is imposed. */
int convert_data(float ***data, DATA_SET *data_set, unsigned long l,
		 unsigned long n)
{
    static const char letters[4] = {'A', 'T', 'C', 'G'};
    unsigned long i, j;
    int b;

    for(i = 1; i <= n; i++)
	{
	    const char *seq = (data_set + i) -> sequence;
	    float *row = (*data)[i];

	    for(j = 1; j <= l; j++)
		{
		    char c = seq[j - 1];
		    int recognised = 0;

		    /* column 4*j-3+b belongs to letters[b] */
		    for(b = 0; b < 4; b++)
			{
			    if(c == letters[b])
				{
				    row[j * 4 - 3 + b] = 1.0;
				    recognised = 1;
				}
			    else
				{
				    row[j * 4 - 3 + b] = 0.0;
				}
			}

		    /* also stops at the terminating NUL, so the loop
		       never reads beyond the end of the sequence */
		    if(recognised == 0)
			{
			    fprintf(stderr, "invalid character...%c\n", c);
			    return(-1);
			}
		}
	}

    return(0);
}

/* For every column i in 1..l, count the rows j in 1..n of data whose
   element data[j][i] exceeds EPS and store the count in (*vec)[i].
   Always returns 0. */
int calc_g(float **vec, float **data, unsigned long l, 
	   unsigned long n)
{
    int col, row;

    for(col = 1; col <= l; col++)
	{
	    int hits = 0;

	    row = 1;
	    while(row <= n)
		{
		    hits += (data[row][col] > EPS) ? 1 : 0;
		    row++;
		}
	    (*vec)[col] = hits;
	}

    return(0);
}

/* Store in (*vec)[i], for i in 1..n, the difference
   vec1[i] / n1 - vec2[i] / n2 .  Always returns 0. */
int calc_h(float **vec, float *vec1, float *vec2, unsigned long n, 
	       unsigned long n1, unsigned long n2)
{
    const float size1 = (float) n1;
    const float size2 = (float) n2;
    int pos = 1;

    while(pos <= n)
	{
	    (*vec)[pos] = vec1[pos] / size1 - vec2[pos] / size2;
	    pos++;
	}

    return(0);
}

/* Fill the size x size matrix *mat (see "Quantification", Asakura
   Shoten).

   With n_jk[i] the number of rows of mat1 (1..n1) and mat2 (1..n2)
   whose column i exceeds EPS, element (i, j) becomes

       (rows in which columns i and j both exceed EPS)
           - n_jk[i] * n_jk[j] / (n1 + n2)

   Returns 0 on success, -1 (after a message on stderr) when calc_g
   reports a failure.  The work vectors are released in both cases. */
int calc_f(float ***mat, float **mat1, float **mat2, unsigned long size, 
	       unsigned long n1, unsigned long n2)
{
    float *vector(long, long);
    void free_vector(float *, long, long);

    float *n_jk, *n_jk1, *n_jk2;
    float total;
    int status = 0;
    int u, v, s;

    /* allocate the work vectors */
    n_jk = (float*) vector((long) 1, (long) size);
    n_jk1 = (float*) vector((long) 1, (long) size);
    n_jk2 = (float*) vector((long) 1, (long) size);

    /* compute n_jk: per-column counts of both groups, then their sum */
    if((calc_g(&n_jk1, mat1, size, n1) != 0) || 
       (calc_g(&n_jk2, mat2, size, n2) != 0))
	{
	    fprintf(stderr, "error occureds in function calc_g.\n");
	    status = -1;
	}
    else
	{
	    for(u = 1; u <= size; u++)
		{
		    n_jk[u] = n_jk1[u] + n_jk2[u];
		}

	    /* the cast binds to n1 only, exactly as in the formula
	       evaluated per element */
	    total = (float) n1 + n2;

	    /* compute f(uv,lm) */
	    for(u = 1; u <= size; u++)
		{
		    for(v = 1; v <= size; v++)
			{
			    int both = 0;

			    for(s = 1; s <= n1; s++)
				{
				    if((mat1[s][u] > EPS) && 
				       (mat1[s][v] > EPS))
					{
					    both++;
					}
				}
			    for(s = 1; s <= n2; s++)
				{
				    if((mat2[s][u] > EPS) && 
				       (mat2[s][v] > EPS))
					{
					    both++;
					}
				}
			    (*mat)[u][v] = (float) both - 
				n_jk[u] * n_jk[v] / total;
			}
		}
	}

    /* release the work vectors */
    free_vector(n_jk, (long) 1, (long) size);
    free_vector(n_jk1, (long) 1, (long) size);
    free_vector(n_jk2, (long) 1, (long) size);

    return(status);
}


/* Score records 1..n of *data_set: the score of record i is the sum
   over j = 1..size of data[i][j] * x[j], accumulated in a float and
   written to the record's score member (any previous score is
   overwritten).  Always returns 0. */
int evaluate_sequences(DATA_SET **data_set, float **data, 
		       float *x, unsigned long size, unsigned long n)
{
    int rec = 1;

    while(rec <= n)
	{
	    float total = 0.0;
	    int col;

	    for(col = 1; col <= size; col++)
		{
		    total += data[rec][col] * x[col];
		}
	    ((*data_set) + rec) -> score = total;
	    rec++;
	}

    return(0);
}


/* Add the letter counts of records 1..n of data_set to *mat.

   For every sequence and every position j in 1..l, (*mat)[j][1],
   [2], [3] or [4] is incremented when the letter at that position is
   'A', 'T', 'C' or 'G' respectively; other characters are ignored.
   The matrix is accumulated into, not cleared, so the caller zeroes
   it first.  A sequence shorter than l contributes only its own
   characters.  The sequence is read in place, so no limit on its
   length is imposed.  Always returns 0. */
int calc_freq(float ***mat, DATA_SET *data_set, unsigned long l, 
	      unsigned long n)
{
    unsigned long i, j;

    for(i = 1; i <= n; i++)
	{
	    const char *seq = (data_set + i) -> sequence;

	    for(j = 1; j <= l; j++)
		{
		    char c = seq[j - 1];

		    /* stop at the terminator instead of reading on */
		    if(c == '\0')
			{
			    break;
			}
		    switch(c)
			{
			case 'A':
			    (*mat)[j][1]++;
			    break;
			case 'T':
			    (*mat)[j][2]++;
			    break;
			case 'C':
			    (*mat)[j][3]++;
			    break;
			case 'G':
			    (*mat)[j][4]++;
			    break;
			default:
			    /* unknown letters are not counted */
			    break;
			}
		}
	}

    return(0);
}
