#include <stdio.h> 
#include <string.h> 
#include <stdlib.h> 
#include <time.h> 

#define gapInitiation 5 
#define gapExtension 1 

int alignment(char* s1, char* s2);
int max(int a, int b, int c);

int blosum62[24][24] =
 {4, -1, -2, -2,  0, -1, -1,  0, -2, -1, -1, -1, -1, -2, -1,  1,  0, -3, -2,  0, -2, -1,  0, -4,
 -1,  5,  0, -2, -3,  1,  0, -2,  0, -3, -2,  2, -1, -3, -2, -1, -1, -3, -2, -3, -1,  0, -1, -4,
 -2,  0,  6,  1, -3,  0,  0,  0,  1, -3, -3,  0, -2, -3, -2,  1,  0, -4, -2, -3,  3,  0, -1, -4,
 -2, -2,  1,  6, -3,  0,  2, -1, -1, -3, -4, -1, -3, -3, -1,  0, -1, -4, -3, -3,  4,  1, -1, -4,
  0, -3, -3, -3,  9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4,
 -1,  1,  0,  0, -3,  5,  2, -2,  0, -3, -2,  1,  0, -3, -1,  0, -1, -2, -1, -2,  0,  3, -1, -4,
 -1,  0,  0,  2, -4,  2,  5, -2,  0, -3, -3,  1, -2, -3, -1,  0, -1, -3, -2, -2,  1,  4, -1, -4,
  0, -2,  0, -1, -3, -2, -2,  6, -2, -4, -4, -2, -3, -3, -2,  0, -2, -2, -3, -3, -1, -2, -1, -4,
 -2,  0,  1, -1, -3,  0,  0, -2,  8, -3, -3, -1, -2, -1, -2, -1, -2, -2,  2, -3,  0,  0, -1, -4,
 -1, -3, -3, -3, -1, -3, -3, -4, -3,  4,  2, -3,  1,  0, -3, -2, -1, -3, -1,  3, -3, -3, -1, -4,
 -1, -2, -3, -4, -1, -2, -3, -4, -3,  2,  4, -2,  2,  0, -3, -2, -1, -2, -1,  1, -4, -3, -1, -4,
 -1,  2,  0, -1, -3,  1,  1, -2, -1, -3, -2,  5, -1, -3, -1,  0, -1, -3, -2, -2,  0,  1, -1, -4,
 -1, -1, -2, -3, -1,  0, -2, -3, -2,  1,  2, -1,  5,  0, -2, -1, -1, -1, -1,  1, -3, -1, -1, -4,
 -2, -3, -3, -3, -2, -3, -3, -3, -1,  0,  0, -3,  0,  6, -4, -2, -2,  1,  3, -1, -3, -3, -1, -4,
 -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4,  7, -1, -1, -4, -3, -2, -2, -1, -2, -4,
  1, -1,  1,  0, -1,  0,  0,  0, -1, -2, -2,  0, -1, -2, -1,  4,  1, -3, -2, -2,  0,  0,  0, -4,
  0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  1,  5, -2, -2,  0, -1, -1,  0, -4,
 -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1,  1, -4, -3, -2, 11,  2, -3, -4, -3, -2, -4,
 -2, -2, -2, -3, -2, -1, -2, -3,  2, -1, -1, -2, -1,  3, -3, -2, -2,  2,  7, -1, -3, -2, -1, -4,
  0, -3, -3, -3, -1, -2, -2, -3, -3,  3,  1, -2,  1, -1, -2, -2,  0, -3, -1,  4, -3, -2, -1, -4,
 -2, -1,  3,  4, -3,  0,  1, -1,  0, -3, -4,  0, -3, -3, -2,  0, -1, -4, -3, -3,  4,  1, -1, -4,
 -1,  0,  0,  1, -3,  3,  4, -2,  0, -3, -3,  1, -1, -3, -1,  0, -1, -3, -2, -2,  1,  4, -1, -4,
  0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2,  0,  0, -2, -1, -1, -1, -1, -1, -4,
 -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,  1};

main(int argc, char *argv[]) {
	int start,ende,n1,n2,score,tStart,tUsed,tRem,nRem;
	int rDays,rHours,rMins,rSecs;
	int n=0,numseq=0,nCount=0,nTot=0;
	float tps;
	long lFileLen;
	FILE  *ifp, *ofp;
	char *buf;
	char **seqs;
	printf ("Dynamic Needleman-Wunsch alignment matrix, 31.12.2007 by Nick Fankhauser\n");
	if (argc<5) {
		printf("Syntax: dnwa <sequences.txt> <matrix.txt> <start_line> <end_line>\n");
		exit(1);
	}
	start=atoi(argv[3]);
	ende=atoi(argv[4]);
	printf("Sequence file: %s\nMatrix file: %s\n",argv[1],argv[2]);
	if ((ifp=fopen(argv[1],"r"))==NULL) {
		printf("%s not found.\n",argv[1]);
		exit(1);
	}
	fseek(ifp, 0L, SEEK_END);     /* Position to end of file */
	lFileLen = ftell(ifp);        /* Get file length */
	rewind(ifp);                  /* Back to start of file */
	buf = malloc(sizeof(char) * lFileLen);
	while (!feof(ifp)) {
		fgets (buf,lFileLen,ifp);
		numseq++;
	}
	numseq--;
	printf("Sequence file size: %d, sequences: %d\n",lFileLen,numseq);
	if (ende>numseq) {ende=numseq;}
	if (start>numseq) {start=numseq;}
	rewind(ifp);                  /* Back to start of file */
	seqs = malloc(sizeof(char *) * (numseq));
	while (!feof(ifp)) {
		fgets (buf,lFileLen,ifp);
		buf[strlen(buf)-1]='\0';          /* gets rid of the new line */
		seqs[n] = malloc(sizeof(char) * (strlen(buf)+1));
		strcpy(seqs[n],buf);
		n++;
	}
	fclose(ifp);
	free(buf);
	if ((ofp=fopen(argv[2],"w"))==NULL) {
		printf("%s: not writable!\n",argv[2]);
		exit(1);
	}
	fclose(ofp);
	for (n1=start;n1<ende;n1++) {
		for (n2 = 0; n2 <= n1; n2++) {
			nTot++;
		}
	}
	printf("Computing from sequence %d to %d, %d alignments...\n",start,ende,nTot);
	tStart=time(NULL);
	for (n1=start;n1<ende;n1++) {
		for (n2 = 0; n2 <= n1; n2++) {
			ofp=fopen(argv[2],"a");
			score=alignment(seqs[n1], seqs[n2]);
     			fprintf(ofp, "%d ",score);
			nCount++;
			fclose(ofp);
		}
		tUsed=time(NULL)-tStart+1;
		tps=(float)tUsed/(float)nCount;
		nRem=nTot-nCount;
		tRem=tps*nRem;
		rDays=tRem/(60*60*24);
		rSecs=tRem-60*60*24*rDays;
		rHours=rSecs/(60*60);
		rSecs-=60*60*rHours;
		rMins=rSecs/60;
		rSecs-=60*rMins;
		printf("%d/%d alignments; ",nCount,nTot);
		if (rDays>0) {printf("%d days, ",rDays);}
		if (rHours>0) {printf("%d hours, ",rHours);}
		if (rMins>0) {printf("%d minutes, ",rMins);}
		printf("%d seconds remaining.\n",rSecs);
		ofp=fopen(argv[2],"a");
		fprintf(ofp,"\n");
		fclose(ofp);
	}
}

int max(int a, int b, int c) {
        if ((b>=a) && (b>=c)) {return b;}
        if ((c>=a) && (c>=b)) {return c;}
        return a;
}

int alignment(char* s1, char* s2) {
        int d1, d2, d3, i, j, l1, l2, x, mx, retScore, gap, **m;
	int ci ['Z'];
	ci['A']=0;ci['R']=1;ci['N']=2;ci['D']=3;ci['C']=4;ci['Q']=5;ci['E']=6;ci['G']=7;ci['H']=8;
	ci['I']=9;ci['L']=10;ci['K']=11;ci['M']=12;ci['F']=13;ci['P']=14;ci['S']=15;ci['T']=16;
	ci['W']=17;ci['Y']=18;ci['V']=19;ci['B']=20;ci['Z']=21;ci['X']=22;ci['*']=23;
        l1=strlen(s1);
	l2=strlen(s2);
	gap=gapExtension;
	m = malloc(sizeof(int *) * (l1+1));
	for(x = 0; x < l1+1; x ++) {
		m[x] = malloc(sizeof(int) * (l2+1));
	}
        m[0][0]=0;
        for (i=0;i<l1;i++) {m[i+1][0]=m[i][0]-gap;}
        for (j=0;j<l2;j++) {m[0][j+1]=m[0][j]-gap;} /* same at the first column. see paper why... */
        for (j=0;j<l2;j++) {                    /* outer line loop with j */
                for (i=0;i<l1;i++) {            /* inner line position loop with i */
                        d1=m[i][j]+blosum62[ci[s1[i]]][ci[s2[j]]];
                        d2=m[i][j+1]-gap;            /* score of left minus gap penalty */
                        d3=m[i+1][j]-gap;            /* score of up minus gap penalty */
			mx=max(d1,d2,d3);
			m[i+1][j+1]=mx;
			if ((mx==d2) || (mx==d3)) {gap=gapInitiation;}
			else {gap=gapExtension;}

                }
        }
	retScore=m[l1][l2];
	for(x = 0; x < l1+1; x ++) {free(m[x]);}
	free(m);
        return retScore;
}