/*------------------------------------------------------------- Copyright (C) 2000 Peter Clote. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for NON-COMMERCIAL purposes and without fee is hereby granted provided that this copyright notice appears in all copies. THE AUTHOR AND PUBLISHER MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. -------------------------------------------------------------*/ /*************************************************** Program: multipleGotoh.c Given input file of strings, all with same first and last letter, produces distance matrix. This incorporates the correction to Nei, Waterman, but only computes the distances, rather than the alignment. The distances are INTEGER rather than float. In the program sequence phy0.c phy1.c phy2.c it is assumed that the distances are floats. ***************************************************/ #include #include /* prototype for calloc, malloc */ #include /* strcmp() prototype */ #define N 10 /* upper bound on number strings */ #define MAXLEN 80 /* max length of string (name) */ #define INF 30000 #define SPACE ' ' #define MIN2(x,y) ( (x) <= (y) ? (x) : (y) ) #define MAX2(x,y) ( (x) <= (y) ? (y) : (x) ) #define MIN(x,y,z) MIN2(MIN2(x,y),z) #define MISMATCH 3 /* cost of mismatched letters */ #define D(i,j) d[(i)*(m+1) + (j)] #define P(i,j) p[(i)*(m+1) + (j)] #define Q(i,j) q[(i)*(m+1) + (j)] #define U 2 #define V 2 #define W(i) U*(i)+V /* cost of gap of length i, i>0 */ #define S(i) s[(i)-1] #define T(i) t[(i)-1] /* D is integer matrix with n+1 rows and m+1 cols, where n = length of s, m = length of t */ #define GAMMA(i,j) ( S(i) == T(j) ? 0 : MISMATCH ) typedef char String[MAXLEN]; void error ( char *s ); /* prototype of error function */ void error ( char *s ) { fprintf(stderr,"%s\n",s); exit(1); } /**************** MAIN BODY ********************/ main(int argc, char *argv[]) { String s,t; int distances[N][N]; /* holds distances between strings */ int *d; int i,j,n,m; int *dist ( char *s, char *t ); void display(int *, char *, char *); FILE *in; int num_strings; int num_lines(FILE*); String* strings, *initialize_array(FILE*, int); void display_array(String*, int); void display_distances( int d[N][N], int); void fancy_display_distances(int d[][], int ); /********* ERROR CHECKING in command line arguments *********/ if (argc != 2) { fprintf(stderr,"Usage: %s filename.\n",argv[0]); exit(1); } if ((in = fopen(argv[1],"r")) == NULL) { fprintf(stderr,"%s is not a readable file.\n", argv[1]); exit(1); } /********* Compute size of array and initialize ******/ num_strings = num_lines(in); strings = initialize_array(in,num_strings); if ( N < num_strings ) { fprintf(stderr,"Set constant N to %d\n",num_strings); exit(1); } for (i=0;i m ) continue; /* index out of bounds */ P(i,j) = MIN2( D(i-1,j) + W(1), P(i-1,j) + U ); Q(i,j) = MIN2( D(i,j-1) + W(1), Q(i,j-1) + U ); D(i,j) = MIN( D(i-1,j-1) + GAMMA(i,j), P(i,j), Q(i,j) ); } return(d); } void display ( int *d, char *s, char *t ) { /* display the distance matrix d(i,j) between strings s,t where 0<=i<=n = strlen(s) and 0<=j<=m = strlen(t) */ int i,j, n = strlen(s), m = strlen(t); printf("Distance Matrix for alignment of %s with %s.\n\n",s,t); /****************** print letters of string t ***********/ printf("%5c",SPACE); printf("%5c",SPACE); for (j=1;j<=m;j++) printf("%5c",T(j)); printf("\n"); /********************* print D(0,0),...,D(0,m) ************/ printf("%5c",SPACE); for (j=0;j<=m;j++) printf("%5d",D(0,j)); printf("\n"); for (i=1;i<=n;i++) { printf("%5c",S(i)); for (j=0;j<=m;j++) printf("%5d",D(i,j)); printf("\n"); } } void convert(char *p) { /* fgets() keeps the \n marker, but when we compare strings using strcmp(), the presence of the marker may keep a match from being noticed. Hence we strip the \n marker */ while (*p != '\n') ++p; *p = '\0'; } String *initialize_array(FILE *in, int n) { /* Assume that file contains strings, one per line */ int i; String *strings; void convert(char *); strings = (String*) calloc(n,sizeof(String)); for (i=0;i