/*------------------------------------------------------------- Copyright (C) 2000 Peter Clote. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for NON-COMMERCIAL purposes and without fee is hereby granted provided that this copyright notice appears in all copies. THE AUTHOR AND PUBLISHER MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. -------------------------------------------------------------*/ /****************************************** freqDNA.c P. Clote, 15 Dec 1996 Program computes frequency of single and dinucleotides ******************************************/ #include main(int argc, char *argv[]) { FILE * in; int n=0,i,j,a=0,c=0,g=0,t=0; /** a,c,g,t nucleotides */ int num = 0,s=0,k=0,w=0,r=0,y=0; /* num nucleotides in file **/ char ch; int first,second,index1,index2; float freqA,freqC,freqG,freqT; /*** for nucleotide frequencies */ int pair[4][4]; /*** for dinucleotide counts */ float freq[4][4]; /*** for dinucleotide frequencies */ void error(char *); /** error handling */ /**** Error handling for input file ****/ if (argc != 2) { fprintf(stderr,"%s filename\n",argv[0]); exit(1); } /***** Initialization of input file and pair array **/ in = fopen(argv[1],"r"); for (i=0;i<4;i++) for (j=0;j<4;j++) pair[i][j] = 0; /** remove first line header from M. jannaschii genome file **/ while ( ( ch = fgetc(in)) != '\n' ); /** remove first line **/ /** process first 2 char in file **/ first = fgetc(in); num++; switch ( first ) { case 'A' : a++; index1 = 0; break; case 'C' : c++; index1 = 1; break; case 'G' : g++; index1 = 2; break; case 'T' : t++; index1 = 3; break; default : error("Bad Data"); break; } second = fgetc(in); num++; switch ( second ) { case 'A' : a++; index2 = 0; break; case 'C' : c++; index2 = 1; break; case 'G' : g++; index2 = 2; break; case 'T' : t++; index2 = 3; break; default : error("Bad Data"); break; } pair[index1][index2]++; /** continue processing **/ do { first = second; index1 = index2; second = fgetc(in); if ( second == EOF ) break; if ( second == '\n' ) { second = fgetc(in); if ( second == EOF ) break; } /**** assumes not 2 '\n' in a row **/ switch ( second ) { case 'A' : a++; index2 = 0; break; case 'C' : c++; index2 = 1; break; case 'G' : g++; index2 = 2; break; case 'T' : t++; index2 = 3; break; case 'N' : n++; index2 = 4; break; case 'S' : s++; index2 = 5; break; case 'K' : k++; index2 = 6; break; case 'W' : w++; index2 = 7; break; case 'R' : r++; index2 = 8; break; case 'Y' : y++; index2 = 9; break; default : printf("%c\n",second); error("Bad Data"); break; } num++; if ( (index1 < 4) && (index2 < 4) ) pair[index1][index2]++; } while ( 1 ); /** infinite while loop */ /*** compute dinucleotide frequencies **/ for (i=0;i<4;i++) for(j=0;j<4;j++) freq[i][j] = (float) pair[i][j] / (float) (num-1); /*** process stats **/ printf("Nucleotides: %d\n\n",num); printf("A:%d\t",a); printf("C:%d\t",c); printf("G:%d\t",g); printf("T:%d\t",t); printf("N:%d\t\n",n); printf("S:%d\t\t",s); printf("K:%d\t\t",k); printf("W:%d\t\t",w); printf("R:%d\t\t",r); printf("Y:%d\t",y); printf("\n\n"); freqA = (float) a / (float) num; freqC = (float) c / (float) num; freqG = (float) g / (float) num; freqT = (float) t / (float) num; printf("Nucleotide frequencies\n"); printf("%c: %5.4f\t%c: %5.4f\t%c: %5.4f\t%c: %5.4f\n\n", 'A',freqA,'C',freqC,'G',freqG,'T',freqT); printf("Dinucleotides: %d\n\n",num-1); printf("\t\tA \t\tC \t\tG \t\tT\n"); for (i=0;i<80;i++) printf("-"); printf("\n"); printf("A\t\t"); for (j=0;j<4;j++) printf("%6.5f\t\t",freq[0][j]); printf("\n"); printf("C\t\t"); for (j=0;j<4;j++) printf("%6.5f\t\t",freq[1][j]); printf("\n"); printf("G\t\t"); for (j=0;j<4;j++) printf("%6.5f\t\t",freq[2][j]); printf("\n"); printf("T\t\t"); for (j=0;j<4;j++) printf("%6.5f\t\t",freq[3][j]); printf("\n"); } void error(char * s) { fprintf(stderr,"%s\n",s); exit(1); }