/*------------------------------------------------------------ Copyright (C) 1999 by Rolf Backofen, Sebastian Will. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for NON-COMMERCIAL purposes and without fee is hereby granted provided that this copyright notice appears in all copies. THE AUTHORS AND PUBLISHER MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. ------------------------------------------------------------*/ // ============================================================ // // Implementation of Smith-Waterman Similarity Sequence Alignment // // AUTHORS: Rolf Backofen, Sebastian Will // // DESCIRPTION: Smith-Waterman Alignment Algorithm with // either AFFINE or LOGARITHMIC gap costs // and either LOKAL or GLOBAL alignment // #include #include #include #include #include #include "matrix.hh" #include "matrices.hh" #include "trace.hh" #include "tracearrow.hh" #include "printarray.hh" #include "basicdef.hh" #include "options.h" int main (int argc, char *argv[]) { // --------------------------------------------------------- // options // char *a_str=0; char *b_str=0; char *sm_filename=0; cost_t match; cost_t mismatch; cost_t indel_var; cost_t indel_const; bool logarithmic; bool local; bool quiet; bool sm_opt; option_def options[] = { {0,0,0,O_ARG_STRING,&a_str,0,"sequence a","The first Sequence"}, {0,0,0,O_ARG_STRING,&b_str,0,"sequence b","The second Sequence"}, {"match",'m',0,O_ARG_DOUBLE,&match,"4.0","num","Cost of match"}, {"mismatch",'i',0,O_ARG_DOUBLE,&mismatch,"-4.0","num", "Cost of mismatch"}, {"scorematrix",'s',&sm_opt,O_ARG_STRING,&sm_filename,0,"filename", "File containing a score matrix"}, {"gapconst",'c',0,O_ARG_DOUBLE,&indel_const,"-4.0","num", "Cost to begin insertion or deletion "}, {"gapvar",'v',0,O_ARG_DOUBLE,&indel_var,"-2.0","num", "Cost to begin insertion or deletion "}, {"logarithmical",'l',&logarithmic,O_NO_ARG,0,0,0, "Logarithmical gap costs"}, {"local",'o',&local,O_NO_ARG,0,0,0, "Local alignment"}, {"quiet",'q',&quiet,O_NO_ARG,0,0,0, "Quiet mode. Just print alignment"}, // STOP {0,0,0,0,0,0,0,0} }; if (!process_options(argc,argv,options)) { print_help(argv[0],options); exit(-1); }; print_options(options); Sequence a(a_str); Sequence b(b_str); // --------------------------------------------------------- // auxiliary variables // int n = a.length(); int m = b.length(); cost_t v_diag, v_up[n], v_left[m]; // --------------------------------------------------------- // define the distance matrix and trace matrix // DistanceMatrix D(a,b); TraceMatrix TraceD(n,m,D); // --------------------------------------------------------- // the different arrows // // define arrows // TraceArrow *diag_arrow; TraceArrow *left_arrow; TraceArrow *up_arrow; ScoreMatrix *scorematrix=0; if (sm_opt) { scorematrix = new ScoreMatrix( sm_filename ); diag_arrow = new ScoreMatrixArrow(TraceD, 1, *scorematrix); } else { diag_arrow = new SimilarityArrow(TraceD, 1, match, mismatch); } if (logarithmic) { left_arrow = new LogarithmicIndelArrow(TraceD,0,1,indel_var,indel_const); up_arrow = new LogarithmicIndelArrow(TraceD,1,0,indel_var,indel_const); } else { left_arrow = new AffineIndelArrow(TraceD,0,1,indel_var,indel_const); up_arrow = new AffineIndelArrow(TraceD,1,0,indel_var,indel_const); } // --------------------------------------------------------- // Intialization // D[0][0]=0; for (int i=1;i<=n;++i) { if (local) D[i][0] = 0; else { D[i][0] = up_arrow->cost(i,0,i); TraceD[i][0].push_back( TMatrixEntryElem(*up_arrow,i) ); } } for (int j=1;j<=m;++j) { if (local) D[0][j] = 0; else { D[0][j] = left_arrow->cost(0,j,j); TraceD[0][j].push_back( TMatrixEntryElem(*left_arrow,j) ); } } cost_t maxD=MINUSINFTY; int max_i=0, max_j=0; // --------------------------------------------------------- // recursive definition of D-Matrix // for (int i=1;i<=n;++i) for (int j=1;j<=m;++j) { // calculate D_ij // // for debuging // cout << "hier i: " << i << " j:" << j << endl; if (local) D[i][j] = 0; else D[i][j] = MINUSINFTY; v_diag= D.get(diag_arrow->end_position(i,j)) + diag_arrow->cost(i,j,a,b); D[i][j] = max(D[i][j],v_diag); for (int k=1; k<=i; k++) { v_up[k] = D.get(up_arrow->end_position(i,j,k)) + up_arrow->cost(i,j,k); D[i][j] = max(D[i][j],v_up[k]); } for (int k=1; k<=j; k++) { v_left[k] = D.get(left_arrow->end_position(i,j,k)) + left_arrow->cost(i,j,k); D[i][j] = max(D[i][j],v_left[k]); } // set trace entries // if (v_diag == D[i][j]) TraceD[i][j].push_back( TMatrixEntryElem(*diag_arrow) ); for (int k=1; k<=i; k++) if (v_up[k] == D[i][j]) TraceD[i][j].push_back( TMatrixEntryElem(*up_arrow,k) ); for (int k=1; k<=j; k++) if (v_left[k] == D[i][j]) TraceD[i][j].push_back( TMatrixEntryElem(*left_arrow,k) ); // calculate maximal element. // if (D[i][j] > maxD) { max_i = i; max_j = j; maxD = D[i][j]; } } #ifndef NDEBUG cout<