Blmodel.h

Go to the documentation of this file.
00001 #ifndef BLMODEL_H
00002 #define BLMODeL_H
00003 #include "Match.h"
00004 #include <mysql++.h>
00005 
00017 class Blmodel : public Match {
00018    public:
00019       Blmodel(const string &q, const string &t, int fp, int ql, int tl,
00020             int qb, int qe, int tb, int te, int ex, double sc, 
00021             double ide, double qc, float ol)
00022          : Match(qb,qe,tb,te), qid(q), tid(t), fpnum(fp), qlen(ql), tlen(tl),
00023             numex(ex), score(sc), iden(ide), qcov(qc), olp(ol)
00024             { }
00025 
00026       /* the order of the column is in the qbegin,qend,tbegin,tend
00027        * in the input table */
00028       Blmodel(const mysqlpp::Row &r)
00029          : Match(int(r[5]), int(r[6]), int(r[7]), int(r[8])),
00030                qid(string(r.at(0))), tid(string(r[1])), fpnum(int(r[2])), 
00031                qlen(int(r[3])), tlen(int(r[4])), numex(int(r[9])), 
00032                score(double(r[10])), iden(double(r[11])), 
00033                qcov(double(r[12])), olp(float(r[13])) 
00034                { }
00035 
00036       const string& getQid() const { return qid; }
00037       const string& getTid() const { return tid; }
00038       int getFpnum() const { return fpnum; }
00039 
00040       /* Obtain the garget boundary that is likely to contain the
00041        * complete gene. If the guide protein sequence start from 1
00042        * then the 5' end of the gene is only expanded by 6 nt.
00043        * Same is true if the guide protein end is used up in the
00044        * alignment.
00045        *
00046        * Parameters: 
00047        *     ease is the 1/2 of intergenic + 5'-UTR on the 5'-side
00048        *         and the 1/2 of intergenic + 3'-UTR
00049        *     cr is the genomic/CDS ratio this parameter depends on genomes.
00050        *         This ratio will be computed from the underlying model by
00051        *         target range/ query range.  The program will pick the larger
00052        *         of t/q ratio and cr to compute the covered region in genomic.
00053        *
00054        * Return: the start and end of the target.  
00055        *
00056        * Use bioseq coordinates: start from 1
00057        * The cr is the ratio of total DNA to coding.
00058        * For human this number is very high 50 to 100,
00059        * for lower eukaryotes this number is cloase to 3-4.
00060        * Default value is 4.1.
00061        * */
00062       pair<int,int> getExpectedTargetRange(int ease=500, double cr=4.1) const;
00063 
00064       int getExonNum() const { return numex; }
00065       /* the score is derived from the sum of the scores of the
00066        * matching fragments
00067        */
00068       double getScore() const { return score; }
00069       /* normalized score, removed the overlapping regions
00070        */
00071       double getNormScore() const { return score/(1+olp); }
00072       int queryLength() const { return qlen; }
00073       int targetLength() const { return tlen; }
00074       friend ostream& operator<<(ostream &ous, const Blmodel &bm);
00075       bool queryHasStart() const { return 1 == queryBegin(); }
00076       bool queryHasStop() const { return qlen == queryEnd(); }
00077 
00078    private:
00079       string qid;
00080       string tid;
00081       int fpnum, qlen, tlen, numex;
00082       double score, iden, qcov;
00083       float olp;
00084       //bool hasbegin; has been removed from the new version
00085       //bool hasend; has been removed from the new version
00086 };
00087 
00088 #endif

Generated on Wed Oct 14 21:49:14 2009 for Softwares from Orpara by  doxygen 1.5.6