// BiCluster.cpp: implementation of the BiCluster class.
//
#include "BiCluster.h"
double logprop[3];
double Alphan;
double delta;
double gsldelta;
char *LnPfile = "lnp.txt";
char *Posteriorfile = "posterior.txt";
bool useConstMarginPrior = true;
int mNC = 0, mNU = 0;
int minlimit;
int TrueLimit;
int mixture = 1; //0: use independence marker model
//1: use a mixture of independence model and 1st order Markov chain
//2: use 1st order Markov chain
double autoRestart = 10;
typedef struct MySortTypeb {
double value;
int indn;
int indices[5];
} MySortTypeb;
bool operator<(const MySortTypeb &a, const MySortTypeb &b)
{ return a.value < b.value; }
BiCluster::BiCluster(int acn)
{ mPrior1 = mPrior2 = 0.001;
gsl_rng_env_setup();
T = gsl_rng_default;
gammar = gsl_rng_alloc(T);
gsl_rng_set(gammar, (unsigned int)time(NULL));
verbose = true;
alleleCn = acn;
Alphan = 0.5 * (double)acn;
delta = Alphan / (double)alleleCn;
gsldelta = gsl_sf_lngamma(delta);
allVectors = true;//
minDistance = 1000000; //1Mb
mBurnin = 100000;//5000;
mMCMC = 900000;//15000;
mThin = 1000;
}
BiCluster::~BiCluster()
{
gsl_rng_free(gammar);
}
void BiCluster::getHapRep(vector<vector<char> > const &data, vector<int> const &Dloci, vector<bool> const &indState,
vector<int> &haprep, vector<int> &hapid, vector<int> &hapcn)
{
int i, j, k;
int N = (int)data.size();
int l = (int)Dloci.size();
haprep.clear();
hapid.clear();
hapcn.clear();
hapid.resize(N, -1);
if(l == 0) return;
for(i = 0; i < N; i++)
if((int)indState[i])
{ for(k = 0; k < (int)haprep.size(); k++)
{ for(j = 0; j < l; j++)
if(data[i][Dloci[j]] != data[haprep[k]][Dloci[j]])
break;
if(j >= l)
break;
}
if(k >= (int)haprep.size())
{ haprep.push_back(i);
hapcn.push_back(1);
}
else
hapcn[k]++;
hapid[i] = k;
}
}
void BiCluster::getHapRep(vector<vector<char> > const &data, vector<int> const &Dloci, vector<bool> const &indState,
vector<HAPLOTYPE> &haps, vector<int> &hapid)
{
int i, j, k;
int N = (int)data.size();
int l = (int)Dloci.size();
haps.clear();
hapid.clear();
hapid.resize(N, -1);
for(i = 0; i < N; i++)
if(indState[i])
{ for(k = 0; k < (int)haps.size(); k++)
{ for(j = 0; j < l; j++)
if(data[i][Dloci[j]] != data[haps[k].repID][Dloci[j]])
break;
if(j >= l)
break;
}
if(k >= (int)haps.size())
{ HAPLOTYPE nh;
nh.repID = i;
nh.hCn = 1;
nh.bFreq = 1.;
nh.code = 0;
for(j = 0; j < (int)Dloci.size(); j++)
{ //tmpremove nh.bFreq *= alleleFreq[Dloci[j]][(int)data[i][Dloci[j]]];
nh.code = nh.code * alleleCn + (int)data[i][Dloci[j]];
}
haps.push_back(nh);
}
else
haps[k].hCn++;
hapid[i] = k;
}
}
void BiCluster::incHapRep(vector<vector<char> > const &data, int loci, vector<bool> const &indState,
vector<int> &hapRep, vector<int> &hapId, vector<int> &hapCn)
{ int sz = (int)hapRep.size();
int N = (int)data.size();
int hsz = alleleCn * sz;
if(hsz == 0) hsz = alleleCn;
vector<int> newrep(hsz, -1);
vector<int> newid(N);
vector<int> newcn(hsz, 0);
vector<int> pointer(hsz, -1);
int i, j = 0, k;
for(i = 0; i < N; i++)
if(indState[i])
{ if(sz == 0) k = (int)data[i][loci];
else
{ k = hapId[i];
k += sz * (int)data[i][loci];
}
if(pointer[k] < 0)
{ newrep[j] = i;
pointer[k] = j;
j++;
}
newcn[pointer[k]]++;
newid[i] = pointer[k];
}
for(i = alleleCn * sz - 1; i > 0; i--)
if(newrep[i] >= 0)
break;
newrep.resize(i + 1);
newcn.resize(i + 1);
hapRep = newrep;
hapCn = newcn;
hapId = newid;
}
void BiCluster::incHapRep(vector<vector<char> > const &data, int loci, vector<bool> const &indState,
vector<HAPLOTYPE> &haps, vector<int> &hapId)
{ int sz = (int)haps.size();
int N = (int)data.size();
int hsz = alleleCn * sz;
if(hsz == 0) hsz = alleleCn;
vector<HAPLOTYPE> newhaps(hsz);
vector<int> newid(N);
vector<int> pointer(hsz, -1);
int i, j = 0, k, t;
for(i = 0; i < N; i++)
if(indState[i])
{ if(sz == 0) t = (int)data[i][loci];
else
{ k = hapId[i];
t = (int)data[i][loci] * sz + k;
}
if(pointer[t] < 0)
{ newhaps[j].repID = i;
newhaps[j].hCn = 0;
newhaps[j].code = (int)data[i][loci];
if(sz > 0)
newhaps[j].code += haps[k].code * alleleCn;
//tmpremove
/*if((int)haps.size() > 0)
newhaps[j].bFreq = haps[k].bFreq * alleleFreq[loci][data[i][loci]];
else newhaps[j].bFreq = alleleFreq[loci][data[i][loci]];
*/
pointer[t] = j;
j++;
}
if(pointer[t] < 0 || pointer[t] >= (int)newhaps.size())
i=i;
newhaps[pointer[t]].hCn++;
newid[i] = pointer[t];
}
newhaps.resize(j);
haps = newhaps;
hapId = newid;
}
void BiCluster::decHapRep(vector<vector<char> > const &data, int loci, vector<int> const &Dloci, vector<bool> const &indState,
vector<int> &hapRep, vector<int> &hapId, vector<int> &hapCn)
{
int i, j, k;
if((int)Dloci.size() == 1)
{ hapRep.clear();
hapCn.clear();
for(i = 0; i < (int)hapId.size(); i++)
hapId[i] = -1;
return;
}
vector<int> idchange((int)hapRep.size(), -1);
for(i = 0; i < (int)hapRep.size() - 1; i++)
{ int x = hapRep[i];
if(idchange[i] >= 0) continue;
for(j = i + 1; j < (int)hapRep.size(); j++)
{ int y = hapRep[j];
for(k = 0; k < (int)Dloci.size(); k++)
if(Dloci[k] != loci && data[x][Dloci[k]] != data[y][Dloci[k]])
break;
if(k >= (int)Dloci.size()) //two are identical
{ idchange[j] = i;
hapCn[i] += hapCn[j];
// break;
}
}
}
vector<int> newRep;
vector<int> newCn;
vector<int> newid((int)hapRep.size(), -1);
for(i = 0; i < (int)hapRep.size(); i++)
{ if(idchange[i] < 0)
{ newRep.push_back(hapRep[i]);
newCn.push_back(hapCn[i]);
newid[i] = (int)newRep.size() - 1;
}
}
for(i = 0; i < (int)hapId.size(); i++)
if(hapId[i] >= 0)
{ if(idchange[hapId[i]] >= 0)
hapId[i] = newid[idchange[hapId[i]]];
else hapId[i] = newid[hapId[i]];
}
hapRep = newRep;
hapCn = newCn;
}
void BiCluster::decHapRep(vector<vector<char> > const &data, int loci, vector<int> const &Dloci, vector<bool> const &indState,
vector<HAPLOTYPE> &haps, vector<int> &hapId)
{
int i, j, k;
if((int)Dloci.size() == 1)
{ haps.clear();
for(i = 0; i < (int)hapId.size(); i++)
hapId[i] = -1;
return;
}
vector<int> idchange((int)haps.size(), -1);
//tmpremove
//for(i = 0; i < (int)haps.size(); i++)
// haps[i].bFreq /= alleleFreq[loci][(int)data[haps[i].repID][loci]];
for(i = 0; i < (int)Dloci.size(); i++)
if(Dloci[i] == loci)
break;
k = i;
int mod = (int)(pow((double)alleleCn, (double)Dloci.size() - k - 1.) + 0.5);
int sz = (int)(pow((double)alleleCn, (double)Dloci.size()) + 0.5);
vector<int> ch(sz, -1);
for(i = 0; i < (int)haps.size(); i++)
{ k = haps[i].code - (int)data[haps[i].repID][loci] * mod;
if(ch[k] >= 0)
{ idchange[i] = ch[k];
haps[ch[k]].hCn += haps[i].hCn;
}
else ch[k] = i;
}
vector<HAPLOTYPE> newhaps;
vector<int> newid((int)haps.size(), -1);
j = 0;
for(i = 0; i < (int)haps.size(); i++)
{ if(idchange[i] < 0)
{ newhaps.push_back(haps[i]);
newid[i] = (int)newhaps.size() - 1;
newhaps[j].code = newhaps[j].code % mod + (newhaps[j].code - newhaps[j].code % (mod * alleleCn)) / alleleCn;
j++;
}
}
for(i = 0; i < (int)hapId.size(); i++)
if(hapId[i] >= 0)
{ if(idchange[hapId[i]] >= 0)
hapId[i] = newid[idchange[hapId[i]]];
else hapId[i] = newid[hapId[i]];
}
haps = newhaps;
}
void BiCluster::computeAlleleFreq(vector<vector<char> > const &data)
{
int N = (int)data.size();
int L = (int)data[0].size();
alleleFreq.clear();
alleleFreq.resize(L, vector<double>(all
评论0