A: From Anuj’s paper: https://zenodo.org/record/377036/files/genotypes.zip?download=1
B: From John’s paper: https://zenodo.org/record/2586963/files/GigaMUGA%20CC%20Genotypes.csv?download=1
C: From HaploQA: http://haploqa-dev.jax.org/tag/Collaborative%20Cross.html; Download the “SNP Level Haplotype Report” for each CC sample from HaploQA.
Note:
We excluded the samples with % No Call>10%. There are 2 samples to be excluded: AFC, AE4
For each sample, to integrate two alleles, we did as following: 1) if both are same, use one to represent it homozygosity. e.g. both alleles are ‘A’, and its genotyoe is ‘A’; 2) if both are “No Call”, it will be labeled as “N”; 3) if both are different, label as ‘H’ meaning heterozygosity; 4) if any allele is “No Call”, it will be labeled as “N”.
We identified potential wrong assigments by calculating similarity among samples within each strain.
Anno = read.csv("./data/Haploqa/Haploqa_Anno.csv")
### QC by % No Call, exclude the samples % No Call>10%
Anno$X..No.Call = as.numeric(sub("%", "", Anno$X..No.Call))
Anno = subset(Anno, X..No.Call<=10)
Strain = unique(Anno$StrainID)
HapD = data.frame(snp_id=character())
for(i in Strain){
tmp1 = subset(Anno, StrainID==i)$ID
tmp1 = paste0('./data/Haploqa/',tmp1,'.txt')
tmp2 = data.frame(snp_id=character())
for(j in tmp1){
dnp = read.delim(j,sep='\t', header=TRUE)
dnp_1 = dnp[,c("snp_id","allele1_fwd","allele2_fwd")];
dnp_1 = dnp_1 %>% dplyr::mutate(geno = ifelse( allele1_fwd==allele2_fwd, allele1_fwd,
ifelse(allele1_fwd!='-'& allele2_fwd!='-', 'H',
ifelse(allele1_fwd=='-','-',
ifelse(allele2_fwd=='-','-', 'M')))))
dnp_1$geno = gsub("-","N",dnp_1$geno)
colnames(dnp_1)[colnames(dnp_1) == "geno"] <- dnp[1,1]
tmp2 = tmp2 %>% full_join(dnp_1[,c(1,4)], by = 'snp_id')
}
### Check similarity of samples for each Strain
C_out = apply(tmp2[,-1],2,FUN=paste, collapse = '')
C_out = stringsimmatrix(C_out, C_out, method = 'hamming')
rownames(C_out)=colnames(C_out) = colnames(tmp2)[-1]
if(length(which(C_out<0.9))>0){cat(i,sep="\n");
print( htmltools::tagList(datatable(round(C_out,digits = 3),options=list(pageLength=100,autoWidth=TRUE)) ))
}
}
CC007
CC029
CC081
CC015
CC035
Note:
In general, based on the results the similarity within a strain is larger than 0.9, and if similarity of sample(s) is below 0.7, will be potentially wrong assignment(s). Based on the similarity results, There are 5 strains showing potential wrong assignments: CC007, CC029, CC081, CC015, CC035. Among these 5 strains, 1) For samples from CC007 and 009, they are likely wrongly labeled; 2) CC081 shows that all values is around 0.80; 3) for Strain “CC015” and “CC035”, sample “V6” and “VP” show similarity (<0.7) with other samples, and all these samples may need further checking.
Use Anuj’s and John’s CC data to calculate similarity with these samples for each strain, and determine the strains they belong to
CC_Anuj = read.csv("./data/SEQgenotypes_Anuj.csv"); rownames(CC_Anuj)=CC_Anuj$marker
CC_John = read.csv("./data/GigaMUGA CC Genotypes_john.csv"); rownames(CC_John)=CC_John$Marker
Ha_PCC = Strain
colnames(CC_Anuj) = paste(colnames(CC_Anuj),"Anuj", sep='_')
colnames(CC_John) = paste(colnames(CC_John),"John", sep='_')
CC_com = merge(CC_Anuj, CC_John, by = "row.names")
CC_com = CC_com %>% select(Row.names, starts_with('CC'))
for(i in Ha_PCC){
tmp1 = subset(Anno, StrainID==i)$ID
tmp1 = paste0('./data/Haploqa/',tmp1,'.txt')
tmp2 = data.frame(snp_id=character())
for(j in tmp1){
dnp = read.delim(j,sep='\t', header=TRUE)
dnp_1 = dnp[,c("snp_id","allele1_fwd","allele2_fwd")];
dnp_1 = dnp_1 %>% dplyr::mutate(geno = ifelse( allele1_fwd==allele2_fwd, allele1_fwd,
ifelse(allele1_fwd!='-'& allele2_fwd!='-', 'H',
ifelse(allele1_fwd=='-','-',
ifelse(allele2_fwd=='-','-', 'M')))))
dnp_1$geno = gsub("-","N",dnp_1$geno)
colnames(dnp_1)[colnames(dnp_1) == "geno"] <- dnp[1,1]
tmp2 = tmp2 %>% full_join(dnp_1[,c(1,4)], by = 'snp_id')
}
### Check outliers for the Strain
C_out = merge(CC_com, tmp2, by.x='Row.names',by.y='snp_id', all=F)
N_C_out = colnames(C_out)[-1]
C_out = apply(C_out[,-1],2,FUN=paste, collapse = '')
C_out = stringsimmatrix(C_out, C_out, method = 'hamming')
rownames(C_out)=colnames(C_out) = N_C_out
C_out = C_out[,colnames(tmp2)[-1]]
C_out = C_out[apply(C_out, 1, function(x) max(x)>0.85),]%>%
t()%>% as.data.frame %>%
select(starts_with('CC')) %>% select(sort(tidyselect::peek_vars()))%>%t()
if(nrow(C_out)==0){cat(i,sep="\n");print(paste0("No Similar Strain for ", i));cat("\n");next}
SInfo = unique(gsub("_.*","",rownames(C_out)))
if(length(which(C_out<0.8))>0 | length(SInfo)!=1 | SInfo != i ){
cat(i,sep="\n")
print( htmltools::tagList(datatable(round(C_out,digits = 3),options=list(pageLength=100,autoWidth=TRUE)) ))
}
}
CC007
CC029
CC084 [1] “No Similar Strain for CC084”
CC083 CC030CC081 [1] “No Similar Strain for CC081”
CC015 CC059 CC035Note:
From the analysis above, 1) In CC007, there are 6 samples (ADV,ADW,ADX,ADY,ADZ,AE2) which belong to CC029; 2) In CC029, there are 6 samples (ADG,ADH,ADJ,ADK,ADM,AND) which belong to CC007; 3) For CC083 and CC030, it is likely that the labels were switched. 4) For samples in CC059, they show high similarity to both CC051 and CC059 in two datasets, but more similar to CC059; 5) For samples in CC081, they didn’t show high similarity(>0.8) to the CC081 in two datasets. This is partially due to their high % Het. Calls; The samples “VP” and “V6” in CC035 and CC015 also have higher % Het. Calls that could lead to low similarity within their strains (check following table for details). We will remove “VP” and “V6” from downstream processing.
Anno$X..Het..Calls = as.numeric(sub("%", "", Anno$X..Het..Calls))
SB_d = subset(Anno, X..Het..Calls>10)[,1:10];
SB_d$X..Het..Calls = paste0(SB_d$X..Het..Calls, '%'); SB_d$X..No.Call = paste0(SB_d$X..No.Call, '%')
DT::datatable(SB_d, filter='top', rownames=FALSE,options=list(pageLength=10, autoWidth=TRUE))
We will: 1) Update the annotation file based on previous analysis (Haploqa_Anno_update); 2)Collapse samples for each strain by “find_consensus_geno”; 3) Merge with snp info (from Karl’s annotation file: “gm_uwisc_v1.csv”); 4) Remove the SNPs with missing position info.
Note: “gm_uwisc_v1.csv” is downloaed from https://www.jax.org/research-and-faculty/genetic-diversity-initiative/tools-data/diversity-outbred-reference-data
SNP_R = read.csv("./data/gm_uwisc_v1.csv")[,c('marker','chr','bp_mm10')]
Anno = read.csv("./data/Haploqa/Haploqa_Anno_update.csv")
### QC by % No Call, exclude the samples % No Call>10%
Anno$X..No.Call = as.numeric(sub("%", "", Anno$X..No.Call))
Anno = subset(Anno, X..No.Call<=10)
Strain = unique(Anno$StrainID)
HapD = data.frame(snp_id=character())
for(i in Strain){
tmp1 = subset(Anno, StrainID==i)$ID
tmp1 = paste0('./data/Haploqa/',tmp1,'.txt')
tmp2 = data.frame(snp_id=character())
for(j in tmp1){
dnp = read.delim(j,sep='\t', header=TRUE)
dnp_1 = dnp[,c("snp_id","allele1_fwd","allele2_fwd")];
dnp_1 = dnp_1 %>% dplyr::mutate(geno = ifelse( allele1_fwd==allele2_fwd, allele1_fwd,
ifelse(allele1_fwd!='-'& allele2_fwd!='-', 'H',
ifelse(allele1_fwd=='-','-',
ifelse(allele2_fwd=='-','-', 'M')))))
dnp_1$geno = gsub("-","N",dnp_1$geno)
colnames(dnp_1)[colnames(dnp_1) == "geno"] <- dnp[1,1]
tmp2 = tmp2 %>% full_join(dnp_1[,c(1,4)], by = 'snp_id')
}
tmp3 <- qtl2convert::find_consensus_geno(tmp2[,-1], na.strings=c('N'))
tmp2 = cbind(tmp2$snp_id,tmp3);colnames(tmp2) = c("snp_id",i)
HapD = HapD %>% full_join(as.data.frame(tmp2), by = 'snp_id')
}
HapD = HapD[, c('snp_id',sort(colnames(HapD)[-1]))]
HapD = merge(SNP_R,HapD, by.x='marker',by.y='snp_id', all=F)
HapD = subset(HapD, chr!='NA')
HapD[is.na(HapD)] = "N"
#write.csv(HapD,"./data/Haploqa_34Strain_SNP.csv",row.names = F)
### check percentage of 'No Call' in each strain
cat("Percentage of 'No Call' in each strain",sep='\n')
## Percentage of 'No Call' in each strain
print(apply(HapD[,4:37],2,function(x) sum(x=='N'))/nrow(HapD)*100)
## CC001 CC005 CC007 CC008 CC010 CC015 CC016 CC020
## 3.520701 3.490124 3.382378 3.728187 3.450811 3.632816 3.391842 3.680865
## CC022 CC023 CC024 CC027 CC028 CC029 CC030 CC031
## 3.908736 3.664849 4.363748 3.751483 3.726003 3.505413 3.659025 3.253518
## CC033 CC035 CC036 CC044 CC045 CC055 CC057 CC059
## 3.441347 3.910920 3.316128 4.734309 3.847582 3.864326 3.658297 3.739107
## CC060 CC065 CC074 CC075 CC078 CC079 CC080 CC081
## 3.956057 3.541086 3.792980 3.964065 4.990572 5.001492 7.453461 7.892457
## CC083 CC084
## 5.250475 4.054339
### check percentage of 'Het. Calls' in each strain
cat("Percentage of heterozygous genotypes in each strain",sep='\n')
## Percentage of heterozygous genotypes in each strain
print(apply(HapD[,4:37],2,function(x) sum(x=='H'))/nrow(HapD)*100)
## CC001 CC005 CC007 CC008 CC010 CC015 CC016
## 0.6850661 0.6836101 0.7542280 0.7367555 1.3599400 1.8266004 0.8161096
## CC020 CC022 CC023 CC024 CC027 CC028 CC029
## 0.9442410 0.6712338 1.3002424 0.8554227 1.3155308 0.7331154 1.2223444
## CC030 CC031 CC033 CC035 CC036 CC044 CC045
## 0.5518386 0.7513159 0.7826207 1.2733057 1.1844874 1.6831806 1.3148028
## CC055 CC057 CC059 CC060 CC065 CC074 CC075
## 1.8134960 2.3209255 1.1910395 1.8964902 1.0316033 2.2058984 1.6183869
## CC078 CC079 CC080 CC081 CC083 CC084
## 4.1045727 1.3861487 3.1727080 21.1489600 2.1927941 2.1614892
### check percentage of 'Hom. Calls' in each strain
cat("Percentage of homozygous genotypes in each strain",sep='\n')
## Percentage of homozygous genotypes in each strain
print(apply(HapD[,4:37],2,function(x) sum(x!='H' & x!='N'))/nrow(HapD)*100)
## CC001 CC005 CC007 CC008 CC010 CC015 CC016 CC020
## 95.79423 95.82627 95.86339 95.53506 95.18925 94.54058 95.79205 95.37489
## CC022 CC023 CC024 CC027 CC028 CC029 CC030 CC031
## 95.42003 95.03491 94.78083 94.93299 95.54088 95.27224 95.78914 95.99517
## CC033 CC035 CC036 CC044 CC045 CC055 CC057 CC059
## 95.77603 94.81577 95.49938 93.58251 94.83762 94.32218 94.02078 95.06985
## CC060 CC065 CC074 CC075 CC078 CC079 CC080 CC081
## 94.14745 95.42731 94.00112 94.41755 90.90486 93.61236 89.37383 70.95858
## CC083 CC084
## 92.55673 93.78417
From the above results, CC081 shows higher percentage of heterozygous genotypes than other strains. We also compared it with John’s dataset, and found it was also higher than John’s CC081.
CC81_H = subset(Anno, StrainID=='CC081')[,c(1,3,7:9)]
CC81_H[,5] = paste0(CC81_H[,5],'%')
CC81_J = CC_John %>% select(starts_with('CC081'))
CC81_info = vector( "numeric" , 3)
CC81_info[1] = round(sum(CC81_J=='H')/nrow(CC81_J)*100, digits=2)
CC81_info[2] = round(sum(CC81_J!='H' & CC81_J!='N')/nrow(CC81_J)*100, digits=2)
CC81_info[3] = round(sum(CC81_J=='N')/nrow(CC81_J)*100, digits=2)
CC81_info = paste0(CC81_info,'%')
CC81_info = c('CC081_John','CC081', CC81_info)
CC81_info = as.data.frame(rbind(CC81_H,CC81_info));
colnames(CC81_info) = c('ID','Strain.name','Het_Calls%','Hom_Calls%','No_Call%')
DT::datatable(CC81_info, filter='top', rownames=FALSE,options=list(pageLength=10, autoWidth=TRUE))
There are different SNP sets between HaploQA and Anuj’s/John’s dataset.First we checked those unique SNPs in HaploQA.
CC_Anuj = read.csv("./data/SEQgenotypes_Anuj.csv");
CC_Haploqa = read.csv("./data/Haploqa_34Strain_SNP.csv")
### Unique SNPs in HaploQA data which is "Haploqa_34Strain_SNP.csv
### Check the minor AF for each unique SNP without counting "N"
CCU_Haploqa = CC_Haploqa %>% filter(!(marker %in% CC_Anuj$marker))
MiAF = apply(CCU_Haploqa[,4:ncol(CCU_Haploqa)], 1, function(x) min(table(as.factor(x),exclude = 'N')))
MiAF = data.frame(marker=CCU_Haploqa$marker, MAF=MiAF/34.0)
cat("Distribution of Minor Allele Frequencies of unique SNPs across 34 strains in HaploQA",sep='\n')
## Distribution of Minor Allele Frequencies of unique SNPs across 34 strains in HaploQA
MiAF %>% ggplot(aes(MAF)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We also checked ‘NO Call’ frequencies for all SNPs in HaploQA data
### Check 'NO Call' frequencies for all SNPs in HaploQA data
NCAF = apply(CC_Haploqa[,4:ncol(CCU_Haploqa)], 1, function(x) sum(x=='N'))
NCAF = data.frame(marker=CC_Haploqa$marker, No_Call_AF=NCAF/34.0 )
cat("Distribution of 'NO Call' Frequencies of all SNPs across 34 strains in HaploQA",sep='\n')
## Distribution of 'NO Call' Frequencies of all SNPs across 34 strains in HaploQA
NCAF %>% ggplot(aes(No_Call_AF)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The following is to integrate 3 genotype datasets
Note: 1) Filter SNPs in Anuj’s and John’s data based on Karl’s annotation file “gm_uwisc_v1.csv”; 2) There are over 5000 unique SNPs in HaploQA data set compared with other 2 datasets. we only keep those unique SNPs with 0.1<MAF(Minor Allele Frequency)<0.9 in HaploQA data;3) For Anuj’s and John’s datasets, there are no unique SNPs compared with HaploQA data. 4) in HaploQA data, we removed CC081 due to its quality.
### SNP info from "gm_uwisc_v1.csv", and remove those SNPs without location info
SNP_R = read.csv("./data/gm_uwisc_v1.csv")[,c('marker','chr','bp_mm10')]
SNP_R = subset(SNP_R, chr!='NA')
### Load 3 datasets for integration: Anuj, John, Haploqa
CC_Anuj = read.csv("./data/SEQgenotypes_Anuj.csv");
CC_John = read.csv("./data/GigaMUGA CC Genotypes_john.csv"); colnames(CC_John)[2]='marker'
CC_Haploqa = read.csv("./data/Haploqa_34Strain_SNP.csv")
TStrain = c(colnames(CC_Anuj),colnames(CC_John),colnames(CC_Haploqa))
TStrain = unique(TStrain[grep("^CC.*", TStrain)])
### Filter SNPs in Anuj's and John's datasets
CC_Anuj = CC_Anuj %>% filter((marker %in% SNP_R$marker))
CC_John = CC_John %>% filter((marker %in% SNP_R$marker))
### Filter unique SNPs in HaploQA which show MAF >0.9 or <0.1
CCU_Haploqa = CC_Haploqa %>% filter(!(marker %in% CC_Anuj$marker))
MiAF = apply(CCU_Haploqa[,4:ncol(CCU_Haploqa)], 1, function(x) min(table(as.factor(x),exclude = 'N')))
MiAF = data.frame(marker=CCU_Haploqa$marker, MAF=MiAF/34.0)
MiAF = MiAF = subset(MiAF, MAF>0.1 & MAF<0.9)
MiAF = MiAF$marker
CC_Haploqa = CC_Haploqa %>% filter((marker %in% c(CC_Anuj$marker,MiAF)))
### Remove CC081 from HaploQA due to the quality.
CC_Haploqa$CC081 = NULL
### Merge 3 datasets
list_df = list(CC_Anuj,CC_John,CC_Haploqa)
CC_T = Reduce(function(x, y) full_join(x, y, by = "marker"), list_df)
CC_T = CC_T %>% select(marker, starts_with('CC'))
CC_T[is.na(CC_T)] = "N"; colnames(CC_T)[1] <- "snp_id"
TDCC = data.frame(snp_id=character())
for(i in TStrain){
tmp1 = CC_T %>% select(snp_id,starts_with(i))
if(dim(tmp1)[2]==2){colnames(tmp1) = c("snp_id",i);TDCC = TDCC %>% full_join(as.data.frame(tmp1), by = 'snp_id')}
if(dim(tmp1)[2]>2){
tmp2 <- qtl2convert::find_consensus_geno(tmp1[,-1], na.strings=c('N'))
tmp2 = cbind(tmp1$snp_id,tmp2);colnames(tmp2) = c("snp_id",i)
TDCC = TDCC %>% full_join(as.data.frame(tmp2), by = 'snp_id')
}
}
### replace SNPs which are unique in HaploQA
rownames(TDCC) = TDCC$snp_id;rownames(CC_Haploqa) = CC_Haploqa$marker;
TDCC[MiAF,colnames(CC_Haploqa)[-c(1:3)]] = CC_Haploqa[MiAF,4:ncol(CC_Haploqa)]
TDCC = TDCC[, c('snp_id',sort(colnames(TDCC)[-1]))]
TDCC = merge(SNP_R,TDCC, by.x='marker',by.y='snp_id', all=F)
TDCC = TDCC[!is.na(TDCC$chr),]; TDCC[is.na(TDCC)] = "N"
#write.csv(TDCC,"./data/CC_SNP_3into1.csv",row.names = F)
### check percentages of 'No Call', 'Het. Calls' and 'Hom. Calls' in each strain
SNP_info = data.frame(Strain=colnames(TDCC)[-c(1:3)])
SNP_info$'No_Call' = apply(TDCC[,4:ncol(TDCC)],2,function(x) sum(x=='N'))/nrow(TDCC)*100
SNP_info$'Het_Calls' = apply(TDCC[,4:ncol(TDCC)],2,function(x) sum(x=='H'))/nrow(TDCC)*100
SNP_info$'Hom_Calls' = apply(TDCC[,4:ncol(TDCC)],2,function(x) sum(x!='H' & x!='N'))/nrow(TDCC)*100
SNP_info = data.frame(SNP_info[,-1], row.names=SNP_info[,1])
colnames(SNP_info) = c('No_Call%','Het_Calls%','Hom_Calls%')
### check percentage of 'Hom. Calls' in each strain
cat("Summary for all CC strains",sep='\n')
## Summary for all CC strains
DT::datatable(round(SNP_info,digits = 2),options=list(pageLength=10,autoWidth=TRUE))
For some strains they show higher percentages of “No Call”. The potential reasons: 1) There are extra unique SNPs in HaploQA which lead to ‘No Call’ in other 2 datasets; 2) there are some SNPs which show no consense across 3 datasets
CC_T = read.csv("./data/CC_SNP_3into1.csv")
Founder_TM = read.csv("./data/CC_Founders/GigaMUGA_founder_consensus_genotypes_Mt.csv")
Founder_TY = read.csv("./data/CC_Founders/GigaMUGA_founder_consensus_genotypes_Y.csv")
CC_TM = subset(CC_T, marker %in%Founder_TM$marker)
CC_TM = CC_TM %>% select(marker,starts_with('CC'))
rownames(CC_TM) = CC_TM$marker;rownames(Founder_TM) = Founder_TM$marker;
CC_TM = CC_TM[Founder_TM$marker,]
CC_TM[CC_TM=='N']=NA
test_M = mitochondrial_Y_geno(Founder_TM, CC_TM)
CC_TM = subset(CC_T, marker %in%Founder_TY$marker)
CC_TM = CC_TM %>% select(marker,starts_with('CC'))
rownames(CC_TM) = CC_TM$marker;rownames(Founder_TY) = Founder_TY$marker;
CC_TM = CC_TM[Founder_TY$marker,]
CC_TM[CC_TM=='N']=NA
test_Y = mitochondrial_Y_geno(Founder_TY, CC_TM)
test_MY = test_M %>% inner_join(test_Y, by = 'id') %>% select(id, starts_with('mito_y'))
colnames(test_MY) = c('Strain_ID','mitochondria', 'Ychr')
cat("Infer Origins of mitochondrial and Ychr info",sep='\n')
## Infer Origins of mitochondrial and Ychr info
DT::datatable(test_MY,options=list(pageLength=10,autoWidth=TRUE))
Compared to the info from Srivastava et al. (2017) (doi:10.1534/genetics.116.198838) and Shorter et al. (2019) (doi: 10.1534/g3.119.400039), there are 3 missing for Ychr due to ‘No Call’ in the integrated data. With the Funnel Code from Shorter et al. (2019), they are matched each other.
myQTLdata <- read_cross2(file = "./data/CC_all.zip" )
summary(myQTLdata)
## Object of class cross2 (crosstype "risib8")
##
## Total individuals 76
## No. genotyped individuals 76
## No. phenotyped individuals 0
## No. with both geno & pheno 0
## No. covariates 4
## No. phenotype covariates 0
##
## No. chromosomes 20
## Total markers 120685
##
## No. markers by chr:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 9217 9201 7144 6967 6940 6944 6781 6030 6223 5884 6615 5612 5637 5555 4907 4588
## 17 18 19 X
## 4597 4258 3288 4297
## The haplotype reconstruction will take a while, so we use the one which has been produced
# infer genotypes, as those with maximal marginal probability
ginf <- maxmarg(pr,minprob=0.5)
# guess phase
ph <- guess_phase(myQTLdata, ginf)
### plot phased genotypes for 76 CC strains
for(i in rownames(myQTLdata$covar)){
cat(i,sep='\n')
plot_onegeno(ph, myQTLdata$pmap, ind= i ,col=CCcolors,shift=TRUE)
legend(11,170, legend =c("A/J","C57BL/6J","129S1/SvImJ","NOD/ShiLtJ","NZO/HlLtJ","CAST/EiJ","PWK/PhJ","WSB/EiJ"),
border = 'black', pch=20, pt.cex=2, cex=0.6, col = CCcolors,ncol=4, x.intersp = 1,y.intersp=0.9,text.width=1.55,title="Founders")
}
## CC001
## CC002
## CC003
## CC004
## CC005
## CC006
## CC007
## CC008
## CC009
## CC010
## CC011
## CC012
## CC013
## CC015
## CC016
## CC017
## CC018
## CC019
## CC020
## CC021
## CC022
## CC023
## CC024
## CC025
## CC026
## CC027
## CC028
## CC029
## CC030
## CC031
## CC032
## CC033
## CC034
## CC035
## CC036
## CC037
## CC038
## CC039
## CC040
## CC041
## CC042
## CC043
## CC044
## CC045
## CC046
## CC047
## CC049
## CC050
## CC051
## CC052
## CC053
## CC055
## CC056
## CC057
## CC058
## CC059
## CC060
## CC061
## CC062
## CC063
## CC065
## CC068
## CC070
## CC071
## CC072
## CC073
## CC074
## CC075
## CC076
## CC078
## CC079
## CC080
## CC081
## CC082
## CC083
## CC084