3 GDS - Solutions

  1. Set a filter selecting only multi-allelic variants. Inspect their genotypes using the different methods you learned above. Use the alleleDosage method to find dosage for the second (and third, etc.) alternate allele.
seqSetFilter(gds, variant.sel=multi.allelic)
## # of selected variants: 21
geno <- seqGetData(gds, "genotype")
dim(geno)
## [1]    2 1126   21
geno[,1:5,]
## , , 1
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    1    1    0    1    1
##   [2,]    0    1    1    1    1
## 
## , , 2
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 3
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 4
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    1    0    0    1    0
##   [2,]    0    0    0    0    1
## 
## , , 5
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 6
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 7
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 8
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 9
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    2
##   [2,]    0    0    0    0    0
## 
## , , 10
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    1    2    0    0    1
##   [2,]    0    2    0    2    1
## 
## , , 11
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    1    0    0
##   [2,]    1    1    0    1    0
## 
## , , 12
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    3    1    1    1    3
##   [2,]    3    3    3    1    0
## 
## , , 13
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    2    0
##   [2,]    0    0    0    0    0
## 
## , , 14
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 15
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    2    0    0    0    2
##   [2,]    0    0    0    2    0
## 
## , , 16
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    2    2    2    2    2
##   [2,]    2    2    2    2    2
## 
## , , 17
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    2    2    2    2    2
##   [2,]    2    2    2    2    2
## 
## , , 18
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 19
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    0    0
## 
## , , 20
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    1
##   [2,]    0    0    0    0    0
## 
## , , 21
## 
##       sample
## allele [,1] [,2] [,3] [,4] [,5]
##   [1,]    0    0    0    0    0
##   [2,]    0    0    0    2    0
geno <- getGenotype(gds)
dim(geno)
## [1] 1126   21
head(geno)
##          variant
## sample    30    69    73    161   162   195   243   253   407   431  
##   HG00096 "1|0" "0|0" "0|0" "1|0" "0|0" "0|0" "0|0" "0|0" "0|0" "1|0"
##   HG00097 "1|1" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "2|2"
##   HG00099 "0|1" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0"
##   HG00100 "1|1" "0|0" "0|0" "1|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|2"
##   HG00101 "1|1" "0|0" "0|0" "0|1" "0|0" "0|0" "0|0" "0|0" "2|0" "1|1"
##   HG00102 "0|1" "0|0" "0|0" "1|0" "0|0" "0|0" "0|0" "0|0" "0|0" "2|2"
##          variant
## sample    434   610   627   645   689   756   765   814   988   1014 
##   HG00096 "0|1" "3|3" "0|0" "0|0" "2|0" "2|2" "2|2" "0|0" "0|0" "0|0"
##   HG00097 "0|1" "1|3" "0|0" "0|0" "0|0" "2|2" "2|2" "0|0" "0|0" "0|0"
##   HG00099 "1|0" "1|3" "0|0" "0|0" "0|0" "2|2" "2|2" "0|0" "0|0" "0|0"
##   HG00100 "0|1" "1|1" "2|0" "0|0" "0|2" "2|2" "2|2" "0|0" "0|0" "0|0"
##   HG00101 "0|0" "3|0" "0|0" "0|0" "2|0" "2|2" "2|2" "0|0" "0|0" "1|0"
##   HG00102 "0|1" "3|3" "0|2" "0|0" "0|0" "2|2" "1|2" "0|0" "0|0" "0|1"
##          variant
## sample    1056 
##   HG00096 "0|0"
##   HG00097 "0|0"
##   HG00099 "0|0"
##   HG00100 "0|2"
##   HG00101 "0|0"
##   HG00102 "0|2"
geno <- getGenotypeAlleles(gds)
head(geno)
##          variant
## sample    30       69    73    161   162   195   243   253   407  
##   HG00096 "GT|GTT" "A|A" "G|G" "A|G" "A|A" "A|A" "C|C" "G|G" "T|T"
##   HG00097 "GT|GT"  "A|A" "G|G" "G|G" "A|A" "A|A" "C|C" "G|G" "T|T"
##   HG00099 "GTT|GT" "A|A" "G|G" "G|G" "A|A" "A|A" "C|C" "G|G" "T|T"
##   HG00100 "GT|GT"  "A|A" "G|G" "A|G" "A|A" "A|A" "C|C" "G|G" "T|T"
##   HG00101 "GT|GT"  "A|A" "G|G" "G|A" "A|A" "A|A" "C|C" "G|G" "C|T"
##   HG00102 "GTT|GT" "A|A" "G|G" "A|G" "A|A" "A|A" "C|C" "G|G" "T|T"
##          variant
## sample    431       434      610       627   645   689   756              
##   HG00096 "TAA|TA"  "G|GTTA" "G|G"     "G|G" "G|G" "C|G" "CGAGCAT|CGAGCAT"
##   HG00097 "T|T"     "G|GTTA" "GCC|G"   "G|G" "G|G" "G|G" "CGAGCAT|CGAGCAT"
##   HG00099 "TA|TA"   "GTTA|G" "GCC|G"   "G|G" "G|G" "G|G" "CGAGCAT|CGAGCAT"
##   HG00100 "TA|T"    "G|GTTA" "GCC|GCC" "C|G" "G|G" "G|C" "CGAGCAT|CGAGCAT"
##   HG00101 "TAA|TAA" "G|G"    "G|GC"    "G|G" "G|G" "C|G" "CGAGCAT|CGAGCAT"
##   HG00102 "T|T"     "G|GTTA" "G|G"     "G|C" "G|G" "G|G" "CGAGCAT|CGAGCAT"
##          variant
## sample    765         814     988           1014      1056 
##   HG00096 "C|C"       "CT|CT" "CGTGA|CGTGA" "C|C"     "A|A"
##   HG00097 "C|C"       "CT|CT" "CGTGA|CGTGA" "C|C"     "A|A"
##   HG00099 "C|C"       "CT|CT" "CGTGA|CGTGA" "C|C"     "A|A"
##   HG00100 "C|C"       "CT|CT" "CGTGA|CGTGA" "C|C"     "A|G"
##   HG00101 "C|C"       "CT|CT" "CGTGA|CGTGA" "CCATT|C" "A|A"
##   HG00102 "CATTATT|C" "CT|CT" "CGTGA|CGTGA" "C|CCATT" "A|G"
dos <- refDosage(gds)
head(dos)
##          variant
## sample    30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765
##   HG00096  1  2  2   1   2   2   2   2   2   1   1   0   2   2   1   0   0
##   HG00097  0  2  2   2   2   2   2   2   2   0   1   0   2   2   2   0   0
##   HG00099  1  2  2   2   2   2   2   2   2   2   1   0   2   2   2   0   0
##   HG00100  0  2  2   1   2   2   2   2   2   1   1   0   1   2   1   0   0
##   HG00101  0  2  2   1   2   2   2   2   1   0   2   1   2   2   1   0   0
##   HG00102  1  2  2   1   2   2   2   2   2   0   1   0   1   2   2   0   0
##          variant
## sample    814 988 1014 1056
##   HG00096   2   2    2    2
##   HG00097   2   2    2    2
##   HG00099   2   2    2    2
##   HG00100   2   2    2    1
##   HG00101   2   2    1    2
##   HG00102   2   2    1    1
dos <- altDosage(gds)
head(dos)
##          variant
## sample    30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765
##   HG00096  1  0  0   1   0   0   0   0   0   1   1   2   0   0   1   2   2
##   HG00097  2  0  0   0   0   0   0   0   0   2   1   2   0   0   0   2   2
##   HG00099  1  0  0   0   0   0   0   0   0   0   1   2   0   0   0   2   2
##   HG00100  2  0  0   1   0   0   0   0   0   1   1   2   1   0   1   2   2
##   HG00101  2  0  0   1   0   0   0   0   1   2   0   1   0   0   1   2   2
##   HG00102  1  0  0   1   0   0   0   0   0   2   1   2   1   0   0   2   2
##          variant
## sample    814 988 1014 1056
##   HG00096   0   0    0    0
##   HG00097   0   0    0    0
##   HG00099   0   0    0    0
##   HG00100   0   0    0    1
##   HG00101   0   0    1    0
##   HG00102   0   0    1    1
dos <- alleleDosage(gds, n=2)
head(dos)
##          variant
## sample    30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765
##   HG00096  0  0  0   0   0   0   0   0   0   0   0   0   0   0   1   2   2
##   HG00097  0  0  0   0   0   0   0   0   0   2   0   0   0   0   0   2   2
##   HG00099  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   2   2
##   HG00100  0  0  0   0   0   0   0   0   0   1   0   0   1   0   1   2   2
##   HG00101  0  0  0   0   0   0   0   0   1   0   0   0   0   0   1   2   2
##   HG00102  0  0  0   0   0   0   0   0   0   2   0   0   1   0   0   2   1
##          variant
## sample    814 988 1014 1056
##   HG00096   0   0    0    0
##   HG00097   0   0    0    0
##   HG00099   0   0    0    0
##   HG00100   0   0    0    1
##   HG00101   0   0    0    0
##   HG00102   0   0    0    1
dos <- alleleDosage(gds, n=3)
head(dos)
##          variant
## sample    30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765
##   HG00096  0  0  0   0   0   0   0   0   0   0   0   2   0   0   0   0   0
##   HG00097  0  0  0   0   0   0   0   0   0   0   0   1   0   0   0   0   0
##   HG00099  0  0  0   0   0   0   0   0   0   0   0   1   0   0   0   0   0
##   HG00100  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   HG00101  0  0  0   0   0   0   0   0   0   0   0   1   0   0   0   0   0
##   HG00102  0  0  0   0   0   0   0   0   0   0   0   2   0   0   0   0   0
##          variant
## sample    814 988 1014 1056
##   HG00096   0   0    0    0
##   HG00097   0   0    0    0
##   HG00099   0   0    0    0
##   HG00100   0   0    0    0
##   HG00101   0   0    0    0
##   HG00102   0   0    0    0
  1. Use the hwe function in SeqVarTools to run a Hardy-Weinberg Equilibrium test on each variant. Identify a variant with low p-value and inspect its genotypes. (Note that the HWE test is only valid for biallelic variants, and will return NA for multiallelic variants.)
seqResetFilter(gds)
## # of selected samples: 1,126
## # of selected variants: 1,120
hwe.res <- hwe(gds)
lowp <- !is.na(hwe.res$p) & hwe.res$p < 1e-4
head(hwe.res[lowp,])
##     variant.id nAA nAa naa     afreq            p         f
## 75          75 702 336  88 0.7726465 1.070663e-06 0.1506466
## 92          92 632 381 113 0.7304618 3.558878e-06 0.1407120
## 98          98 672 335 119 0.7455595 2.369695e-12 0.2158342
## 105        105  93 272 761 0.2033748 7.851777e-16 0.2544970
## 114        114 299 482 345 0.4795737 1.745346e-06 0.1424409
## 150        150 471 447 208 0.6167851 8.020208e-08 0.1602251
seqSetFilter(gds, variant.id=75)
## # of selected variants: 1
table(getGenotype(gds))
## 
## 0|0 0|1 1|0 1|1 
## 702 165 171  88
table(refDosage(gds))
## 
##   0   1   2 
##  88 336 702
seqClose(gds)