GDS - Solutions
- Set a filter selecting only multi-allelic variants. Inspect their genotypes using the different methods you learned above. Use the
alleleDosage
method to find dosage for the second (and third, etc.) alternate allele.
seqSetFilter(gds, variant.sel=multi.allelic)
## # of selected variants: 21
geno <- seqGetData(gds, "genotype")
dim(geno)
## [1] 2 1126 21
## , , 1
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 1 1 0 1 1
## [2,] 0 1 1 1 1
##
## , , 2
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 3
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 4
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 1 0
## [2,] 0 0 0 0 1
##
## , , 5
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 6
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 7
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 8
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 9
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 2
## [2,] 0 0 0 0 0
##
## , , 10
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 1 2 0 0 1
## [2,] 0 2 0 2 1
##
## , , 11
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 1 0 0
## [2,] 1 1 0 1 0
##
## , , 12
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 3 1 1 1 3
## [2,] 3 3 3 1 0
##
## , , 13
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 2 0
## [2,] 0 0 0 0 0
##
## , , 14
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 15
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 2 0 0 0 2
## [2,] 0 0 0 2 0
##
## , , 16
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 2 2 2 2 2
## [2,] 2 2 2 2 2
##
## , , 17
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 2 2 2 2 2
## [2,] 2 2 2 2 2
##
## , , 18
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 19
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
##
## , , 20
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 1
## [2,] 0 0 0 0 0
##
## , , 21
##
## sample
## allele [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 2 0
geno <- getGenotype(gds)
dim(geno)
## [1] 1126 21
## variant
## sample 30 69 73 161 162 195 243 253 407 431 434
## HG00096 "1|0" "0|0" "0|0" "1|0" "0|0" "0|0" "0|0" "0|0" "0|0" "1|0" "0|1"
## HG00097 "1|1" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "2|2" "0|1"
## HG00099 "0|1" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|0" "1|0"
## HG00100 "1|1" "0|0" "0|0" "1|0" "0|0" "0|0" "0|0" "0|0" "0|0" "0|2" "0|1"
## HG00101 "1|1" "0|0" "0|0" "0|1" "0|0" "0|0" "0|0" "0|0" "2|0" "1|1" "0|0"
## HG00102 "0|1" "0|0" "0|0" "1|0" "0|0" "0|0" "0|0" "0|0" "0|0" "2|2" "0|1"
## variant
## sample 610 627 645 689 756 765 814 988 1014 1056
## HG00096 "3|3" "0|0" "0|0" "2|0" "2|2" "2|2" "0|0" "0|0" "0|0" "0|0"
## HG00097 "1|3" "0|0" "0|0" "0|0" "2|2" "2|2" "0|0" "0|0" "0|0" "0|0"
## HG00099 "1|3" "0|0" "0|0" "0|0" "2|2" "2|2" "0|0" "0|0" "0|0" "0|0"
## HG00100 "1|1" "2|0" "0|0" "0|2" "2|2" "2|2" "0|0" "0|0" "0|0" "0|2"
## HG00101 "3|0" "0|0" "0|0" "2|0" "2|2" "2|2" "0|0" "0|0" "1|0" "0|0"
## HG00102 "3|3" "0|2" "0|0" "0|0" "2|2" "1|2" "0|0" "0|0" "0|1" "0|2"
geno <- getGenotypeAlleles(gds)
head(geno)
## variant
## sample 30 69 73 161 162 195 243 253 407 431
## HG00096 "GT|GTT" "A|A" "G|G" "A|G" "A|A" "A|A" "C|C" "G|G" "T|T" "TAA|TA"
## HG00097 "GT|GT" "A|A" "G|G" "G|G" "A|A" "A|A" "C|C" "G|G" "T|T" "T|T"
## HG00099 "GTT|GT" "A|A" "G|G" "G|G" "A|A" "A|A" "C|C" "G|G" "T|T" "TA|TA"
## HG00100 "GT|GT" "A|A" "G|G" "A|G" "A|A" "A|A" "C|C" "G|G" "T|T" "TA|T"
## HG00101 "GT|GT" "A|A" "G|G" "G|A" "A|A" "A|A" "C|C" "G|G" "C|T" "TAA|TAA"
## HG00102 "GTT|GT" "A|A" "G|G" "A|G" "A|A" "A|A" "C|C" "G|G" "T|T" "T|T"
## variant
## sample 434 610 627 645 689 756 765
## HG00096 "G|GTTA" "G|G" "G|G" "G|G" "C|G" "CGAGCAT|CGAGCAT" "C|C"
## HG00097 "G|GTTA" "GCC|G" "G|G" "G|G" "G|G" "CGAGCAT|CGAGCAT" "C|C"
## HG00099 "GTTA|G" "GCC|G" "G|G" "G|G" "G|G" "CGAGCAT|CGAGCAT" "C|C"
## HG00100 "G|GTTA" "GCC|GCC" "C|G" "G|G" "G|C" "CGAGCAT|CGAGCAT" "C|C"
## HG00101 "G|G" "G|GC" "G|G" "G|G" "C|G" "CGAGCAT|CGAGCAT" "C|C"
## HG00102 "G|GTTA" "G|G" "G|C" "G|G" "G|G" "CGAGCAT|CGAGCAT" "CATTATT|C"
## variant
## sample 814 988 1014 1056
## HG00096 "CT|CT" "CGTGA|CGTGA" "C|C" "A|A"
## HG00097 "CT|CT" "CGTGA|CGTGA" "C|C" "A|A"
## HG00099 "CT|CT" "CGTGA|CGTGA" "C|C" "A|A"
## HG00100 "CT|CT" "CGTGA|CGTGA" "C|C" "A|G"
## HG00101 "CT|CT" "CGTGA|CGTGA" "CCATT|C" "A|A"
## HG00102 "CT|CT" "CGTGA|CGTGA" "C|CCATT" "A|G"
dos <- refDosage(gds)
head(dos)
## variant
## sample 30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765 814
## HG00096 1 2 2 1 2 2 2 2 2 1 1 0 2 2 1 0 0 2
## HG00097 0 2 2 2 2 2 2 2 2 0 1 0 2 2 2 0 0 2
## HG00099 1 2 2 2 2 2 2 2 2 2 1 0 2 2 2 0 0 2
## HG00100 0 2 2 1 2 2 2 2 2 1 1 0 1 2 1 0 0 2
## HG00101 0 2 2 1 2 2 2 2 1 0 2 1 2 2 1 0 0 2
## HG00102 1 2 2 1 2 2 2 2 2 0 1 0 1 2 2 0 0 2
## variant
## sample 988 1014 1056
## HG00096 2 2 2
## HG00097 2 2 2
## HG00099 2 2 2
## HG00100 2 2 1
## HG00101 2 1 2
## HG00102 2 1 1
dos <- altDosage(gds)
head(dos)
## variant
## sample 30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765 814
## HG00096 1 0 0 1 0 0 0 0 0 1 1 2 0 0 1 2 2 0
## HG00097 2 0 0 0 0 0 0 0 0 2 1 2 0 0 0 2 2 0
## HG00099 1 0 0 0 0 0 0 0 0 0 1 2 0 0 0 2 2 0
## HG00100 2 0 0 1 0 0 0 0 0 1 1 2 1 0 1 2 2 0
## HG00101 2 0 0 1 0 0 0 0 1 2 0 1 0 0 1 2 2 0
## HG00102 1 0 0 1 0 0 0 0 0 2 1 2 1 0 0 2 2 0
## variant
## sample 988 1014 1056
## HG00096 0 0 0
## HG00097 0 0 0
## HG00099 0 0 0
## HG00100 0 0 1
## HG00101 0 1 0
## HG00102 0 1 1
dos <- alleleDosage(gds, n=2)
head(dos)
## variant
## sample 30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765 814
## HG00096 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 0
## HG00097 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 2 0
## HG00099 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0
## HG00100 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 2 2 0
## HG00101 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 2 2 0
## HG00102 0 0 0 0 0 0 0 0 0 2 0 0 1 0 0 2 1 0
## variant
## sample 988 1014 1056
## HG00096 0 0 0
## HG00097 0 0 0
## HG00099 0 0 0
## HG00100 0 0 1
## HG00101 0 0 0
## HG00102 0 0 1
dos <- alleleDosage(gds, n=3)
head(dos)
## variant
## sample 30 69 73 161 162 195 243 253 407 431 434 610 627 645 689 756 765 814
## HG00096 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## HG00097 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## HG00099 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## HG00100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## HG00101 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## HG00102 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## variant
## sample 988 1014 1056
## HG00096 0 0 0
## HG00097 0 0 0
## HG00099 0 0 0
## HG00100 0 0 0
## HG00101 0 0 0
## HG00102 0 0 0
- Use the
hwe
function in SeqVarTools to run a Hardy-Weinberg Equilibrium test on each variant. Identify a variant with low p-value and inspect its genotypes. (Note that the HWE test is only valid for biallelic variants, and will return NA
for multiallelic variants.)
## # of selected samples: 1,126
## # of selected variants: 1,120
hwe.res <- hwe(gds)
lowp <- !is.na(hwe.res$p) & hwe.res$p < 1e-4
head(hwe.res[lowp,])
## variant.id nAA nAa naa afreq p f
## 75 75 702 336 88 0.7726465 1.070663e-06 0.1506466
## 92 92 632 381 113 0.7304618 3.558878e-06 0.1407120
## 98 98 672 335 119 0.7455595 2.369695e-12 0.2158342
## 105 105 93 272 761 0.2033748 7.851777e-16 0.2544970
## 114 114 299 482 345 0.4795737 1.745346e-06 0.1424409
## 150 150 471 447 208 0.6167851 8.020208e-08 0.1602251
seqSetFilter(gds, variant.id=75)
## # of selected variants: 1
##
## 0|0 0|1 1|0 1|1
## 702 165 171 88
##
## 0 1 2
## 88 336 702