% Journal article record; the url field points at eprint 2071 in the
% eprints.uet.vnu.edu.vn repository. Names are stored as "Last, First" and the
% abstract is kept as plain text without the exported "Abstract--" heading.
@article{SisLab2071,
  author    = {Nguyen, Thao and Le, Sy Vinh and Ho, Hai and Le, Si Quang},
  title     = {Building ancestral recombination graphs for whole genomes},
  journal   = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  publisher = {IEEE},
  year      = {2016},
  url       = {https://eprints.uet.vnu.edu.vn/eprints/id/eprint/2071/},
  abstract  = {The Ancestral Recombination Graph (ARG) plays an important role
               in human population genetics. Nevertheless, most current ARG
               inference algorithms are only applicable to small data sets due
               to their computational burden. Margarita by Minichiello and
               Durbin [1] can handle larger data sets; however, it is still not
               feasible at genome scale. We hereby propose a heuristic
               algorithm, called ARG4WG, to construct plausible ARGs from
               thousands of whole chromosome samples, in which the so-called
               longest shared end, i.e. the longest match between left or right
               ends of sequences, is used for recombination in the building
               process. This strategy allows ARG4WG to significantly reduce the
               computational cost, by working hundreds to thousands times
               faster than Margarita. ARG4WG leads to ARGs with fewer numbers
               of recombination events on real data sets. Margarita is slightly
               better than ARG4WG in reconstructing tree topology on simulated
               data. The ARGs resulted from our algorithm also performed
               reasonably well in an association study with 5560 haplotypes
               across a whole Chromosome 11 of the Gambia dataset. These
               results indicate that ARG4WG is a good candidate for genome-wide
               association study from large data sets.},
}