{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T23:52:06Z","timestamp":1773273126546,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030596118","type":"print"},{"value":"9783030596125","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-59612-5_6","type":"book-chapter","created":{"date-parts":[[2020,9,17]],"date-time":"2020-09-17T15:56:47Z","timestamp":1600358207000},"page":"68-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Scalable Reference Genome Assembly from Compressed Pan-Genome Index with Spark"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8851-4265","authenticated-orcid":false,"given":"Altti 
Ilari","family":"Maarala","sequence":"first","affiliation":[]},{"given":"Ossi","family":"Arasalo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8244-2299","authenticated-orcid":false,"given":"Daniel","family":"Valenzuela","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4547-2701","authenticated-orcid":false,"given":"Keijo","family":"Heljanko","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4454-1493","authenticated-orcid":false,"given":"Veli","family":"M\u00e4kinen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,18]]},"reference":[{"key":"6_CR1","doi-asserted-by":"publisher","unstructured":"Marschall, T., Marz, M., Abeel, T., et al.: Computational pan-genomics: status, promises and challenges. The Computational Pan-Genomics Consortium. Brief. Bioinform. (2016). https:\/\/doi.org\/10.1093\/bib\/bbw089","DOI":"10.1093\/bib\/bbw089"},{"key":"6_CR2","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1038\/s41576-020-0210-7","volume":"21","author":"RM Sherman","year":"2020","unstructured":"Sherman, R.M., Salzberg, S.L.: Pan-genomics in the human genome era. Nat. Rev. Genet. 21, 243\u2013254 (2020). https:\/\/doi.org\/10.1038\/s41576-020-0210-7","journal-title":"Nat. Rev. Genet."},{"key":"6_CR3","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1038\/ng.3257","volume":"47","author":"A Dilthey","year":"2015","unstructured":"Dilthey, A., Cox, C., Iqbal, Z., Nelson, M., McVean, G.: Improved genome inference in the MHC using a population reference graph. Nat. Genet. 47, 682\u2013688 (2015). https:\/\/doi.org\/10.1038\/ng.3257","journal-title":"Nat. 
Genet."},{"key":"6_CR4","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1038\/nature15393","volume":"526","author":"A Auton","year":"2015","unstructured":"Auton, A., Abecasis, G., Altshuler, D., et al.: A global reference for human genetic variation. Nature 526, 68\u201374 (2015). https:\/\/doi.org\/10.1038\/nature15393","journal-title":"Nature"},{"key":"6_CR5","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.nmni.2015.06.005","volume":"7","author":"L Rouli","year":"2015","unstructured":"Rouli, L., Merhej, V., Fournier, P.E., Raoult, D.: The bacterial pangenome as a new tool for analysing pathogenic bacteria. New Microbes New Infect. 7, 72\u201385 (2015)","journal-title":"New Microbes New Infect."},{"key":"6_CR6","doi-asserted-by":"publisher","first-page":"6881","DOI":"10.1128\/JB.00619-08","volume":"190","author":"DA Rasko","year":"2008","unstructured":"Rasko, D.A., Rosovitz, M.J., Myers, G.S.A., et al.: The pangenome structure of Escherichia coli: comparative genomic analysis of E. coli commensal and pathogenic isolates. J. Bacteriol. 190, 6881\u20136893 (2008)","journal-title":"J. Bacteriol."},{"key":"6_CR7","doi-asserted-by":"publisher","first-page":"3199","DOI":"10.1128\/jb.00183-12","volume":"194","author":"E Trost","year":"2012","unstructured":"Trost, E., Blom, J., Soares, S.C., et al.: Pangenomic study of Corynebacterium diphtheriae that provides insights into the genomic diversity of pathogenic isolates from cases of classical diphtheria, endocarditis, and pneumonia. J. Bacteriol. 194, 3199\u20133215 (2012). https:\/\/doi.org\/10.1128\/jb.00183-12","journal-title":"J. Bacteriol."},{"key":"6_CR8","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1038\/ng.3801","volume":"49","author":"B Kehr","year":"2017","unstructured":"Kehr, B., Helgadottir, A., Melsted, P., et al.: Diversity in non-repetitive human sequences not found in the reference genome. Nat. Genet. 49, 588\u2013593 (2017). 
https:\/\/doi.org\/10.1038\/ng.3801","journal-title":"Nat. Genet."},{"key":"6_CR9","doi-asserted-by":"publisher","first-page":"13950","DOI":"10.1073\/pnas.0506758102","volume":"102","author":"H Tettelin","year":"2005","unstructured":"Tettelin, H., Masignani, V., Cieslewicz, M.J., et al.: Genome analysis of multiple pathogenic isolates of Streptococcus agalactiae: implications for the microbial \u2018pan-genome\u2019. Proc. Natl. Acad. Sci. U.S.A. 102, 13950\u201313955 (2005). https:\/\/doi.org\/10.1073\/pnas.0506758102","journal-title":"Proc. Natl. Acad. Sci. U.S.A."},{"key":"6_CR10","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1038\/s41588-018-0273-y","volume":"51","author":"RM Sherman","year":"2019","unstructured":"Sherman, R.M., Forman, J., Antonescu, V., et al.: Assembly of a pan-genome from deep sequencing of 910 humans of African descent. Nat. Genet. 51, 30\u201335 (2019). https:\/\/doi.org\/10.1038\/s41588-018-0273-y","journal-title":"Nat. Genet."},{"key":"6_CR11","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1038\/nature18964","volume":"538","author":"S Mallick","year":"2016","unstructured":"Mallick, S., Li, H., Lipson, M., et al.: The Simons genome diversity project: 300 genomes from 142 diverse populations. Nature 538, 201\u2013206 (2016). https:\/\/doi.org\/10.1038\/nature18964","journal-title":"Nature"},{"key":"6_CR12","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1186\/s13059-019-1751-y","volume":"20","author":"Z Duan","year":"2019","unstructured":"Duan, Z., Qiao, Y., Lu, J., et al.: HUPAN: a pan-genome analysis pipeline for human genomes. Genome Biol. 20, 149 (2019). https:\/\/doi.org\/10.1186\/s13059-019-1751-y","journal-title":"Genome Biol."},{"issue":"15","key":"6_CR13","doi-asserted-by":"publisher","first-page":"2408","DOI":"10.1093\/bioinformatics\/btx170","volume":"33","author":"Z Hu","year":"2017","unstructured":"Hu, Z., et al.: EUPAN enables pan-genome studies of a large number of eukaryotic genomes. 
Bioinformatics 33(15), 2408\u20132409 (2017). https:\/\/doi.org\/10.1093\/bioinformatics\/btx170","journal-title":"Bioinformatics"},{"key":"6_CR14","doi-asserted-by":"publisher","first-page":"278","DOI":"10.1038\/s41588-018-0041-z","volume":"50","author":"Q Zhao","year":"2018","unstructured":"Zhao, Q., Feng, Q., Lu, H., et al.: Pan-genome analysis highlights the extent of genomic variation in cultivated and wild rice. Nat. Genet. 50, 278\u2013284 (2018). https:\/\/doi.org\/10.1038\/s41588-018-0041-z","journal-title":"Nat. Genet."},{"issue":"6","key":"6_CR15","doi-asserted-by":"publisher","first-page":"928","DOI":"10.1093\/bioinformatics\/btx702","volume":"34","author":"AI Maarala","year":"2018","unstructured":"Maarala, A.I., Bzhalava, Z., Dillner, J., Heljanko, K., Bzhalava, D.: ViraPipe: scalable parallel pipeline for viral metagenome analysis from next generation sequencing reads. Bioinformatics 34(6), 928\u2013935 (2018). https:\/\/doi.org\/10.1093\/bioinformatics\/btx702","journal-title":"Bioinformatics"},{"key":"6_CR16","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1186\/s12864-018-4465-8","volume":"19","author":"D Valenzuela","year":"2018","unstructured":"Valenzuela, D., Norri, T., V\u00e4lim\u00e4ki, N., et al.: Towards pan-genome read alignment to improve variation calling. BMC Genomics 19, 87 (2018). https:\/\/doi.org\/10.1186\/s12864-018-4465-8","journal-title":"BMC Genomics"},{"issue":"2","key":"6_CR17","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1109\/TCBB.2013.2297101","volume":"11","author":"J Siren","year":"2014","unstructured":"Siren, J., V\u00e4lim\u00e4ki, N., M\u00e4kinen, V.: Indexing graphs for path queries with applications in genome research. IEEE\/ACM Trans. Comput. Biol. Bioinform. 11(2), 375\u2013388 (2014). https:\/\/doi.org\/10.1109\/TCBB.2013.2297101","journal-title":"IEEE\/ACM Trans. Comput. Biol. 
Bioinform."},{"key":"6_CR18","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1093\/bioinformatics\/btt215","volume":"29","author":"L Huang","year":"2013","unstructured":"Huang, L., Popic, V., Batzoglou, S.: Short read alignment with populations of genomes. Bioinformatics 29, 361\u2013370 (2013). https:\/\/doi.org\/10.1093\/bioinformatics\/btt215","journal-title":"Bioinformatics"},{"key":"6_CR19","doi-asserted-by":"publisher","first-page":"R98","DOI":"10.1186\/gb-2009-10-9-r98","volume":"10","author":"K Schneeberger","year":"2009","unstructured":"Schneeberger, K., Hagmann, J., Ossowski, S., et al.: Simultaneous alignment of short reads against multiple genomes. Genome Biol. 10, R98 (2009)","journal-title":"Genome Biol."},{"key":"6_CR20","unstructured":"Paten, B., Novak, A., Haussler, D.: Mapping to a reference genome structure. ArXiv http:\/\/arxiv.org\/abs\/1404.5010 (2014)"},{"key":"6_CR21","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"D Jeffrey","year":"2008","unstructured":"Jeffrey, D., Sanjay, G.: MapReduce: simplified data processing on large clusters. Commun. ACM 51, 107\u2013113 (2008). https:\/\/doi.org\/10.1145\/1327452.1327492","journal-title":"Commun. ACM"},{"key":"6_CR22","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. In Proceedings of the 2nd USENIX conference on Hot topics in cloud computing (HotCloud 2010), p. 10. USENIX Association, USA (2010)"},{"key":"6_CR23","unstructured":"Zaharia, M., Chowdhury, M., Das, T., et al.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Proceedings of the 9th USENIX conference on Networked Systems Design and Implementation (NSDI 2012), Berkeley, CA, USA, p. 
2 (2012)"},{"issue":"14","key":"6_CR24","doi-asserted-by":"publisher","first-page":"1754","DOI":"10.1093\/bioinformatics\/btp324","volume":"25","author":"H Li","year":"2009","unstructured":"Li, H., Durbin, R.: Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics 25(14), 1754\u20131760 (2009). https:\/\/doi.org\/10.1093\/bioinformatics\/btp324","journal-title":"Bioinformatics"},{"key":"6_CR25","doi-asserted-by":"publisher","first-page":"R25","DOI":"10.1186\/gb-2009-10-3-r25","volume":"10","author":"B Langmead","year":"2009","unstructured":"Langmead, B., Trapnell, C., Pop, M., Salzberg, S.L.: Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biol. 10, R25 (2009). https:\/\/doi.org\/10.1186\/gb-2009-10-3-r25","journal-title":"Genome Biol."},{"issue":"6","key":"6_CR26","doi-asserted-by":"publisher","first-page":"876","DOI":"10.1093\/bioinformatics\/bts054","volume":"28","author":"M Niemenmaa","year":"2012","unstructured":"Niemenmaa, M., Kallio, A., Schumacher, A., Klemel\u00e4, P., Korpelainen, E., Heljanko, K.: Hadoop-BAM: directly manipulating next generation sequencing data in the cloud. Bioinformatics 28(6), 876\u2013877 (2012). https:\/\/doi.org\/10.1093\/bioinformatics\/bts054","journal-title":"Bioinformatics"},{"issue":"15","key":"6_CR27","doi-asserted-by":"publisher","first-page":"2482","DOI":"10.1093\/bioinformatics\/btv179","volume":"31","author":"D Decap","year":"2015","unstructured":"Decap, D., Reumers, J., Herzeel, C., Costanza, P., Fostier, J.: Halvade: scalable sequence analysis with MapReduce. Bioinformatics 31(15), 2482\u20132488 (2015)","journal-title":"Bioinformatics"},{"issue":"3","key":"6_CR28","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1109\/TIT.1977.1055714","volume":"23","author":"J Ziv","year":"1977","unstructured":"Ziv, J., Lempel, A.: A universal algorithm for sequential data compression. IEEE Trans. Inf. Theory 23(3), 337\u2013343 (1977). 
https:\/\/doi.org\/10.1109\/TIT.1977.1055714","journal-title":"IEEE Trans. Inf. Theory"},{"key":"6_CR29","unstructured":"Burrows, M., Wheeler, D.J.: A block-sorting lossless data compression algorithm. Technical report 124, Palo Alto, CA, Digital Equipment Corporation (1994)"},{"key":"6_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1007\/978-3-319-38851-9_22","volume-title":"Experimental Algorithms","author":"D Valenzuela","year":"2016","unstructured":"Valenzuela, D.: CHICO: a compressed hybrid index for repetitive collections. In: Goldberg, A.V., Kulikov, A.S. (eds.) SEA 2016. LNCS, vol. 9685, pp. 326\u2013338. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-38851-9_22"},{"key":"6_CR31","doi-asserted-by":"publisher","unstructured":"Valenzuela, D., M\u00e4kinen, V.: CHIC: a short read aligner for pan-genomic references. bioRxiv 178129 (2017). https:\/\/doi.org\/10.1101\/178129","DOI":"10.1101\/178129"},{"issue":"3","key":"6_CR32","doi-asserted-by":"publisher","first-page":"265","DOI":"10.14778\/2078331.2078341","volume":"5","author":"C Hoobin","year":"2011","unstructured":"Hoobin, C., Puglisi, S.J., Zobel, J.: Relative Lempel-Ziv factorization for efficient storage and retrieval of web collections. Proc. VLDB Endow. 5(3), 265\u2013273 (2011). https:\/\/doi.org\/10.14778\/2078331.2078341","journal-title":"Proc. VLDB Endow."},{"key":"6_CR33","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.jda.2014.03.001","volume":"27","author":"S Rajasekaran","year":"2014","unstructured":"Rajasekaran, S., Nicolae, M.: An elegant algorithm for the construction of suffix arrays. J. Discrete Algorithms 27, 21\u201328 (2014). https:\/\/doi.org\/10.1016\/j.jda.2014.03.001","journal-title":"J. 
Discrete Algorithms"}],"container-title":["Lecture Notes in Computer Science","Big Data \u2013 BigData 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-59612-5_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T12:34:15Z","timestamp":1726662855000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-59612-5_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030596118","9783030596125"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-59612-5_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"18 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"BIGDATA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Honolulu, HI","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2020","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bigdata2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.bigdatacongress.org\/2020\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}