{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T12:16:30Z","timestamp":1766578590549,"version":"3.41.0"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319987019"},{"type":"electronic","value":"9783319987026"}],"license":[{"start":{"date-parts":[[2018,8,17]],"date-time":"2018-08-17T00:00:00Z","timestamp":1534464000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-319-98702-6_25","type":"book-chapter","created":{"date-parts":[[2018,8,16]],"date-time":"2018-08-16T09:27:55Z","timestamp":1534411675000},"page":"208-215","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["A DNA Sequence Corpus for Compression Benchmark"],"prefix":"10.1007","author":[{"given":"Diogo","family":"Pratas","sequence":"first","affiliation":[]},{"given":"Armando J.","family":"Pinho","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,8,17]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Grumbach, S., Tahi, F.: Compression of DNA sequences. In: Proceedings of the Data Compression Conference, DCC-1993, Snowbird, Utah, pp. 340\u2013350 (1993)","DOI":"10.1109\/DCC.1993.253115"},{"issue":"6","key":"25_CR2","doi-asserted-by":"publisher","first-page":"875","DOI":"10.1016\/0306-4573(94)90014-0","volume":"30","author":"S Grumbach","year":"1994","unstructured":"Grumbach, S., Tahi, F.: A new challenge for compression algorithms: genetic sequences. Inf. Process. Manage. 30(6), 875\u2013886 (1994)","journal-title":"Inf. Process. Manage."},{"key":"25_CR3","first-page":"131","volume":"13","author":"E Rivals","year":"1997","unstructured":"Rivals, E., Delgrange, O., Delahaye, J.P., Dauchet, M., Delorme, M.O., H\u00e9naut, A., Ollivier, E.: Detection of significant patterns by compression algorithms: the case of approximate tandem repeats in DNA sequences. Comput. Appl. Biosci. 13, 131\u2013136 (1997)","journal-title":"Comput. Appl. Biosci."},{"key":"25_CR4","doi-asserted-by":"crossref","unstructured":"Chen, T., Sullivan, G.J., Puri, A.: H.263 (including H.263+) and other ITU-T video coding standards. In: Puri, A., Chen, T., (eds.) Multimedia Systems, Standards, and Networks pp. 55\u201385. Marcel Dekker (2000)","DOI":"10.1201\/9780203908440.ch3"},{"issue":"12","key":"25_CR5","doi-asserted-by":"publisher","first-page":"1696","DOI":"10.1093\/bioinformatics\/18.12.1696","volume":"18","author":"X Chen","year":"2002","unstructured":"Chen, X., Li, M., Ma, B., Tromp, J.: DNACompress: fast and effective DNA sequence compression. Bioinformatics 18(12), 1696\u20131698 (2002)","journal-title":"Bioinformatics"},{"key":"25_CR6","doi-asserted-by":"crossref","unstructured":"Tabus, I., Korodi, G., Rissanen, J.: DNA sequence compression using the normalized maximum likelihood model for discrete regression. In: Proceedings of the Data Compression Conference, DCC-2003, Snowbird, Utah, pp. 253\u2013262 (2003)","DOI":"10.1109\/DCC.2003.1194016"},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"Korodi, G., Tabus, I.: Normalized maximum likelihood model of order-1 for the compression of DNA sequences. In: Proceedings of the Data Compression Conference, DCC-2007, Snowbird, Utah, pp. 33\u201342, March 2007","DOI":"10.1109\/DCC.2007.60"},{"key":"25_CR8","doi-asserted-by":"crossref","unstructured":"Cao, M.D., Dix, T.I., Allison, L., Mears, C.: A simple statistical algorithm for biological sequence compression. In: Proceedings of the Data Compression Conference, DCC-2007, Snowbird, Utah, pp. 43\u201352, March 2007","DOI":"10.1109\/DCC.2007.7"},{"issue":"6","key":"25_CR9","doi-asserted-by":"publisher","first-page":"e21588","DOI":"10.1371\/journal.pone.0021588","volume":"6","author":"AJ Pinho","year":"2011","unstructured":"Pinho, A.J., Ferreira, P.J.S.G., Neves, A.J.R., Bastos, C.A.C.: On the representability of complete genomes by multiple competing finite-context (Markov) models. PLoS ONE 6(6), e21588 (2011)","journal-title":"PLoS ONE"},{"issue":"3","key":"25_CR10","first-page":"245","volume":"33","author":"A Gupta","year":"2011","unstructured":"Gupta, A., Agarwal, S.: A novel approach for compressing DNA sequences using semi-statistical compressor. Int. J. Comput. Appl. 33(3), 245\u2013251 (2011)","journal-title":"Int. J. Comput. Appl."},{"issue":"5","key":"25_CR11","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1109\/TEVC.2011.2160399","volume":"15","author":"Z Zhu","year":"2011","unstructured":"Zhu, Z., Zhou, J., Ji, Z., Shi, Y.: DNA sequence compression using adaptive particle swarm optimization-based memetic algorithm. IEEE Trans. Evol. Comput. 15(5), 643\u2013658 (2011)","journal-title":"IEEE Trans. Evol. Comput."},{"issue":"4","key":"25_CR12","doi-asserted-by":"publisher","first-page":"785","DOI":"10.1007\/s12038-012-9230-6","volume":"37","author":"T Bose","year":"2012","unstructured":"Bose, T., Mohammed, M.H., Dutta, A., Mande, S.S.: BIND-an algorithm for loss-less compression of nucleotide sequence data. J. Biosci. 37(4), 785\u2013789 (2012)","journal-title":"J. Biosci."},{"key":"25_CR13","doi-asserted-by":"crossref","unstructured":"Dai, W., Xiong, H., Jiang, X., Ohno-Machado, L.: An adaptive difference distribution-based coding with hierarchical tree structure for DNA sequence compression. In: Proceedings of the Data Compression Conference, DCC-2013, pp. 371\u2013380. IEEE (2013)","DOI":"10.1109\/DCC.2013.45"},{"issue":"11","key":"25_CR14","doi-asserted-by":"publisher","first-page":"e80377","DOI":"10.1371\/journal.pone.0080377","volume":"8","author":"P Li","year":"2013","unstructured":"Li, P., Wang, S., Kim, J., Xiong, H., Ohno-Machado, L., Jiang, X.: DNA-COMPACT: DNA compression based on a pattern-aware contextual modeling technique. PLoS ONE 8(11), e80377 (2013)","journal-title":"PLoS ONE"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Guo, H., Chen, M., Liu, X., Xie, M.: Genome compression based on Hilbert space filling curve. In: Proceedings of the 3rd International Conference on Management, Education, Information and Control (MEICI 2015), Shenyang, China, pp. 29\u201331 (2015)","DOI":"10.2991\/meici-15.2015.294"},{"issue":"6","key":"25_CR16","doi-asserted-by":"publisher","first-page":"1275","DOI":"10.1109\/TCBB.2015.2430331","volume":"12","author":"X Xie","year":"2015","unstructured":"Xie, X., Zhou, S., Guan, J.: CoGI: towards compressing genomes as an image. IEEE\/ACM Trans. Comput. Biol. Bioinf. 12(6), 1275\u20131285 (2015)","journal-title":"IEEE\/ACM Trans. Comput. Biol. Bioinf."},{"key":"25_CR17","doi-asserted-by":"crossref","unstructured":"Pratas, D., Pinho, A.J., Ferreira, P.J.S.G.: Efficient compression of genomic sequences. In: Proceedings of the Data Compression Conference, DCC-2016, Snowbird, Utah, 231\u2013240, March 2016","DOI":"10.1109\/DCC.2016.60"},{"issue":"4","key":"25_CR18","doi-asserted-by":"publisher","first-page":"56","DOI":"10.3390\/info7040056","volume":"7","author":"M Hosseini","year":"2016","unstructured":"Hosseini, M., Pratas, D., Pinho, A.J.: A survey on data compression methods for biological sequences. Information 7(4), 56 (2016)","journal-title":"Information"},{"key":"25_CR19","doi-asserted-by":"publisher","first-page":"1397","DOI":"10.1002\/spe.619","volume":"34","author":"G Manzini","year":"2004","unstructured":"Manzini, G., Rastero, M.: A simple and fast DNA compressor. Software-Pract. Experience 34, 1397\u20131411 (2004)","journal-title":"Software-Pract. Experience"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Pratas, D., Pinho, A.J.: On the approximation of the Kolmogorov complexity for DNA sequences. In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 259\u2013266. Springer (2017)","DOI":"10.1007\/978-3-319-58838-4_29"},{"issue":"11","key":"25_CR21","doi-asserted-by":"publisher","first-page":"e79922","DOI":"10.1371\/journal.pone.0079922","volume":"8","author":"AJ Pinho","year":"2013","unstructured":"Pinho, A.J., Garcia, S.P., Pratas, D., Ferreira, P.J.S.G.: DNA sequences at a glance. PLoS ONE 8(11), e79922 (2013)","journal-title":"PLoS ONE"},{"issue":"8","key":"25_CR22","doi-asserted-by":"publisher","first-page":"e0183416","DOI":"10.1371\/journal.pone.0183416","volume":"12","author":"E Sales","year":"2017","unstructured":"Sales, E., Viruel, J., Domingo, C., Marqu\u00e9s, L.: Genome wide association analysis of cold tolerance at germination in temperate japonica rice (Oryza sativa L.) varieties. PLoS ONE 12(8), e0183416 (2017)","journal-title":"PLoS ONE"},{"issue":"9","key":"25_CR23","doi-asserted-by":"publisher","first-page":"3031","DOI":"10.3382\/ps\/pex151","volume":"96","author":"N Hudson","year":"2017","unstructured":"Hudson, N., Hawken, R., Okimoto, R., Sapp, R., Reverter, A.: Data compression can discriminate broilers by selection line, detect haplotypes, and estimate genetic potential for complex phenotypes. Poult. Sci. 96(9), 3031\u20133038 (2017)","journal-title":"Poult. Sci."},{"issue":"4","key":"25_CR24","first-page":"378","volume":"54","author":"VA Keck","year":"2015","unstructured":"Keck, V.A., Edgerton, D.S., Hajizadeh, S., Swift, L.L., Dupont, W.D., Lawrence, C., Boyd, K.L.: Effects of habitat complexity on pair-housed zebrafish. J. Am. Assoc. Lab. Anim. Sci. 54(4), 378\u2013383 (2015)","journal-title":"J. Am. Assoc. Lab. Anim. Sci."},{"issue":"22","key":"25_CR25","doi-asserted-by":"publisher","first-page":"7477","DOI":"10.1523\/JNEUROSCI.0758-12.2012","volume":"32","author":"Y Goldshmit","year":"2012","unstructured":"Goldshmit, Y., Sztal, T.E., Jusuf, P.R., Hall, T.E., Nguyen-Chi, M., Currie, P.D.: Fgf-dependent glial cell bridges facilitate spinal cord regeneration in zebrafish. J. Neurosci. 32(22), 7477\u20137492 (2012)","journal-title":"J. Neurosci."},{"issue":"2","key":"25_CR26","doi-asserted-by":"publisher","first-page":"eaao3424","DOI":"10.1126\/sciadv.aao3424","volume":"4","author":"C Bamberger","year":"2018","unstructured":"Bamberger, C., Mart\u00ednez-Bartolom\u00e9, S., Montgomery, M., Lavall\u00e9e-Adam, M., Yates, J.R.: Increased proteomic complexity in Drosophila hybrids during development. Sci. Adv. 4(2), eaao3424 (2018)","journal-title":"Sci. Adv."},{"issue":"6874","key":"25_CR27","doi-asserted-by":"publisher","first-page":"871","DOI":"10.1038\/nature724","volume":"415","author":"V Wood","year":"2002","unstructured":"Wood, V., et al.: The genome sequence of Schizosaccharomyces pombe. Nature 415(6874), 871\u201380 (2002)","journal-title":"Nature"},{"key":"25_CR28","doi-asserted-by":"crossref","unstructured":"Pinho, A.J., Pratas, D., Ferreira, P.J.S.G.: Authorship attribution using relative compression. In: Proceedings of the Data Compression Conference, DCC-2016, Snowbird, Utah, March 2016","DOI":"10.1109\/DCC.2016.53"},{"issue":"35","key":"25_CR29","doi-asserted-by":"publisher","first-page":"14902","DOI":"10.1073\/pnas.0907740106","volume":"106","author":"SM Rich","year":"2009","unstructured":"Rich, S.M., Leendertz, F.H., Xu, G., LeBreton, M., Djoko, C.F., Aminake, M.N., Takang, E.E., Diffo, J.L., Pike, B.L., Rosenthal, B.M., et al.: The origin of malignant malaria. Proc. Natl. Acad. Sci. 106(35), 14902\u201314907 (2009)","journal-title":"Proc. Natl. Acad. Sci."},{"issue":"3","key":"25_CR30","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1038\/nrmicro2298","volume":"8","author":"O Tenaillon","year":"2010","unstructured":"Tenaillon, O., Skurnik, D., Picard, B., Denamur, E.: The population genetics of commensal Escherichia coli. Nat. Rev. Microbiol. 8(3), 207 (2010)","journal-title":"Nat. Rev. Microbiol."},{"issue":"s1","key":"25_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/hel.12165","volume":"19","author":"LH Eusebi","year":"2014","unstructured":"Eusebi, L.H., Zagari, R.M., Bazzoli, F.: Epidemiology of Helicobacter pylori infection. Helicobacter 19(s1), 1\u20135 (2014)","journal-title":"Helicobacter"},{"issue":"2","key":"25_CR32","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1099\/ijs.0.02826-0","volume":"54","author":"S Nakagawa","year":"2004","unstructured":"Nakagawa, S., Takai, K., Horikoshi, K., Sako, Y.: Aeropyrum camini sp. nov., a strictly aerobic, hyperthermophilic archaeon from a deep-sea hydrothermal vent chimney. Int. J. Syst. Evol. Microbiol. 54(2), 329\u2013335 (2004)","journal-title":"Int. J. Syst. Evol. Microbiol."},{"issue":"21","key":"25_CR33","doi-asserted-by":"publisher","first-page":"6086","DOI":"10.1128\/JB.05953-11","volume":"193","author":"H Liu","year":"2011","unstructured":"Liu, H., Wu, Z., Li, M., Zhang, F., Zheng, H., Han, J., Liu, J., Zhou, J., Wang, S., Xiang, H.: Complete genome sequence of Haloarcula hispanica, a model haloarchaeon for studying genetics, metabolism, and virus-host interaction. J. Bacteriol. 193(21), 6086\u20136087 (2011)","journal-title":"J. Bacteriol."},{"key":"25_CR34","doi-asserted-by":"publisher","first-page":"15131","DOI":"10.1038\/srep15131","volume":"5","author":"W Zhang","year":"2015","unstructured":"Zhang, W., Zhou, J., Liu, T., Yu, Y., Pan, Y., Yan, S., Wang, Y.: Four novel algal virus genomes discovered from Yellowstone Lake metagenomes. Sci. Rep. 5, 15131 (2015)","journal-title":"Sci. Rep."},{"issue":"15","key":"25_CR35","doi-asserted-by":"publisher","first-page":"2421","DOI":"10.1093\/bioinformatics\/btv189","volume":"31","author":"RM Silva","year":"2015","unstructured":"Silva, R.M., Pratas, D., Castro, L., Pinho, A.J., Ferreira, P.J.S.G.: Three minimal sequences found in Ebola virus genomes and absent from human DNA. Bioinformatics 31(15), 2421\u20132425 (2015)","journal-title":"Bioinformatics"},{"issue":"7","key":"25_CR36","doi-asserted-by":"publisher","first-page":"2143","DOI":"10.1111\/1462-2920.12923","volume":"18","author":"J Wang","year":"2016","unstructured":"Wang, J., Gao, Y., Zhao, F.: Phage-bacteria interaction network in human oral microbiome. Environ. Microbiol. 18(7), 2143\u20132158 (2016)","journal-title":"Environ. Microbiol."}],"container-title":["Advances in Intelligent Systems and Computing","Practical Applications of Computational Biology and Bioinformatics, 12th International Conference"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-98702-6_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T10:48:52Z","timestamp":1751798932000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-98702-6_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,17]]},"ISBN":["9783319987019","9783319987026"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-98702-6_25","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2018,8,17]]}}}