{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T04:20:10Z","timestamp":1768278010413,"version":"3.49.0"},"reference-count":10,"publisher":"Oxford University Press (OUP)","issue":"12","license":[{"start":{"date-parts":[[2018,11,14]],"date-time":"2018-11-14T00:00:00Z","timestamp":1542153600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"name":"National Science Center","award":["2014\/13\/B\/NZ2\/01248"],"award-info":[{"award-number":["2014\/13\/B\/NZ2\/01248"]}]},{"name":"PRELUDIUM","award":["2014\/13\/N\/ST6\/01843"],"award-info":[{"award-number":["2014\/13\/N\/ST6\/01843"]}]},{"name":"PRELUDIUM","award":["2015\/17\/D\/ST6\/04063"],"award-info":[{"award-number":["2015\/17\/D\/ST6\/04063"]}]},{"name":"Polish budget funds","award":["IP2015019874"],"award-info":[{"award-number":["IP2015019874"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,6,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>Efficient processing of large-scale genomic datasets has recently become possible due to the application of \u2018big data\u2019 technologies in bioinformatics pipelines. We present SeQuiLa\u2014a distributed, ANSI SQL-compliant solution for speedy querying and processing of genomic intervals that is available as an Apache Spark package. Proposed range join strategy is significantly (\u223c22\u00d7) faster than the default Apache Spark implementation and outperforms other state-of-the-art tools for genomic intervals processing.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>The project is available at http:\/\/biodatageeks.org\/sequila\/.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/bty940","type":"journal-article","created":{"date-parts":[[2018,11,13]],"date-time":"2018-11-13T12:47:22Z","timestamp":1542113242000},"page":"2156-2158","source":"Crossref","is-referenced-by-count":8,"title":["SeQuiLa: an elastic, fast and scalable SQL-oriented solution for processing and querying genomic intervals"],"prefix":"10.1093","volume":"35","author":[{"given":"Marek","family":"Wiewi\u00f3rka","sequence":"first","affiliation":[{"name":"Institute of Computer Science, Warsaw University of Technology, Warsaw, Poland"}]},{"given":"Anna","family":"Le\u015bniewska","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Poznan University of Technology, Pozna\u0144, Poland"}]},{"given":"Agnieszka","family":"Szmur\u0142o","sequence":"additional","affiliation":[{"name":"Institute of Computer Science, Warsaw University of Technology, Warsaw, Poland"}]},{"given":"Kacper","family":"St\u0119pie\u0144","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Poznan University of Technology, Pozna\u0144, Poland"}]},{"given":"Mateusz","family":"Borowiak","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Poznan University of Technology, Pozna\u0144, Poland"}]},{"given":"Micha\u0142","family":"Okoniewski","sequence":"additional","affiliation":[{"name":"Scientific IT Services, ETH Zurich, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0941-4571","authenticated-orcid":false,"given":"Tomasz","family":"Gambin","sequence":"additional","affiliation":[{"name":"Institute of Computer Science, Warsaw University of Technology, Warsaw, Poland"}]}],"member":"286","published-online":{"date-parts":[[2018,11,14]]},"reference":[{"key":"2023012713224799600_bty940-B1","first-page":"348","volume-title":"Introduction to Algorithms","author":"Cormen","year":"2009"},{"key":"2023012713224799600_bty940-B2","doi-asserted-by":"crossref","first-page":"1457","DOI":"10.1093\/bioinformatics\/btx808","article-title":"Analyzing large scale genomic data on the cloud with Sparkhit","volume":"34","author":"Huang","year":"2018","journal-title":"Bioinformatics"},{"key":"2023012713224799600_bty940-B3","doi-asserted-by":"crossref","first-page":"63.","DOI":"10.1186\/s12859-016-0904-1","article-title":"GenAp: a distributed SQL interface for genomic data","volume":"17","author":"Kozanitis","year":"2016","journal-title":"BMC Bioinformatics"},{"key":"2023012713224799600_bty940-B4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/bioinformatics\/btt250","article-title":"Using Genome Query Language to uncover genetic variation","volume":"30","author":"Kozanitis","year":"2014","journal-title":"Bioinformatics"},{"key":"2023012713224799600_bty940-B5","doi-asserted-by":"crossref","first-page":"e1003118.","DOI":"10.1371\/journal.pcbi.1003118","article-title":"Software for computing and annotating genomic ranges","volume":"9","author":"Lawrence","year":"2013","journal-title":"PLoS Comput. Biol"},{"key":"2023012713224799600_bty940-B6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/bioinformatics\/bts652","article-title":"Binary Interval Search: a scalable algorithm for counting interval intersections","volume":"29","author":"Layer","year":"2013","journal-title":"Bioinformatics"},{"key":"2023012713224799600_bty940-B7","doi-asserted-by":"crossref","first-page":"2078","DOI":"10.1093\/bioinformatics\/btp352","article-title":"The Sequence Alignment\/Map format and SAMtools","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2023012713224799600_bty940-B8","doi-asserted-by":"crossref","first-page":"923","DOI":"10.1093\/bioinformatics\/btt656","article-title":"featureCounts: an efficient general purpose program for assigning sequence reads to genomic features","volume":"30","author":"Liao","year":"2014","journal-title":"Bioinformatics"},{"key":"2023012713224799600_bty940-B9","doi-asserted-by":"crossref","first-page":"1881","DOI":"10.1093\/bioinformatics\/btv048","article-title":"GenoMetric Query Language: a novel approach to large-scale genomic data management","volume":"31","author":"Masseroli","year":"2015","journal-title":"Bioinformatics"},{"key":"2023012713224799600_bty940-B10","author":"Massie","year":"2013"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/12\/2156\/48934835\/bioinformatics_35_12_2156.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/12\/2156\/48934835\/bioinformatics_35_12_2156.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T14:16:27Z","timestamp":1674828987000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/35\/12\/2156\/5182295"}},"subtitle":[],"editor":[{"given":"John","family":"Hancock","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2018,11,14]]},"references-count":10,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2019,6,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/bty940","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2019,6]]},"published":{"date-parts":[[2018,11,14]]}}}