{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:17:17Z","timestamp":1775063837734,"version":"3.50.1"},"reference-count":14,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2016,12,13]],"date-time":"2016-12-13T00:00:00Z","timestamp":1481587200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"name":"New South Wales Ministry of Health, a National Health and Medical Research Council\/National Heart Foundation Career Development Fellowship","award":["1105271"],"award-info":[{"award-number":["1105271"]}]},{"name":"Ramaciotti Establishment Grant","award":["ES2014\/010"],"award-info":[{"award-number":["ES2014\/010"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,3,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Summary<\/jats:title>\n                    <jats:p>Single-cell RNA-seq (scRNA-seq) is increasingly used in a range of biomedical studies. Nonetheless, current RNA-seq analysis tools are not specifically designed to efficiently process scRNA-seq data due to their limited scalability. Here we introduce Falco, a cloud-based framework to enable paralellization of existing RNA-seq processing pipelines using big data technologies of Apache Hadoop and Apache Spark for performing massively parallel analysis of large scale transcriptomic data. Using two public scRNA-seq datasets and two popular RNA-seq alignment\/feature quantification pipelines, we show that the same processing pipeline runs 2.6\u2013145.4 times faster using Falco than running on a highly optimized standalone computer. Falco also allows users to utilize low-cost spot instances of Amazon Web Services, providing a \u223c65% reduction in cost of analysis.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and Implementation<\/jats:title>\n                    <jats:p>Falco is available via a GNU General Public License at https:\/\/github.com\/VCCRI\/Falco\/<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btw732","type":"journal-article","created":{"date-parts":[[2016,11,16]],"date-time":"2016-11-16T15:05:49Z","timestamp":1479308749000},"page":"767-769","source":"Crossref","is-referenced-by-count":23,"title":["Falco: a quick and flexible single-cell RNA-seq processing framework on the cloud"],"prefix":"10.1093","volume":"33","author":[{"given":"Andrian","family":"Yang","sequence":"first","affiliation":[{"name":"Victor Chang Cardiac Research Institute, Sydney, NSW, Australia"},{"name":"St. Vincent\u2019s Clinical School, University of New South Wales, Sydney, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Troup","sequence":"additional","affiliation":[{"name":"Victor Chang Cardiac Research Institute, Sydney, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peijie","family":"Lin","sequence":"additional","affiliation":[{"name":"Victor Chang Cardiac Research Institute, Sydney, NSW, Australia"},{"name":"St. Vincent\u2019s Clinical School, University of New South Wales, Sydney, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joshua W K","family":"Ho","sequence":"additional","affiliation":[{"name":"Victor Chang Cardiac Research Institute, Sydney, NSW, Australia"},{"name":"St. Vincent\u2019s Clinical School, University of New South Wales, Sydney, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2016,12,13]]},"reference":[{"key":"2023020204504878200_btw732-B1","doi-asserted-by":"crossref","first-page":"e0155461.","DOI":"10.1371\/journal.pone.0155461","article-title":"SparkBWA: speeding up the alignment of high-throughput DNA sequencing data","volume":"11","author":"Abu\u00edn","year":"2016","journal-title":"Plos One"},{"key":"2023020204504878200_btw732-B2","doi-asserted-by":"crossref","first-page":"166","DOI":"10.1093\/bioinformatics\/btu638","article-title":"HTSeq A Python framework to work with high-throughput sequencing data","volume":"31","author":"Anders","year":"2014","journal-title":"Bioinformatics"},{"key":"2023020204504878200_btw732-B3","doi-asserted-by":"crossref","first-page":"7285","DOI":"10.1073\/pnas.1507125112","article-title":"A survey of human brain transcriptome diversity at the single cell level","volume":"112","author":"Darmanis","year":"2015","journal-title":"Proc. Natl. Acad. Sci"},{"key":"2023020204504878200_btw732-B4","article-title":"MapReduce: Simplified Data Processing on Large Clusters. In: Proceedings of the Sixth Symposium on Operating System Design and Implementation (OSDI), OSDI\u201904. USENIX Association.","author":"Dean","year":"2004"},{"key":"2023020204504878200_btw732-B5","doi-asserted-by":"crossref","first-page":"2482","DOI":"10.1093\/bioinformatics\/btv179","article-title":"Halvade: scalable sequence analysis with MapReduce","volume":"31","author":"Decap","year":"2015","journal-title":"Bioinformatics"},{"key":"2023020204504878200_btw732-B6","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1093\/bioinformatics\/bts635","article-title":"STAR: ultrafast universal RNA-seq aligner","volume":"29","author":"Dobin","year":"2013","journal-title":"Bioinformatics (Oxford, England)"},{"key":"2023020204504878200_btw732-B7","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1038\/nature14966","article-title":"Single-cell messenger RNA sequencing reveals rare intestinal cell types","volume":"525","author":"Gr\u00fcn","year":"2015","journal-title":"Nature"},{"key":"2023020204504878200_btw732-B8","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1038\/nmeth.3317","article-title":"HISAT: a fast spliced aligner with low memory requirements","volume":"12","author":"Kim","year":"2015","journal-title":"Nat. Methods"},{"key":"2023020204504878200_btw732-B9","doi-asserted-by":"crossref","first-page":"471","DOI":"10.1016\/j.stem.2015.09.011","article-title":"Single cell RNA-sequencing of pluripotent states unlocks modular transcriptional variation","volume":"17","author":"Kolodziejczyk","year":"2015","journal-title":"Cell Stem Cell"},{"key":"2023020204504878200_btw732-B10","doi-asserted-by":"crossref","first-page":"610","DOI":"10.1016\/j.molcel.2015.04.005","article-title":"The technology and biology of single-cell RNA sequencing","volume":"58","author":"Kolodziejczyk","year":"2015","journal-title":"Mol. Cell"},{"key":"2023020204504878200_btw732-B11","doi-asserted-by":"crossref","first-page":"923","DOI":"10.1093\/bioinformatics\/btt656","article-title":"FeatureCounts: an efficient general purpose program for assigning sequence reads to genomic features","volume":"30","author":"Liao","year":"2014","journal-title":"Bioinformatics"},{"key":"2023020204504878200_btw732-B12","doi-asserted-by":"crossref","first-page":"1396","DOI":"10.1126\/science.1254257","article-title":"Single-cell RNA-seq highlights intratumoral heterogeneity in primary glioblastoma","volume":"344","author":"Patel","year":"2014","journal-title":"Science"},{"key":"2023020204504878200_btw732-B13","doi-asserted-by":"crossref","first-page":"2652","DOI":"10.1093\/bioinformatics\/btu343","article-title":"SparkSeq: fast, scalable and cloud-ready tool for the interactive genomic data analysis with nucleotide precision","volume":"30","author":"Wiewiorka","year":"2014","journal-title":"Bioinformatics"},{"key":"2023020204504878200_btw732-B14","author":"Zaharia","year":"2010"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/5\/767\/49037993\/bioinformatics_33_5_767.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/5\/767\/49037993\/bioinformatics_33_5_767.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T23:52:27Z","timestamp":1675295547000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/33\/5\/767\/2559427"}},"subtitle":[],"editor":[{"given":"Ziv","family":"Bar-Joseph","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2016,12,13]]},"references-count":14,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2017,3,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btw732","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/064006","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2017,3,1]]},"published":{"date-parts":[[2016,12,13]]}}}