{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T16:32:40Z","timestamp":1778171560723,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3526054","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"2326-2339","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":33,"title":["Photon: A Fast Query Engine for Lakehouse Systems"],"prefix":"10.1145","author":[{"given":"Alexander","family":"Behm","sequence":"first","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Shoumik","family":"Palkar","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Utkarsh","family":"Agarwal","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Timothy","family":"Armstrong","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"David","family":"Cashman","sequence":"additional","affiliation":[{"name":"Databricks Inc., Toronto, Canada"}]},{"given":"Ankur","family":"Dave","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Todd","family":"Greenstein","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, 
USA"}]},{"given":"Shant","family":"Hovsepian","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Ryan","family":"Johnson","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Arvind","family":"Sai Krishnan","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Paul","family":"Leventis","sequence":"additional","affiliation":[{"name":"Databricks Inc., Amsterdam, Netherlands"}]},{"given":"Ala","family":"Luszczak","sequence":"additional","affiliation":[{"name":"Databricks Inc., Amsterdam, Netherlands"}]},{"given":"Prashanth","family":"Menon","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Mostafa","family":"Mokhtar","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Gene","family":"Pang","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Sameer","family":"Paranjpye","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Greg","family":"Rahn","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Bart","family":"Samwel","sequence":"additional","affiliation":[{"name":"Databricks Inc., Amsterdam, Netherlands"}]},{"given":"Tom","family":"van Bussel","sequence":"additional","affiliation":[{"name":"Databricks Inc., Amsterdam, Netherlands"}]},{"given":"Herman","family":"van Hovell","sequence":"additional","affiliation":[{"name":"Databricks Inc., Amsterdam, Netherlands"}]},{"given":"Maryann","family":"Xue","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]},{"given":"Reynold","family":"Xin","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, 
USA"}]},{"given":"Matei","family":"Zaharia","sequence":"additional","affiliation":[{"name":"Databricks Inc., San Francisco, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2017. Restrict-qualified pointers in LLVM. https:\/\/llvm.org\/devmtg\/2017-02-04\/Restrict-Qualified-Pointers-in-LLVM.pdf."},{"key":"e_1_3_2_1_2_1","unstructured":"2018. Apache Arrow. https:\/\/arrow.apache.org\/."},{"key":"e_1_3_2_1_3_1","unstructured":"2021. Apache Impala. https:\/\/impala.apache.org\/."},{"key":"e_1_3_2_1_4_1","unstructured":"2021. Apache Parquet. https:\/\/parquet.apache.org."},{"key":"e_1_3_2_1_5_1","unstructured":"2021. Dictionary encoding. https:\/\/github.com\/apache\/parquet-format\/blob\/master\/Encodings.md#dictionary-encoding-plain_dictionary--2-and-rle_dictionary--8."},{"key":"e_1_3_2_1_6_1","unstructured":"2021. Google Protocol Buffers. https:\/\/developers.google.com\/protocol-buffers\/."},{"key":"e_1_3_2_1_7_1","unstructured":"2021. ICU - International Components for Unicode. https:\/\/icu.unicode.org\/."},{"key":"e_1_3_2_1_8_1","unstructured":"2021. JNI APIs and Developer Guides. https:\/\/docs.oracle.com\/javase\/8\/docs\/technotes\/guides\/jni\/."},{"key":"e_1_3_2_1_9_1","unstructured":"2021. OffHeapColumnVector. https:\/\/github.com\/apache\/spark\/blob\/master\/sql\/core\/src\/main\/java\/org\/apache\/spark\/sql\/execution\/vectorized\/OffHeapColumnVector.java."},{"key":"e_1_3_2_1_10_1","unstructured":"2021. Parquet Encodings. https:\/\/github.com\/apache\/parquet-format\/blob\/master\/Encodings.md."},{"key":"e_1_3_2_1_11_1","unstructured":"2021. Parquet-MR. https:\/\/github.com\/apache\/parquet-mr."},{"key":"e_1_3_2_1_12_1","unstructured":"2021. RLE\/Bit-packing encoding. https:\/\/github.com\/apache\/parquet-format\/blob\/master\/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3."},{"key":"e_1_3_2_1_13_1","unstructured":"2021. Snowflake Database Storage. 
https:\/\/docs.snowflake.com\/en\/user-guide\/intro-key-concepts.html#database-storage."},{"key":"e_1_3_2_1_14_1","unstructured":"2021. Time Zone Database. https:\/\/www.iana.org\/time-zones."},{"key":"e_1_3_2_1_15_1","unstructured":"2021. TPC-DS Result Details. http:\/\/tpc.org\/tpcds\/results\/tpcds_result_detail5.asp?id=121103001."},{"key":"e_1_3_2_1_16_1","volume-title":"Stratos Idreos, and Samuel Madden.","author":"Abadi Daniel","year":"2013","unstructured":"Daniel Abadi, Peter Boncz, Stavros Harizopoulos Amiato, Stratos Idreos, and Samuel Madden. 2013. The Design and Implementation of Modern Column-oriented Database Systems. Now Hanover, Mass."},{"key":"e_1_3_2_1_17_1","unstructured":"Sameer Agarwal Davies Liu and Reynold Xin. 2016. Apache Spark as a Compiler: Joining a Billion Rows per Second on a Laptop. https:\/\/databricks.com\/blog\/2016\/05\/23\/"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415560"},{"key":"e_1_3_2_1_19_1","volume-title":"Lakehouse: A New Generation of Open Platforms that Unify Data Warehousing and Advanced Analytics. CIDR.","author":"Armbrust Michael","year":"2021","unstructured":"Michael Armbrust, Ali Ghodsi, Reynold Xin, and Matei Zaharia. 2021. Lakehouse: A New Generation of Open Platforms that Unify Data Warehousing and Advanced Analytics. CIDR."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2742797"},{"key":"e_1_3_2_1_21_1","unstructured":"Benoit Dageville. 2021. Striking a balance with 'open' at Snowflake. https:\/\/www.infoworld.com\/article\/3617938\/striking-a-balance-with-open-at-snowflake.html."},{"key":"e_1_3_2_1_22_1","first-page":"54","article-title":"Database architecture optimized for the new bottleneck: Memory access","volume":"99","author":"Boncz Peter A","year":"1999","unstructured":"Peter A Boncz, Stefan Manegold, Martin L Kersten, et al . 1999. Database architecture optimized for the new bottleneck: Memory access. In VLDB, Vol. 99. 
54--65.","journal-title":"VLDB"},{"key":"e_1_3_2_1_23_1","first-page":"225","article-title":"MonetDB\/X100: Hyper-Pipelining Query Execution","volume":"5","author":"Boncz Peter A","year":"2005","unstructured":"Peter A Boncz, Marcin Zukowski, and Niels Nes. 2005. MonetDB\/X100: Hyper-Pipelining Query Execution.. In CIDR, Vol. 5. 225--237.","journal-title":"CIDR"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920881"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457292"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2903741"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544838"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation","author":"Essertel Gr\u00e9gory M.","year":"2018","unstructured":"Gr\u00e9gory M. Essertel, Ruby Y. Tahboub, James M. Decker, Kevin J. Brown, Kunle Olukotun, and Tiark Rompf. 2018. Flare: Optimizing Apache Spark with Native Compilation for Scale-up Architectures and Medium-Size Data. In Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation (Carlsbad, CA, USA) (OSDI'18). USENIX Association, USA, 799--815."},{"key":"e_1_3_2_1_29_1","unstructured":"Wenchen Fan Herman van H\u00f6vell and MaryAnn Xue. 2020. Adaptive Query Execution: Speeding Up Spark SQL at Runtime. https:\/\/databricks.com\/blog\/2020\/05\/29\/adaptive-query-execution-speeding-up-spark-sql-at-runtime.html."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2741948.2741968"},{"key":"e_1_3_2_1_31_1","volume-title":"Encapsulation of Parallelism in the Volcano Query Processing System","author":"Graefe Goetz","unstructured":"Goetz Graefe. 1990. Encapsulation of Parallelism in the Volcano Query Processing System. Vol. 19. ACM."},{"key":"e_1_3_2_1_32_1","unstructured":"Adrian Ionescu. 2018. 
Processing Petabytes of Data in Seconds with Databricks Delta. https:\/\/databricks.com\/blog\/2018\/07\/31\/processing-petabytes-of-data-in-seconds-with-databricks-delta.html."},{"key":"e_1_3_2_1_33_1","volume-title":"Computer Architecture, 1996 23rd Annual International Symposium on. IEEE, 78--78","author":"Kagi A","year":"1996","unstructured":"A Kagi, James R Goodman, and Doug Burger. 1996. Memory Bandwidth Limitations of Future Microprocessors. In Computer Architecture, 1996 23rd Annual International Symposium on. IEEE, 78--78."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.14778\/3275366.3284966"},{"key":"e_1_3_2_1_35_1","volume-title":"The Vertica analytic database: C-store 7 years later. arXiv preprint arXiv:1208.4173","author":"Lamb Andrew","year":"2012","unstructured":"Andrew Lamb, Matt Fuller, Ramakrishna Varadarajan, Nga Tran, Ben Vandier, Lyric Doshi, and Chuck Bear. 2012. The Vertica analytic database: C-store 7 years later. arXiv preprint arXiv:1208.4173 (2012)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2882925"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"P. Leach M. Mealling and R. Salz. 2005. RFC 4122: A Universally Unique IDentifier (UUID) URN Namespace. https:\/\/datatracker.ietf.org\/doc\/html\/rfc4122.","DOI":"10.17487\/rfc4122"},{"key":"e_1_3_2_1_38_1","unstructured":"Alicja Luszczak Micha\u0142 Szafra\u0144ski Micha\u0142 \u015awitakowski and Reynold Xin. 2018. Databricks Cache Boosts Apache Spark Performance. https:\/\/databricks.com\/blog\/2018\/01\/09\/databricks-cache-boosts-apache-spark-performance.html."},{"key":"e_1_3_2_1_39_1","first-page":"19","article-title":"Memory bandwidth and Machine Balance in Current High Performance Computers","volume":"1995","author":"McCalpin John D","year":"1995","unstructured":"John D McCalpin et al. 1995. Memory bandwidth and Machine Balance in Current High Performance Computers. 
IEEE Computer Society Technical Committee on Computer Architecture (TCCA) 1995 (1995), 19--25.","journal-title":"IEEE Computer Society Technical Committee on Computer Architecture (TCCA)"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.14778\/3151113.3151114"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.14778\/2002938.2002940"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3465998.3466009"},{"key":"e_1_3_2_1_43_1","first-page":"293","article-title":"Making Sense of Performance in Data Analytics Frameworks","volume":"15","author":"Ousterhout Kay","year":"2015","unstructured":"Kay Ousterhout, Ryan Rasti, Sylvia Ratnasamy, Scott Shenker, Byung-Gon Chun, and V ICSI. 2015. Making Sense of Performance in Data Analytics Frameworks. In NSDI, Vol. 15. 293--307.","journal-title":"NSDI"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.14778\/3213880.3213890"},{"key":"e_1_3_2_1_45_1","volume-title":"Weld: A Common Runtime for High Performance Data Analytics. In Conference on Innovative Data Systems Research (CIDR).","author":"Palkar Shoumik","year":"2017","unstructured":"Shoumik Palkar, James Thomas, Anil Shanbhag, Deepak Narayanan, Holger Pirk, Malte Schwarzkopf, Saman Amarasinghe, and Matei Zaharia. 2017. Weld: A Common Runtime for High Performance Data Analytics. 
In Conference on Innovative Data Systems Research (CIDR)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2747645"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2463676.2465292"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522715"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2019.00196"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1995441.1995446"},{"key":"e_1_3_2_1_51_1","volume-title":"et al","author":"Stonebraker Mike","year":"2018","unstructured":"Mike Stonebraker, Daniel J Abadi, Adam Batkin, Xuedong Chen, Mitch Cherniack, Miguel Ferreira, Edmond Lau, Amerson Lin, Sam Madden, Elizabeth O'Neil, et al . 2018. C-store: a column-oriented DBMS. In Making Databases Work: the Pragmatic Wisdom of Michael Stonebraker. 491--518."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/216585.216588"},{"key":"e_1_3_2_1_53_1","unstructured":"Reynold Xin and Mostafa Mokhtar. 2021. Databricks Sets Official Data Warehousing Performance Record. https:\/\/databricks.com\/blog\/2021\/11\/02\/databricks-sets-official-data-warehousing-performance-record.html."},{"key":"e_1_3_2_1_54_1","volume-title":"Project Tungsten: Bringing Apache Spark Closer to Bare Metal. https:\/\/databricks.com\/blog\/2015\/04\/28\/project-tungsten-bringing-spark-closer-to-bare-metal.html.","author":"Xin Reynold","year":"2015","unstructured":"Reynold Xin and Josh Rosen. 2015. Project Tungsten: Bringing Apache Spark Closer to Bare Metal. https:\/\/databricks.com\/blog\/2015\/04\/28\/project-tungsten-bringing-spark-closer-to-bare-metal.html."},{"key":"e_1_3_2_1_55_1","unstructured":"MaryAnn Xue and Allison Wang. 2018. Faster SQL: Adaptive Query Execution in Databricks. 
https:\/\/databricks.com\/blog\/2020\/10\/21\/faster-sql-adaptive-query-execution-in-databricks.html."},{"key":"e_1_3_2_1_56_1","first-page":"1","article-title":"DryadLINQ: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language","volume":"8","author":"Yu Yuan","year":"2008","unstructured":"Yuan Yu, Michael Isard, Dennis Fetterly, Mihai Budiu, \u00dalfar Erlingsson, Pradeep Kumar Gunda, and Jon Currey. 2008. DryadLINQ: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language. In OSDI, Vol. 8. 1--14.","journal-title":"OSDI"},{"key":"e_1_3_2_1_57_1","volume-title":"Proceedings of the 9th USENIX conference on Networked Systems Design and Implementation. USENIX Association, 2--2.","author":"Zaharia Matei","year":"2012","unstructured":"Matei Zaharia, Mosharaf Chowdhury, Tathagata Das, Ankur Dave, Justin Ma, Murphy McCauley, Michael J. Franklin, Scott Shenker, and Ion Stoica. 2012. Resilient Distributed Datasets: A Fault-tolerant Abstraction for In-memory Cluster Computing. In Proceedings of the 9th USENIX conference on Networked Systems Design and Implementation. 
USENIX Association, 2--2."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934664"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/564691.564709"}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526054","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3526054","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:10:07Z","timestamp":1750183807000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526054"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":59,"alternative-id":["10.1145\/3514221.3526054","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3526054","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}