{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T20:36:31Z","timestamp":1780346191454,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":88,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3526141","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"587-601","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":38,"title":["End-to-end Optimization of Machine Learning Prediction Queries"],"prefix":"10.1145","author":[{"given":"Kwanghyun","family":"Park","sequence":"first","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Karla","family":"Saur","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dalitso","family":"Banda","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rathijit","family":"Sen","sequence":"additional","affiliation":[{"name":"Microsoft, Madison, WI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matteo","family":"Interlandi","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Konstantinos","family":"Karanasos","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng.","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek Gordon Murray, Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A System for Large-Scale Machine Learning. In OSDI."},{"key":"e_1_3_2_1_2_1","unstructured":"Ashvin Agrawal Rony Chatterjee Carlo Curino Avrilia Floratou Neha Godwal Matteo Interlandi Alekh Jindal Konstantinos Karanasos Subru Krishnan Brian Kroth Jyoti Leeka Kwanghyun Park Hiren Patel Olga Poppe Fotis Psallidas Raghu Ramakrishnan Abhishek Roy Karla Saur Rathijit Sen Markus Weimer Travis Wright and Yiwen Zhu. 2020. Cloudy with high chance of DBMS: a 10-year prediction for Enterprise-Grade ML. In CIDR."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415545"},{"key":"e_1_3_2_1_4_1","volume-title":"Markus Weimer, Shauheen Zahirazami, and Yiwen Zhu.","author":"Ahmed Zeeshan","year":"2019","unstructured":"Zeeshan Ahmed, Saeed Amizadeh, Mikhail Bilenko, Rogan Carr, Wei-Sheng Chin, Yael Dekel, Xavier Dupr\u00e9, Vadim Eksarevskiy, Senja Filipi, Tom Finley, Abhishek Goswami, Monte Hoover, Scott Inglis, Matteo Interlandi, Najeeb Kazmi, Gleb Krivosheev, Pete Luferenko, Ivan Matantsev, Sergiy Matusevych, Shahab Moradi, Gani Nazirov, Justin Ormont, Gal Oshri, Artidoro Pagnoni, Jignesh Parmar, Prabhat Roy, Mohammad Zeeshan Siddiqui, Markus Weimer, Shauheen Zahirazami, and Yiwen Zhu. 2019. Machine Learning at Microsoft with ML.NET. In SIGKDD."},{"key":"e_1_3_2_1_5_1","volume-title":"Magpie: Python at Speed and Scale using Cloud Backends. In CIDR.","author":"Olga Poppe Brandon Haynes Maureen Daum","year":"2021","unstructured":"Maureen Daum Olga Poppe Brandon Haynes Anna Pavlenko Ayushi Gupta Karthik Ramachandra Carlo Curino Andreas Mueller Wentao Wu Hiren Patel Alekh Jindal, Venkatesh Emani. 2021. Magpie: Python at Speed and Scale using Cloud Backends. In CIDR."},{"key":"e_1_3_2_1_6_1","unstructured":"Amazon.com. 2021. Redshift. https:\/\/aws.amazon.com\/redshift"},{"key":"e_1_3_2_1_7_1","unstructured":"Amazon.com. 2021. Redshift ML. https:\/\/aws.amazon.com\/blogs\/big-data\/create-train-and-deploy-machine-learning-models-in-amazon-redshift-using-sql-with-amazon-redshift-ml"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415560"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Michael Armbrust Reynold S. Xin Cheng Lian Yin Huai Davies Liu Joseph K. Bradley Xiangrui Meng Tomer Kaftan Michael J. Franklin Ali Ghodsi and Matei Zaharia. 2015. Spark SQL: Relational Data Processing in Spark. In SIGMOD.","DOI":"10.1145\/2723372.2742797"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098021"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Catriel Beeri and Raghu Ramakrishnan. 1987. On the Power of Magic. In PODS.","DOI":"10.1145\/28659.28689"},{"key":"e_1_3_2_1_12_1","unstructured":"Bernd Bischl Giuseppe Casalicchio Matthias Feurer Frank Hutter Michel Lang Rafael G. Mantovani Jan N. van Rijn and Joaquin Vanschoren. 2019. OpenML Benchmarking Suites. arXiv:1708.03731 [stat.ML]"},{"key":"e_1_3_2_1_13_1","volume-title":"Pattern recognition and machine learning","author":"Bishop Christopher M.","unstructured":"Christopher M. Bishop. 2007. Pattern recognition and machine learning, 5th Edition. Springer.","edition":"5"},{"key":"e_1_3_2_1_14_1","volume-title":"Niketan Pansare, Berthold Reinwald, Frederick Reiss, Prithviraj Sen, Arvind Surve, and Shirish Tatikonda.","author":"Boehm Matthias","year":"2016","unstructured":"Matthias Boehm, Michael Dusenberry, Deron Eriksson, Alexandre V. Evfimievski, Faraz Makari Manshadi, Niketan Pansare, Berthold Reinwald, Frederick Reiss, Prithviraj Sen, Arvind Surve, and Shirish Tatikonda. 2016. SystemML: Declarative Machine Learning on Spark. PVLDB (2016)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_1_16_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In OSDI.","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In OSDI."},{"key":"e_1_3_2_1_17_1","unstructured":"Compact.nl. 2021. Enterprise Data Management: Value and Necessity. https:\/\/statics.teams.cdn.office.net\/evergreen-assets\/safelinks\/1\/atp-safelinks.html"},{"key":"e_1_3_2_1_18_1","volume-title":"Clipper: A Low-Latency Online Prediction Serving System. In NSDI.","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In NSDI."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Beno\u00eet Dageville Thierry Cruanes Marcin Zukowski Vadim Antonov Artin Avanes Jon Bock Jonathan Claybaugh Daniel Engovatov Martin Hentschel Jiansheng Huang Allison W. Lee Ashish Motivala Abdul Q. Munir Steven Pelley Peter Povinec Greg Rahn Spyridon Triantafyllis and Philipp Unterbrunner. 2016. The Snowflake Elastic Data Warehouse. In SIGMOD. ACM.","DOI":"10.1145\/2882903.2903741"},{"key":"e_1_3_2_1_20_1","unstructured":"Databricks. 2021. Data Skipping in Apache Spark. https:\/\/docs.databricks.com\/spark\/latest\/spark-sql\/dataskipping-index.html"},{"key":"e_1_3_2_1_21_1","unstructured":"Apache Spark Documentation. 2021. Data Skipping in Apache Spark. https:\/\/spark.apache.org\/docs\/3.1.2\/sql-data-sources-parquet.htmlpartition-discovery"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476311.3476375"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2213836.2213874"},{"key":"e_1_3_2_1_24_1","unstructured":"Inc. Google. 2021. Big Query ML. https:\/\/cloud.google.com\/bigquery-ml\/docs"},{"key":"e_1_3_2_1_25_1","unstructured":"Inc. Google. 2021. MLIR. https:\/\/github.com\/tensorflow\/mlir"},{"key":"e_1_3_2_1_26_1","unstructured":"Inc. Google. 2021. XLAn. http:\/\/www.tensorflow.org\/xla"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.14778\/2367502.2367510"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3070607.3070608"},{"key":"e_1_3_2_1_29_1","unstructured":"Ihab Ilyas. 2021. AI Should not Leave Structured Data Behind! https:\/\/towardsdatascience.com\/ai-should-not-leave-structured-data-behind-33474f9cd07a"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3380575"},{"key":"e_1_3_2_1_31_1","unstructured":"Andy Jassy. 2021. AWS re:Invent 2018 keynote. https:\/\/www.youtube.com\/watch?v=ZOIkOnW640A&t=5316s"},{"key":"e_1_3_2_1_32_1","unstructured":"Kaggle. 2020. The State of Data Science. http:\/\/www.kaggle.com\/kaggle-survey-2020"},{"key":"e_1_3_2_1_33_1","unstructured":"Kaggle. 2021. . https:\/\/www.kaggle.com\/mlg-ulb\/creditcardfraud"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Daniel Kang Ankit Mathur Teja Veeramacheneni Peter Bailis and Matei Zaharia. 2020. Jointly Optimizing Preprocessing and Inference for DNN-based Visual Analytics. arXiv:2007.13005 [cs.DB]","DOI":"10.14778\/3425879.3425881"},{"key":"e_1_3_2_1_35_1","unstructured":"Konstantinos Karanasos Matteo Interlandi Fotis Psallidas Rathijit Sen Kwanghyun Park Ivan Popivanov Doris Xin Supun Nakandala Subru Krishnan Markus Weimer Yuan Yu Raghu Ramakrishnan and Carlo Curino. 2020. Extending Relational Query Processing with ML Inference. In CIDR."},{"key":"e_1_3_2_1_36_1","first-page":"I","article-title":"LightGBM: A Highly Efficient Gradient Boosting Decision Tree","volume":"30","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. LightGBM: A Highly Efficient Gradient Boosting Decision Tree. In Advances in Neural Information Processing Systems 30, I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.). Curran Associates, Inc., 3146--3154. http:\/\/papers.nips.cc\/paper\/6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree.pdf","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.14778\/3467861.3467869"},{"key":"e_1_3_2_1_38_1","volume-title":"Willump: A Statistically-Aware End-to-end Optimizer for Machine Learning Inference. CoRR","author":"Kraft Peter","year":"2019","unstructured":"Peter Kraft, Daniel Kang, Deepak Narayanan, Shoumik Palkar, Peter Bailis, and Matei Zaharia. 2019. Willump: A Statistically-Aware End-to-end Optimizer for Machine Learning Inference. CoRR (2019)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3054775"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2723713"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.14778\/3342263.3342633"},{"key":"e_1_3_2_1_42_1","volume-title":"Markus Weimer, and Matteo Interlandi.","author":"Lee Yunseong","year":"2018","unstructured":"Yunseong Lee, Alberto Scolari, Byung-Gon Chun, Marco Domenico Santambrogio, Markus Weimer, and Matteo Interlandi. 2018. PRETZEL: Opening the Black Box of Machine Learning Prediction Serving Systems. In OSDI."},{"key":"e_1_3_2_1_43_1","volume-title":"From the Edge to the Cloud: Model Serving in ML","author":"Lee Yunseong","year":"2018","unstructured":"Yunseong Lee, Alberto Scolari, Byung-Gon Chun, Markus Weimer, and Matteo Interlandi. 2018. From the Edge to the Cloud: Model Serving in ML.NET. IEEE Data Eng. Bull. 41, 4 (2018)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3183751"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3405470"},{"key":"e_1_3_2_1_46_1","unstructured":"Nantia Makrynioti Ruy Ley-Wild and Vasilis Vassalos. 2019. sql4ml A declarative end-to-end workflow for machine learning. arXiv:1907.12415 [cs.DB]"},{"key":"e_1_3_2_1_47_1","first-page":"1","article-title":"MLlib: Machine Learning in Apache Spark","volume":"17","author":"Meng Xiangrui","year":"2016","unstructured":"Xiangrui Meng, Joseph Bradley, Burak Yavuz, Evan Sparks, Shivaram Venkataraman, Davies Liu, Jeremy Freeman, DB Tsai, Manish Amde, Sean Owen, Doris Xin, Reynold Xin, Michael J. Franklin, Reza Zadeh, Matei Zaharia, and Ameet Talwalkar. 2016. MLlib: Machine Learning in Apache Spark. Journal of Machine Learning Research 17, 34 (2016), 1--7. http:\/\/jmlr.org\/papers\/v17\/15--237.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_48_1","unstructured":"Microsoft. 2021. Execute external scripts in SQL Server. https:\/\/docs.microsoft.com\/en-us\/sql\/relational-databases\/system-stored-procedures\/sp-execute-external-script-transact-sql?view=sql-server-2017"},{"key":"e_1_3_2_1_49_1","unstructured":"Microsoft. 2021. HDInsight. https:\/\/azure.microsoft.com\/en-us\/free\/hdinsight"},{"key":"e_1_3_2_1_50_1","unstructured":"Microsoft. 2021. Machine Learning Inference in Azure SQL Edge. https:\/\/docs.microsoft.com\/en-us\/azure\/azure-sql-edge\/deploy-onnx"},{"key":"e_1_3_2_1_51_1","unstructured":"Microsoft. 2021. ONNX Runtime. http:\/\/github.com\/microsoft\/onnxruntime"},{"key":"e_1_3_2_1_52_1","unstructured":"Microsoft. 2021. PREDICT in T-SQL. https:\/\/docs.microsoft.com\/en-us\/sql\/t-sql\/queries\/predict-transact-sql?view=sql-server-ver15"},{"key":"e_1_3_2_1_53_1","unstructured":"Microsoft. 2021. Predicting Length of Stay in Hospitals. https:\/\/github.com\/Microsoft\/r-server-hospital-length-of-stay"},{"key":"e_1_3_2_1_54_1","unstructured":"Microsoft. 2021. Score machine learning models with PREDICT in serverless Apache Spark pools. https:\/\/docs.microsoft.com\/en-us\/azure\/synapse-analytics\/machine-learning\/tutorial-score-model-predict-spark-pool"},{"key":"e_1_3_2_1_55_1","unstructured":"Susan Moore. 2016. What We Can Do With Machine Learning. https:\/\/www.gartner.com\/smarterwithgartner\/what-we-can-do-with-machine-learning\/"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Derek G. Murray Jiri Simsa Ana Klimovic and Ihor Indyk. 2021. tf.data: A Machine Learning Data Processing Framework. arXiv:2101.12127 [cs.LG]","DOI":"10.14778\/3476311.3476374"},{"key":"e_1_3_2_1_57_1","volume-title":"Markus Weimer, and Matteo Interlandi.","author":"Nakandala Supun","year":"2020","unstructured":"Supun Nakandala, Karla Saur, Gyeong-In Yu, Konstantinos Karanasos, Carlo Curino, Markus Weimer, and Matteo Interlandi. 2020. A Tensor Compiler for Unified Machine Learning Prediction Serving. In OSDI."},{"key":"e_1_3_2_1_58_1","volume-title":"Proceedings of the 26th International Conference on Very Large Databases (proceedings of the 26th international conference on very large databases ed.)","author":"Netz Amir","unstructured":"Amir Netz, Jeff Bernhardt, Usama Fayyad, and Surajit Chaudhuri. 2000. Integration of Data Mining and Relational Databases. In Proceedings of the 26th International Conference on Very Large Databases (proceedings of the 26th international conference on very large databases ed.). Very Large Data Bases Endowment Inc. https:\/\/www.microsoft.com\/en-us\/research\/publication\/integration-of-data-mining-and-relational-databases\/"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415572"},{"key":"e_1_3_2_1_60_1","unstructured":"ONNX. 2021. ONNX. http:\/\/onnx.ai"},{"key":"e_1_3_2_1_61_1","unstructured":"ONNX. 2021. ONNXMLTools. http:\/\/github.com\/onnx\/onnxmltools"},{"key":"e_1_3_2_1_62_1","volume-title":"OpenML Benchmarking Suites and the OpenML-CC18","author":"ML.","unstructured":"OpenML. 2021. OpenML Benchmarking Suites and the OpenML-CC18. http:\/\/www.openml.org\/s\/99"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.14778\/3368289.3368292"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.14778\/3213880.3213890"},{"key":"e_1_3_2_1_65_1","unstructured":"Apache Parquet. 2021. Apache Parquet. https:\/\/parquet.apache.org"},{"key":"e_1_3_2_1_66_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc., 8026--8037. https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_68_1","unstructured":"Project Hamlet 2021. Project Hamlet. https:\/\/adalabucsd.github.io\/hamlet.html."},{"key":"e_1_3_2_1_69_1","volume-title":"Carlo Curino, and Markus Weimer","author":"Psallidas Fotis","year":"2019","unstructured":"Fotis Psallidas, Yiwen Zhu, Bojan Karlas, Matteo Interlandi, Avrilia Floratou, Konstantinos Karanasos, Wentao Wu, Ce Zhang, Subru Krishnan, Carlo Curino, and Markus Weimer. 2019. Data Science through the looking glass and what we found there. CoRR (2019). http:\/\/arxiv.org\/abs\/1912.09536"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3186728.3164140"},{"key":"e_1_3_2_1_72_1","volume-title":"Database Management Systems (3 ed.)","author":"Ramakrishnan Raghu","unstructured":"Raghu Ramakrishnan and Johannes Gehrke. 2003. Database Management Systems (3 ed.). McGraw-Hill, Inc., New York, NY, USA."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","unstructured":"Raghu Ramakrishnan Baskar Sridharan John R. Douceur Pavan Kasturi Balaji Krishnamachari-Sampath Karthick Krishnamoorthy Peng Li Mitica Manu Spiro Michaylov Rog\u00e9rio Ramos Neil Sharman Zee Xu Youssef Barakat Chris Douglas Richard Draves Shrikant S. Naidu Shankar Shastry Atul Sikaria Simon Sun and Ramarathnam Venkatesan. 2017. Azure Data Lake Store: A Hyperscale Distributed File Service for Big Data Analytics. In SIGMOD. ACM.","DOI":"10.1145\/3035918.3056100"},{"key":"e_1_3_2_1_74_1","unstructured":"Isaac Sacolick. 2020. Amazon Google and Microsoft take their clouds to the edge. https:\/\/www.infoworld.com\/article\/3575071\/amazon-google-and-microsoft-take-their-clouds-to-the-edge.html"},{"key":"e_1_3_2_1_75_1","volume-title":"Samsara: Declarative Machine Learning on Distributed Dataflow Systems.","author":"Schelter Sebastian","year":"2016","unstructured":"Sebastian Schelter, Shannon Quinn, Suneel Marthi, and Andrew Musselman. 2016. Samsara: Declarative Machine Learning on Distributed Dataflow Systems."},{"key":"e_1_3_2_1_76_1","unstructured":"Maximilian Sch\u00fcle Matthias Bungeroth Dimitri Vorona Alfons Kemper Stephan G\u00fcnnemann and Thomas Neumann. 2019. ML2SQL - Compiling a Declarative Machine Learning Language to SQL and Python. In EDBT."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.18420\/btw2019--16"},{"key":"e_1_3_2_1_78_1","volume-title":"Scikit-learn: Machine Learning in Python. https:\/\/scikit-learn.org\/","year":"2021","unstructured":"Scikit-learn. 2021. Scikit-learn: Machine Learning in Python. https:\/\/scikit-learn.org\/"},{"key":"e_1_3_2_1_79_1","unstructured":"Scikit-learn. 2021. Scikit-learn OneHot Encoder. https:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.preprocessing.OneHotEncoder.html"},{"key":"e_1_3_2_1_80_1","unstructured":"Scikit-learn. 2021. Scikit-learn Scaler. https:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.preprocessing.StandardScaler.html"},{"key":"e_1_3_2_1_81_1","unstructured":"Apache Spark. 2021. Pandas Vectorized UDF in Apache Spark. https:\/\/spark.apache.org\/docs\/latest\/sql-pyspark-pandas-with-arrow.html#pandas-udfs-aka-vectorized-udfs"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2013.158"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2017.109"},{"key":"e_1_3_2_1_84_1","unstructured":"Sebastian Schelter Stefan Grafberger Julia Stoyanovich. 2021. Lightweight Inspection of Data Preprocessing in Native Machine Learning Pipelines. In CIDR."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.14778\/3282495.3282499"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407799"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"crossref","unstructured":"Binhang Yuan Dimitrije Jankov Jia Zou Yuxin Tang Daniel Bourgeois and Chris Jermaine. 2020. Tensor Relational Algebra for Machine Learning System Design. arXiv:2009.00524 [cs.DB]","DOI":"10.14778\/3457390.3457399"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.14778\/3467861.3467867"}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526141","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3526141","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:10:13Z","timestamp":1750183813000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526141"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":88,"alternative-id":["10.1145\/3514221.3526141","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3526141","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}