{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T00:13:45Z","timestamp":1783037625110,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,4]]},"DOI":"10.1145\/3694715.3695958","type":"proceedings-article","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T19:28:18Z","timestamp":1731698898000},"page":"488-504","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["SilvanForge: A Schedule-Guided Retargetable Compiler for Decision Tree Inference"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9741-9392","authenticated-orcid":false,"given":"Ashwin","family":"Prasad","sequence":"first","affiliation":[{"name":"Indian Institute of Science, Bangalore, Karnataka, India"},{"name":"NI R&amp;D, Bangalore, Karnataka, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2868-0260","authenticated-orcid":false,"given":"Sampath","family":"Rajendra","sequence":"additional","affiliation":[{"name":"Microsoft Research, Bangalore, Karnataka, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6117-3036","authenticated-orcid":false,"given":"Kaushik","family":"Rajan","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, Washington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2517-9994","authenticated-orcid":false,"given":"R","family":"Govindarajan","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bangalore, Karnataka, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8297-6159","authenticated-orcid":false,"given":"Uday","family":"Bondhugula","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bangalore, Karnataka, India"},{"name":"PolyMage Labs, Bangalore, Karnataka, India"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,11,15]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"AI Report","year":"2023","unstructured":"[n. d.]. Kaggle AI Report 2023. https:\/\/www.kaggle.com\/ai-report-2023. Accessed: 2024-04-16."},{"key":"e_1_3_2_1_2_1","volume-title":"d.]. Kaggle State of Data Science and Machine Learning","year":"2021","unstructured":"[n. d.]. Kaggle State of Data Science and Machine Learning 2021. https:\/\/www.kaggle.com\/kaggle-survey-2021. Accessed: 2022-04-16."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. NVIDIA CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass. Accessed: 2022-04-16."},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. scikit-learn : Machine Learning in Python. https:\/\/scikit-learn.org\/stable\/. Accessed: 2022-04-16."},{"key":"e_1_3_2_1_5_1","unstructured":"[n. d.]. Treelite : model compiler for decision tree ensembles. https:\/\/treelite.readthedocs.io\/en\/latest\/. Accessed: 2022-04-16."},{"key":"e_1_3_2_1_6_1","unstructured":"2019. RAPIDS Forest Inference Library: Prediction at 100 million rows per second. https:\/\/medium.com\/rapids-ai\/rapids-forest-inference-library-prediction-at-100-million-rows-per-second-19558890bc35. Accessed: 2024-04-15."},{"key":"e_1_3_2_1_7_1","unstructured":"2020. Intel Machine Learning Benchmarks. https:\/\/github.com\/IntelPython\/scikit-learn_bench."},{"key":"e_1_3_2_1_8_1","unstructured":"2020. The total cost of ownership of Amazon SageMaker. https:\/\/pages.awscloud.com\/rs\/112-TZM-766\/images\/Amazon_SageMaker_TCO_uf.pdf."},{"key":"e_1_3_2_1_9_1","unstructured":"2024. CUB: API Reference for CUB. https:\/\/docs.nvidia.com\/cuda\/cub\/index.html. Accessed: 2024-04-15."},{"key":"e_1_3_2_1_10_1","unstructured":"2024. RAPIDS: GPU Accelerated Data Science. https:\/\/rapids.ai\/. Accessed: 2024-04-15."},{"key":"e_1_3_2_1_11_1","unstructured":"2024. Thrust. https:\/\/developer.nvidia.com\/thrust. Accessed: 2024-04-15."},{"key":"e_1_3_2_1_12_1","unstructured":"2024. XGBoost GPU Support. https:\/\/xgboost.readthedocs.io\/en\/stable\/gpu\/index.html. Accessed: 2024-04-15."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.73"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-012-1196-7"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.17"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3314872.3314896"},{"key":"e_1_3_2_1_17_1","unstructured":"Simon Boehm. [n. d.]. lleaves. https:\/\/github.com\/siboehm\/lleaves"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_1_19_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 578--594. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2013.01.012"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/301618.301661"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.30"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","unstructured":"Simon Garcia De Gonzalo Sitao Huang Juan G\u00f3mez-Luna Simon Hammond Onur Mutlu and Wen-mei Hwu. 2019. Automatic Generation of Warp-Level Primitives and Atomic Instructions for Fast and Portable Parallel Reduction on GPUs. In 2019 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO). 73--84. 10.1109\/CGO.2019.8661187","DOI":"10.1109\/CGO.2019.8661187"},{"key":"e_1_3_2_1_24_1","unstructured":"L\u00e9o Grinsztajn Edouard Oyallon and Ga\u00ebl Varoquaux. 2022. Why do tree-based models still outperform deep learning on tabular data? arXiv:2207.08815 [cs.LG]"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599933"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3282307"},{"key":"e_1_3_2_1_27_1","volume-title":"2014 IEEE International Parallel & Distributed Processing Symposium Workshops","author":"Jansson Karl","year":"2014","unstructured":"Karl Jansson, H\u00e5kan Sundell, and Henrik Bostr\u00f6m. 2014. gpuRF and gpuERT: Efficient and Scalable GPU Algorithms for Decision Tree Ensembles. 2014 IEEE International Parallel & Distributed Processing Symposium Workshops (2014), 1612--1621."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911520"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/2523721.2523770"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. LightGBM: A Highly Efficient Gradient Boosting Decision Tree. In Proceedings of the 31st International Conference on Neural Information Processing Systems (Long Beach, California, USA) (NIPS'17). Curran Associates Inc., Red Hook, NY, USA, 3149--3157."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-011-9272-4"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","unstructured":"Vidhi Lalchand. 2020. Extracting more from boosted decision trees: A high energy physics case study. 10.48550\/ARXIV.2001.06033","DOI":"10.48550\/ARXIV.2001.06033"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926261"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767733"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914758"},{"key":"e_1_3_2_1_37_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Nakandala Supun","year":"2020","unstructured":"Supun Nakandala, Karla Saur, Gyeong-In Yu, Konstantinos Karanasos, Carlo Curino, Markus Weimer, and Matteo Interlandi. 2020. A Tensor Compiler for Unified Machine Learning Prediction Serving. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 899--917. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/nakandala"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00607-013-0343-z"},{"key":"e_1_3_2_1_39_1","unstructured":"Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Shanker Khudia James Law Parth Malani Andrey Malevich Nadathur Satish Juan Miguel Pino Martin Schatz Alexander Sidorov Viswanath Sivakumar Andrew Tulloch Xiaodong Wang Yiming Wu Hector Yuen Utku Diril Dmytro Dzhulgakov Kim M. Hazelwood Bill Jia Yangqing Jia Lin Qiao Vijay Rao Nadav Rotem Sungjoo Yoo and Mikhail Smelyanskiy. 2018. Deep Learning Inference in Facebook Data Centers: Characterization Performance Optimizations and Hardware Implications. CoRR abs\/1811.09886 (2018). arXiv:1811.09886 http:\/\/arxiv.org\/abs\/1811.09886"},{"key":"e_1_3_2_1_40_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32. Curran Associates, Inc., 8024--8035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/1993498.1993501"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00043"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1912.09536"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840306"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967950"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2632215"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO53902.2022.9741285"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2021.11.011"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.5120\/2237-2860"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.5555\/3049832.3049863"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609525"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT58117.2023.00023"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2764454"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1802.04730"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"R. Clint Whaley and Jack Dongarra. 1998. Automatically Tuned Linear Algebra Software. In SuperComputing 1998: High Performance Networking and Computing.","DOI":"10.1109\/SC.1998.10004"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456251"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276491"}],"event":{"name":"SOSP '24: ACM SIGOPS 30th Symposium on Operating Systems Principles","location":"Austin TX USA","acronym":"SOSP '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","USENIX"]},"container-title":["Proceedings of the ACM SIGOPS 30th Symposium on Operating Systems Principles"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3694715.3695958","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3694715.3695958","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:48Z","timestamp":1750291548000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3694715.3695958"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":58,"alternative-id":["10.1145\/3694715.3695958","10.1145\/3694715"],"URL":"https:\/\/doi.org\/10.1145\/3694715.3695958","relation":{},"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"2024-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}