{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T22:38:06Z","timestamp":1778279886315,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Novo Nordisk Fonden","doi-asserted-by":"publisher","award":["NNF22OC0079398"],"award-info":[{"award-number":["NNF22OC0079398"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3719159.3721223","type":"proceedings-article","created":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T19:45:39Z","timestamp":1743104739000},"page":"23-26","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Towards A Modular End-To-End Machine Learning Benchmarking Framework"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1052-3855","authenticated-orcid":false,"given":"Robert","family":"Bayer","sequence":"first","affiliation":[{"name":"IT University of Copenhagen, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3451-5602","authenticated-orcid":false,"given":"Ties","family":"Robroek","sequence":"additional","affiliation":[{"name":"IT University of Copenhagen, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6838-4854","authenticated-orcid":false,"given":"Pinar","family":"T\u00f6z\u00fcn","sequence":"additional","affiliation":[{"name":"IT University of Copenhagen, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2020. Weights and Biases. https:\/\/wandb.ai\/site"},{"key":"e_1_3_2_1_2_1","unstructured":"Martin Abadi. 2016. TensorFlow: A system for large-scale machine learning. (2016) 21."},{"key":"e_1_3_2_1_3_1","unstructured":"Akari Asai et al. 2023. Self-RAG: Learning to Retrieve Generate and Critique through Self-Reflection."},{"key":"e_1_3_2_1_4_1","unstructured":"Colby Banbury et al. 2021. MLPerf Tiny Benchmark. arXiv:2106.07597 [cs]"},{"key":"e_1_3_2_1_5_1","unstructured":"Sebastian Baunsgaard et al. 2020. Training for Speech Recognition on Coprocessors. arXiv:2003.12366 (March 2020)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Robert Bayer et al. 2023. TPCx-AI on NVIDIA Jetsons. In Performance Evaluation and Benchmarking. Vol. 13860. Springer Nature Switzerland Cham 49--66.","DOI":"10.1007\/978-3-031-29576-8_4"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.14778\/3611540.3611554"},{"key":"e_1_3_2_1_8_1","unstructured":"Paul Elvinger et al. 2025. Measuring GPU utilization one level deeper."},{"key":"e_1_3_2_1_9_1","volume-title":"The Coral Dev Board Takes Google's AI to the Edge","author":"Spectrum IEEE","year":"2019","unstructured":"IEEE Spectrum. 2019. The Coral Dev Board Takes Google's AI to the Edge. IEEE Spectrum (2019). https:\/\/spectrum.ieee.org\/the-coral-dev-board-takes-googles-ai-to-the-edge"},{"key":"e_1_3_2_1_10_1","first-page":"352","article-title":"Mlperf Mobile Inference Benchmark: An Industry-Standard Open-Source Machine Learning Benchmark for on-Device Ai","volume":"4","author":"Reddi Vijay Janapa","year":"2022","unstructured":"Vijay Janapa Reddi, et al. 2022. Mlperf Mobile Inference Benchmark: An Industry-Standard Open-Source Machine Learning Benchmark for on-Device Ai. Proceedings of MLSys 4 (2022), 352--369.","journal-title":"Proceedings of MLSys"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 2019 USENIX Annual Technical Conference, USENIX ATC 2019","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, et al. 2019. Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In Proceedings of the 2019 USENIX Annual Technical Conference, USENIX ATC 2019, Renton, WA, USA, July 10-12, 2019. USENIX Association, 947--960."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.14778\/3342263.3342276"},{"key":"e_1_3_2_1_13_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, et al. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems, Vol. 33. Curran Associates, Inc., 9459--9474."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics. Association for Computational Linguistics, Abu Dhabi, UAE, 9760--9779","author":"Li Xinzhe","year":"2025","unstructured":"Xinzhe Li. 2025. A Review of Prominent Paradigms for LLM-Based Agents: Tool Use, Planning (Including RAG), and Feedback Learning. In Proceedings of the 31st International Conference on Computational Linguistics. Association for Computational Linguistics, Abu Dhabi, UAE, 9760--9779."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.14778\/3685800.3685916"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874254"},{"key":"e_1_3_2_1_17_1","unstructured":"Mourad Mourafiq. [n. d.]. Polyaxon: Cloud native machine learning platform. https:\/\/github.com\/polyaxon\/polyaxon"},{"key":"e_1_3_2_1_18_1","unstructured":"Vijay Janapa Reddi et al. 2020. MLPerf Inference Benchmark. arXiv:1911.02549 [cs stat] (May 2020). arXiv:1911.02549 [cs stat]"},{"key":"e_1_3_2_1_19_1","unstructured":"Vijay Janapa Reddi et al. 2020. MLPerf Mobile Inference Benchmark: Why Mobile AI Benchmarking Is Hard and What to Do about It. arXiv:2012.02328 (2020). arXiv:2012.02328"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3595360.3595851"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3642970.3655827"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629578"},{"key":"e_1_3_2_1_23_1","unstructured":"Neptune Team. 2019. neptune.ai. Technical Report. https:\/\/neptune.ai\/"},{"key":"e_1_3_2_1_24_1","unstructured":"torchtune maintainers. 2024. torchtune: PyTorch's finetuning library. https\/\/github.com\/pytorch\/torchtune"},{"key":"e_1_3_2_1_25_1","unstructured":"Peter Torelli. 2021. Measuring Inference Performance of Machine-Learning Frameworks on Edge-Class Devices with the Mlmark Benchmark. Techincal Report. Available online: https:\/\/www.eembc.org\/techlit\/articles\/MLMARK-WHITEPAPERFINAL-1.pdf (2021)."},{"key":"e_1_3_2_1_26_1","unstructured":"Shang Wang et al. 2021. Horizontally Fused Training Array: An Effective Hardware Utilization Squeezer for Training Novel Deep Learning Models. arXiv:2102.02344 [cs] (March 2021)."},{"key":"e_1_3_2_1_27_1","unstructured":"Thomas Wolf et al. 2020. HuggingFace's transformers: State-of-the-art natural language processing."},{"key":"e_1_3_2_1_28_1","unstructured":"Zeyu Yang et al. 2024. Part-time Power Measurements: nvidia-smi's Lack of Attention."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/3485849.3485855"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578356.3592589"},{"key":"e_1_3_2_1_31_1","first-page":"39","article-title":"Accelerating the machine learning lifecycle with MLflow","volume":"41","author":"Zaharia Matei","year":"2018","unstructured":"Matei Zaharia, et al. 2018. Accelerating the machine learning lifecycle with MLflow. IEEE Data Eng. Bull. 41, 4 (2018), 39--45.","journal-title":"IEEE Data Eng. Bull."}],"event":{"name":"EuroSys '25: Twentieth European Conference on Computer Systems","location":"Rotterdam Netherlands","acronym":"EuroSys '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 3rd International Workshop on Testing Distributed Internet of Things Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719159.3721223","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719159.3721223","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T20:33:54Z","timestamp":1755981234000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719159.3721223"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":31,"alternative-id":["10.1145\/3719159.3721223","10.1145\/3719159"],"URL":"https:\/\/doi.org\/10.1145\/3719159.3721223","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}