{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T20:42:48Z","timestamp":1757450568236,"version":"3.30.2"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1109\/mascots64422.2024.10786558","type":"proceedings-article","created":{"date-parts":[[2024,12,13]],"date-time":"2024-12-13T18:50:09Z","timestamp":1734115809000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["LLMPerf: GPU Performance Modeling meets Large Language Models"],"prefix":"10.1109","author":[{"given":"Minh-Khoi","family":"Nguyen-Nhat","sequence":"first","affiliation":[{"name":"FPT Software AI Center,Hanoi,Vietnam"}]},{"given":"Hoang Duy Nguyen","family":"Do","sequence":"additional","affiliation":[{"name":"FPT Software AI Center,Hanoi,Vietnam"}]},{"given":"Huyen Thao","family":"Le","sequence":"additional","affiliation":[{"name":"FPT Software AI Center,Hanoi,Vietnam"}]},{"given":"Thanh Tuan","family":"Dao","sequence":"additional","affiliation":[{"name":"FPT University,Hanoi,Vietnam"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555775"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2749246.2749265"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3126546"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/NAS.2011.51"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3522712"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2016.04.002"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CADS.2013.6714232"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3431731"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476221"},{"article-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref10"},{"key":"ref11","article-title":"Teaching large language models to self-debug","author":"Chen","year":"2023","journal-title":"arXiv preprint arXiv:2304.05128"},{"key":"ref12","article-title":"Pytorch: An imperative style, highperformance deep learning library","volume":"abs\/1912.01703","author":"Paszke","year":"2019","journal-title":"CoRR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3559009.3569644"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2017.7863731"},{"article-title":"Understanding latency hiding on GPUs","year":"2016","author":"Volkov","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2014.2333526"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-7563-6_53"},{"article-title":"Stanford alpaca: An instruction-following llama model","year":"2023","author":"Taori","key":"ref18"},{"key":"ref19","article-title":"Codegen: An open large language model for code with multi-turn program synthesis","author":"Nijkamp","year":"2023","journal-title":"ICLR"},{"year":"2","key":"ref20","article-title":"Opencl c++ bindings"},{"key":"ref21","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv preprint arXiv:1711.05101"},{"key":"ref22","article-title":"Generalization and parameter estimation in feedforward nets: Some experiments","volume":"2","author":"Morgan","year":"1989","journal-title":"Advances in neural information processing systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"}],"event":{"name":"2024 32nd International Conference on Modeling, Analysis and Simulation of Computer and Telecommunication Systems (MASCOTS)","start":{"date-parts":[[2024,10,21]]},"location":"Krakow, Poland","end":{"date-parts":[[2024,10,23]]}},"container-title":["2024 32nd International Conference on Modeling, Analysis and Simulation of Computer and Telecommunication Systems (MASCOTS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10786488\/10786336\/10786558.pdf?arnumber=10786558","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,14]],"date-time":"2024-12-14T06:44:30Z","timestamp":1734158670000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10786558\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/mascots64422.2024.10786558","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]}}}