{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T20:20:51Z","timestamp":1740169251563,"version":"3.37.3"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/access.2022.3203401","type":"journal-article","created":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T19:36:34Z","timestamp":1662060994000},"page":"98048-98064","source":"Crossref","is-referenced-by-count":3,"title":["Deep Reinforcement Learning for System-on-Chip: Myths and Realities"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2967-4258","authenticated-orcid":false,"given":"Tegg Taekyong","family":"Sung","sequence":"first","affiliation":[{"name":"EpiSys Science Inc., Poway, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5447-5015","authenticated-orcid":false,"given":"Bo","family":"Ryu","sequence":"additional","affiliation":[{"name":"EpiSys Science Inc., Poway, CA, USA"}]}],"member":"263","reference":[{"volume-title":"ODROID-XU3","year":"2019","key":"ref1"},{"key":"ref2","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","volume-title":"Proc. 12th USENIX Symp. Operating Syst. Design Implement. (OSDI)","author":"Abadi"},{"key":"ref3","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Agarwal"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/LCA.2021.3085505"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/TC.2020.2986963"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/CCGRID.2005.1558639"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1006\/jpdc.2000.1714"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1145\/2898442.2898444"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1007\/978-1-4614-0676-1"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1609\/aaai.v34i04.5739"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ACCESS.2019.2948150"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1145\/1327452.1327492"},{"year":"2017","author":"Dhariwal","article-title":"Openai baselines","key":"ref13"},{"volume-title":"OpenAI Baselines","year":"2017","author":"Dhariwal","key":"ref14"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/TPDS.2020.3041829"},{"key":"ref16","first-page":"1263","article-title":"Neural message passing for quantum chemistry","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gilmer"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/LES.2021.3110426"},{"key":"ref18","first-page":"485","article-title":"Tiresias: A GPU cluster manager for distributed deep learning","volume-title":"Proc. 16th USENIX Symp. Netw. Syst. Design Implement. (NSDI)","author":"Gu"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1145\/3282307"},{"year":"2020","author":"Holt","article-title":"Novel learning-based task schedulers for domain-specific SoCs","key":"ref20"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/ICDCS.2019.00201"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1016\/j.jss.2016.07.006"},{"key":"ref23","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref24","first-page":"1","article-title":"Actor-critic algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"12","author":"Konda"},{"year":"2022","author":"Krishnakumar","article-title":"Design run-time resource management of domain-specific systems on chip (DSSoCs)","key":"ref25"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1080\/09540091.2022.2052265"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1145\/3342195.3387547"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/TPDS.2021.3135876"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1145\/3529257"},{"key":"ref30","first-page":"289","article-title":"Themis: Fair and efficient GPU cluster scheduling","volume-title":"Proc. 17th USENIX Symp. Netw. Syst. Design Implement. (NSDI)","author":"Mahajan"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1145\/3005745.3005750"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1145\/3341302.3342080"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1145\/3516825"},{"year":"2020","author":"Moazzemi","article-title":"Runtime resource management of emerging applications in heterogeneous architectures","key":"ref34"},{"key":"ref35","first-page":"481","article-title":"Heterogeneity-aware cluster scheduling policies for deep learning workloads","volume-title":"Proc. 14th USENIX Symp. Operating Syst. Design Implement. (OSDI)","author":"Narayanan"},{"key":"ref36","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. ICML","volume":"99","author":"Ng"},{"key":"ref37","article-title":"Neural heterogeneous scheduler","author":"Sung","year":"2019","journal-title":"arXiv:1906.03724"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.3390\/electronics9060936"},{"key":"ref39","article-title":"A scalable and reproducible system-on-chip simulation for reinforcement learning","author":"Sung","year":"2021","journal-title":"arXiv:2104.13187"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1109\/ICMLA52953.2021.00083"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref42","first-page":"1","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"12","author":"Sutton"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1145\/3357223.3362710"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.1109\/HCW.1999.765092"},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.1145\/2901318.2901355"},{"key":"ref46","first-page":"1","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Vaswani"},{"doi-asserted-by":"publisher","key":"ref47","DOI":"10.1145\/2523616.2523633"},{"key":"ref48","article-title":"StarCraft II: A new challenge for reinforcement learning","author":"Vinyals","year":"2017","journal-title":"arXiv:1708.04782"},{"key":"ref49","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"Proc. 13th USENIX Symp. Operating Syst. Design Implement. (OSDI)","author":"Xiao"},{"key":"ref50","first-page":"7154","article-title":"DAG-GNN: DAG structure learning with graph neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yu"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9668973\/09874880.pdf?arnumber=9874880","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T12:41:27Z","timestamp":1706791287000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9874880\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/access.2022.3203401","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2022]]}}}