{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T16:32:43Z","timestamp":1772641963120,"version":"3.50.1"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T00:00:00Z","timestamp":1656288000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T00:00:00Z","timestamp":1656288000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,27]]},"DOI":"10.1109\/netsoft54395.2022.9844032","type":"proceedings-article","created":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T19:34:09Z","timestamp":1659555249000},"page":"207-215","source":"Crossref","is-referenced-by-count":3,"title":["On the Training of Reinforcement Learning-based Algorithms in 5G and Beyond Radio Access Networks"],"prefix":"10.1109","author":[{"given":"I.","family":"Vila","sequence":"first","affiliation":[{"name":"Universitat Politècnica de Catalunya (UPC),Dept. of Signal Theory and Communications,Barcelona,Spain"}]},{"given":"J.","family":"Perez-Romero","sequence":"additional","affiliation":[{"name":"Universitat Politècnica de Catalunya (UPC),Dept. of Signal Theory and Communications,Barcelona,Spain"}]},{"given":"O.","family":"Sallent","sequence":"additional","affiliation":[{"name":"Universitat Politècnica de Catalunya (UPC),Dept. 
of Signal Theory and Communications,Barcelona,Spain"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Zero-touch network and Service Management (ZSM); Means of Automation","year":"2020"},{"key":"ref11","article-title":"Experiential Networked Intelligence (ENI); System Architecture","year":"2019"},{"key":"ref12","article-title":"Study on Artificial Intelligence (AI)\/Machine Learning (ML) for NR Air Interface","year":"2021","journal-title":"3GPP TSG RAN Meeting"},{"key":"ref13","article-title":"Reinforcement Learning: An Introduction","author":"sutton","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2986050"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/VTCFall.2015.7390815"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2935010"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/WD.2019.8734192"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2846401"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CCNC49033.2022.9700652"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2904897"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2018.1701031"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MVT.2019.2919236"},{"key":"ref8","article-title":"AI\/ML Workflow Description and Requirements 01.01","year":"2021"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CSCI49370.2019.00203"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2965856"},{"key":"ref9","article-title":"Architectural framework for machine learning in future 
networks including IMT-2020","year":"2019"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2924243"},{"key":"ref20","article-title":"UnityFlexML: Training Reinforcement Learning Agents in a Simulated Surgical Environment","author":"tagliabue","year":"2020","journal-title":"I-RIM Conf"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2828859"},{"key":"ref21","article-title":"Offline Reinforcement Learning: Tutorial, Review and Perspectives on Open Problems","author":"levine","year":"2020"},{"key":"ref24","article-title":"Management and orchestration; 5G Network Resource Model (NRM) (Release 16)","year":"2020"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2021.3099557"},{"key":"ref26","article-title":"O-RAN Architecture Description version 5.00","year":"2021","journal-title":"O-RAN Alliance Working Group 1 Technical specification"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"}],"event":{"name":"2022 IEEE 8th International Conference on Network Softwarization (NetSoft)","location":"Milan, Italy","start":{"date-parts":[[2022,6,27]]},"end":{"date-parts":[[2022,7,1]]}},"container-title":["2022 IEEE 8th International Conference on Network Softwarization 
(NetSoft)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9844017\/9844019\/09844032.pdf?arnumber=9844032","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T20:02:17Z","timestamp":1661198537000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9844032\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,27]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/netsoft54395.2022.9844032","relation":{},"subject":[],"published":{"date-parts":[[2022,6,27]]}}}