{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T07:23:36Z","timestamp":1769066616915,"version":"3.49.0"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Fundamental Research Program of Shanxi Province","award":["202303021222300"],"award-info":[{"award-number":["202303021222300"]}]},{"name":"Fundamental Research Program of Shanxi Province","award":["202103021223352"],"award-info":[{"award-number":["202103021223352"]}]},{"name":"5th Discipline Leader Project of Taiyuan Institute of Technology"},{"name":"Shanxi Province Department of Education University Science and Technology Innovation Project","award":["2022L523"],"award-info":[{"award-number":["2022L523"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3438930","type":"journal-article","created":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T17:57:27Z","timestamp":1722880647000},"page":"111653-111662","source":"Crossref","is-referenced-by-count":3,"title":["Satellite Communication Resource Scheduling Using a Dynamic Weight-Based Soft Actor Critic Reinforcement Learning"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5829-4353","authenticated-orcid":false,"given":"Zhimin","family":"Qiao","sequence":"first","affiliation":[{"name":"Department of Automation, Taiyuan Institute of Technology, Taiyuan, China"}]},{"given":"Weibo","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Automobile, Chang&#x2019;an University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8963-7723","authenticated-orcid":false,"given":"Feng","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Automation, Taiyuan Institute of Technology, Taiyuan, China"}]},{"given":"Yongwei","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Automation, Taiyuan Institute of Technology, Taiyuan, China"}]},{"given":"Ye","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Automation, Taiyuan Institute of Technology, Taiyuan, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.3028247"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.001.2200659"},{"issue":"9","key":"ref3","first-page":"40","article-title":"Dynamic channel assignment algorithm for fusion beam coverage in GMR-1 satellite mobile communication system","volume":"44","author":"Yanan","year":"2021","journal-title":"Mobile phone Lett."},{"issue":"5","key":"ref4","first-page":"99","article-title":"A feedback optimization access mechanism for satellite channel","volume":"40","author":"Yi","year":"2021","journal-title":"China Space Sci. Technol."},{"key":"ref5","first-page":"15","volume-title":"Research on Channel Allocation Strategies for Satellite Mobile Communication System Adapting to High Speed Terminal","author":"Jiani","year":"2017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/LWC.2019.2949277"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2018.2867487"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IWCMC.2019.8766489"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC49053.2021.9417306"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1561\/2200000086"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2023.3296160"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3249745"},{"issue":"2","key":"ref13","doi-asserted-by":"crossref","first-page":"250","DOI":"10.1002\/1097-4628(20001010)78:2<250::AID-APP30>3.0.CO;2-Q","article-title":"Q-learning based relay selection strategy for hybrid satellite-terrestrial cooperative transmission","volume":"39","author":"Wang","year":"2021","journal-title":"J. Appl. Sci."},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2953809"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2907682"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ISWCS.2018.8491238"},{"issue":"4","key":"ref17","first-page":"1242","article-title":"Resource allocation algorithm for low earth orbit satellites oriented to user demand","volume":"44","author":"Chen","year":"2023","journal-title":"J. Comput. Appl."},{"issue":"4","key":"ref18","first-page":"543","article-title":"Downlink power allocation scheme for LEO satellites based on deep reinforcement learning","volume":"39","author":"Huaming","year":"2022","journal-title":"J. Univ. Chin. Acad. Sci."},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/LWC.2019.2908371"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2023.03.071"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2022.3151651"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-018-1296-x"},{"key":"ref23","article-title":"One forward is enough for neural network training via likelihood ratio method","author":"Jiang","year":"2023","journal-title":"arXiv:2305.08960"},{"key":"ref24","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref25","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref26","first-page":"1","article-title":"Double Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"23","author":"Hasselt"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.3389\/frcmn.2022.850781"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1561\/2200000071"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2020.2976053"},{"key":"ref31","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Duan"},{"key":"ref32","article-title":"Q-Prop: Sample-efficient policy gradient with an off-policy critic","author":"Gu","year":"2016","journal-title":"arXiv:1611.02247"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10623480.pdf?arnumber=10623480","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T05:27:40Z","timestamp":1724131660000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10623480\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3438930","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}