{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T13:21:38Z","timestamp":1761744098611,"version":"build-2065373602"},"reference-count":38,"publisher":"Informa UK Limited","issue":"8","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62273320"],"award-info":[{"award-number":["62273320"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of General Systems"],"published-print":{"date-parts":[[2025,11,17]]},"DOI":"10.1080\/03081079.2025.2474517","type":"journal-article","created":{"date-parts":[[2025,3,17]],"date-time":"2025-03-17T11:17:16Z","timestamp":1742210236000},"page":"1044-1070","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":0,"title":["Single trajectory-based policy optimization for discrete-time stochastic systems"],"prefix":"10.1080","volume":"54","author":[{"given":"Jing","family":"Lai","sequence":"first","affiliation":[{"name":"Hefei University of Technology","place":["Hefei, People's Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junlin","family":"Xiong","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China","place":["Hefei, People's Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2025,3,17]]},"reference":[{"key":"e_1_3_2_2_1","volume-title":"Dynamic Programming and Optimal Control","author":"Bertsekas D. P.","year":"1995","unstructured":"Bertsekas, D. P. 1995. Dynamic Programming and Optimal Control. 
MA: Athena Scientific."},{"key":"e_1_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"e_1_3_2_4_1","unstructured":"Bu J. A. Mesbahi M. Fazel and M. Mesbahi. 2019. \u201cLQR Through the Lens of First Order Methods: Discrete-Time Case.\u201d arXiv preprint arXiv:1907.08921."},{"key":"e_1_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2013.2277869"},{"key":"e_1_3_2_6_1","doi-asserted-by":"crossref","unstructured":"Carmona R. K. Hamidouche M. Lauri\u00e8re and Z. Tan. 2020. \u201cPolicy Optimization for Linear-Quadratic Zero-Sum Mean-Field Type Games.\u201d In IEEE Conference on Decision and Control Seoul South Korea pp. 1038\u20131043.","DOI":"10.1109\/CDC42340.2020.9303734"},{"key":"e_1_3_2_7_1","unstructured":"Dean S. H. Mania N. Matni B. Recht and S. Tu. 2018. \u201cRegret Bounds for Robust Adaptive Control of the Linear Quadratic Regulator.\u201d Advances in Neural Information Processing Systems 31 Montreal Canada pp. 4188\u20134197."},{"key":"e_1_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2023.3323316"},{"key":"e_1_3_2_9_1","unstructured":"Fazel M. R. Ge S. Kakade and M. Mesbahi. 2018. \u201cGlobal Convergence of Policy Gradient Methods for the Linear Quadratic Regulator.\u201d In International Conference on Machine Learning Stockholm Sweden pp. 
1467\u20131476."},{"key":"e_1_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2023.3328993"},{"key":"e_1_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2019.09.008"},{"key":"e_1_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3037046"},{"key":"e_1_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139020411"},{"key":"e_1_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3176439"},{"key":"e_1_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isatra.2023.11.011"},{"key":"e_1_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1002\/9781119132677"},{"key":"e_1_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1985.1103840"},{"key":"e_1_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110685"},{"key":"e_1_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2016.2608799"},{"key":"e_1_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1987.1104686"},{"key":"e_1_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_22_1","unstructured":"Park Y. R. Rossi Z. Wen G. Wu and H. Zhao. 2020. \u201cStructured Policy Iteration for Linear Quadratic Regulator.\u201d In International Conference on Machine Learning Vienna Austria pp. 7521\u20137531."},{"key":"e_1_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1146\/control.2019.2.issue-1"},{"key":"e_1_3_2_24_1","unstructured":"Schacke K. 2013. \u201cOn the Kronecker Product.\u201d Master's Thesis University of Waterloo."},{"key":"e_1_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"e_1_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2748922"},{"key":"e_1_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"e_1_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1214\/ECP.v16-1624"},{"key":"e_1_3_2_29_1","unstructured":"Tu S. and B. Recht. 2018. 
\u201cLeast-Squares Temporal Difference Learning for the Linear Quadratic Regulator.\u201d In International Conference on Machine Learning Stockholm Sweden pp.\u00a05005\u20135014."},{"key":"e_1_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1017\/9781108231596"},{"key":"e_1_3_2_31_1","doi-asserted-by":"publisher","DOI":"10.1088\/0034-4885\/79\/5\/053901"},{"key":"e_1_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2008.08.017"},{"key":"e_1_3_2_33_1","unstructured":"Wang Z. Y. Gao S. Wang M. M. Zavlanos A. Abate and K. H. Johansson. 2023. \u201cPolicy Evaluation in Distributional LQR.\u201d In Learning for Dynamics and Control Conference Philadelphia PA USA pp. 1245\u20131256."},{"key":"e_1_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2023.3284612"},{"key":"e_1_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110486"},{"key":"e_1_3_2_36_1","unstructured":"Yang Z. Y. Chen M. Hong and Z. Wang. 2019. \u201cProvably Global Convergence of Actor-Critic: A Case for Linear Quadratic Regulator with Ergodic Cost.\u201d Advances in Neural Information Processing Systems 32 Vancouver Canada pp. 8353\u20138365."},{"key":"e_1_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1137\/20M1347942"},{"issue":"222","key":"e_1_3_2_38_1","first-page":"1","article-title":"Single Time-Scale Actor-Critic Method to Solve the Linear Quadratic Regulator with Convergence Guarantees","volume":"24","author":"Zhou M.","year":"2023","unstructured":"Zhou, M., and J. Lu. 2023. 
\u201cSingle Time-Scale Actor-Critic Method to Solve the Linear Quadratic Regulator with Convergence Guarantees.\u201d Journal of Machine Learning Research 24 (222): 1\u201334.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2012.05.057"}],"container-title":["International Journal of General Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/03081079.2025.2474517","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T08:24:08Z","timestamp":1761726248000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/03081079.2025.2474517"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,17]]},"references-count":38,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,11,17]]}},"alternative-id":["10.1080\/03081079.2025.2474517"],"URL":"https:\/\/doi.org\/10.1080\/03081079.2025.2474517","relation":{},"ISSN":["0308-1079","1563-5104"],"issn-type":[{"type":"print","value":"0308-1079"},{"type":"electronic","value":"1563-5104"}],"subject":[],"published":{"date-parts":[[2025,3,17]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=ggen20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=ggen20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-04-25","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication 
History"}},{"value":"2025-02-17","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-03-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}