{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:21:13Z","timestamp":1740108073102,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2022,2,26]],"date-time":"2022-02-26T00:00:00Z","timestamp":1645833600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,2,26]],"date-time":"2022-02-26T00:00:00Z","timestamp":1645833600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s00521-022-06988-x","type":"journal-article","created":{"date-parts":[[2022,2,26]],"date-time":"2022-02-26T13:02:36Z","timestamp":1645880556000},"page":"10023-10037","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving actor-critic structure by relatively optimal historical information for discrete system"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9109-1889","authenticated-orcid":false,"given":"Xinyu","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Weidong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xiaoke","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Xiao-Yuan","family":"Jing","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,26]]},"reference":[{"issue":"7587","key":"6988_CR1","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, van den Driessche G, 
Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, Dieleman S, Grewe D, Nham J, Kalchbrenner N, Sutskever I, Lillicrap TP, Leach M, Kavukcuoglu K, Graepel T, Hassabis D (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"issue":"4","key":"6988_CR2","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1109\/TCIAIG.2013.2294713","volume":"6","author":"MJ Hausknecht","year":"2014","unstructured":"Hausknecht MJ, Lehman J, Miikkulainen R, Stone P (2014) A neuroevolution approach to general atari game playing. IEEE Trans Comput Intell AI Games 6(4):355\u2013366","journal-title":"IEEE Trans Comput Intell AI Games"},{"issue":"1","key":"6988_CR3","doi-asserted-by":"publisher","first-page":"7","DOI":"10.3390\/robotics9010007","volume":"9","author":"E Omerdic","year":"2020","unstructured":"Omerdic E, Trslic P, Kaknjo A, Weir A, Rao M, Dooly G, Toal D (2020) Geometric insight into the control allocation problem for open-frame rovs and visualisation of solution. Robotics 9(1):7","journal-title":"Robotics"},{"issue":"5","key":"6988_CR4","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1007\/s00422-018-0775-9","volume":"112","author":"S Kuwada","year":"2018","unstructured":"Kuwada S, Aota T, Uehara K, Nara S (2018) Application of chaos in a recurrent neural network to control in ill-posed problems: a novel autonomous robot arm. Biol Cybern 112(5):495\u2013508","journal-title":"Biol Cybern"},{"key":"6988_CR5","doi-asserted-by":"publisher","first-page":"410","DOI":"10.1016\/j.ins.2018.10.058","volume":"477","author":"X Xu","year":"2019","unstructured":"Xu X, Du Z, Chen X, Cai C (2019) Confidence consensus-based model for large-scale group decision making: a novel approach to managing non-cooperative behaviors. 
Inf Sci 477:410\u2013427","journal-title":"Inf Sci"},{"key":"6988_CR6","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1016\/j.knosys.2017.12.033","volume":"144","author":"F Meng","year":"2018","unstructured":"Meng F, Tang J, Wang P, Chen X (2018) A programming-based algorithm for interval-valued intuitionistic fuzzy group decision making. Knowl Based Syst 144:122\u2013143","journal-title":"Knowl Based Syst"},{"issue":"8","key":"6988_CR7","doi-asserted-by":"publisher","first-page":"2069","DOI":"10.1109\/TSMC.2016.2606647","volume":"47","author":"F Meng","year":"2017","unstructured":"Meng F, An Q, Tan C, Chen X (2017) An approach for group decision making with interval fuzzy preference relations based on additive consistency and consensus analysis. IEEE Trans Syst Man Cybern Syst 47(8):2069\u20132082","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"6988_CR8","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller MA (2014) Deterministic policy gradient algorithms. In: Proceedings of the 31st international conference on machine learning, ICML 2014, Beijing, China, 21\u201326 June, vol\u00a032 of JMLR workshop and conference proceedings, 2014, pp 387\u2013395"},{"issue":"12","key":"6988_CR9","doi-asserted-by":"publisher","first-page":"3534","DOI":"10.1109\/TNNLS.2018.2884797","volume":"30","author":"W Shi","year":"2019","unstructured":"Shi W, Song S, Wu C, Chen CLP (2019) Multi pseudo q-learning-based deterministic policy gradient for tracking control of autonomous underwater vehicles. 
IEEE Trans Neural Netw Learn Syst 30(12):3534\u20133546","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"6988_CR10","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1109\/TII.2017.2718729","volume":"14","author":"J Otto","year":"2018","unstructured":"Otto J, Vogel-Heuser B, Niggemann O (2018) Automatic parameter estimation for reusable software components of modular and reconfigurable cyber-physical production systems in the domain of discrete manufacturing. IEEE Trans Ind Inform 14(1):275\u2013282","journal-title":"IEEE Trans Ind Inform"},{"key":"6988_CR11","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.neucom.2020.01.079","volume":"390","author":"DA Sim\u00f5es","year":"2020","unstructured":"Sim\u00f5es DA, Lau N, Reis LP (2020) Multi-agent actor centralized-critic with communication. Neurocomputing 390:40\u201356","journal-title":"Neurocomputing"},{"issue":"7540","key":"6988_CR12","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller MA, Fidjeland A, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"4","key":"6988_CR13","doi-asserted-by":"publisher","first-page":"1034","DOI":"10.1109\/TCCN.2019.2947550","volume":"5","author":"Y Wang","year":"2019","unstructured":"Wang Y, Li Y, Lan T, Aggarwal V (2019) Deepchunk: deep q-learning for chunk-based caching in wireless data processing networks. 
IEEE Trans Cogn Commun Netw 5(4):1034\u20131045","journal-title":"IEEE Trans Cogn Commun Netw"},{"key":"6988_CR14","doi-asserted-by":"publisher","first-page":"65569","DOI":"10.1109\/ACCESS.2019.2917141","volume":"7","author":"X Bu","year":"2019","unstructured":"Bu X (2019) Actor-critic reinforcement learning control of non-strict feedback nonaffine dynamic systems. IEEE Access 7:65569\u201365578","journal-title":"IEEE Access"},{"issue":"1","key":"6988_CR15","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1109\/JSYST.2019.2891520","volume":"14","author":"H Yang","year":"2020","unstructured":"Yang H, Xie X (2020) An actor-critic deep reinforcement learning approach for transmission scheduling in cognitive internet of things systems. IEEE Syst J 14(1):51\u201360","journal-title":"IEEE Syst J"},{"key":"6988_CR16","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: 4th International conference on learning representations, ICLR 2016, San Juan, Puerto Rico, May 2\u20134, 2016, conference track proceedings"},{"key":"6988_CR17","unstructured":"Degris T, White M, Sutton RS(2012) Linear off-policy actor-critic. In: Proceedings of the 29th international conference on machine learning, ICML, Edinburgh, Scotland, UK, June 26\u2013July 1, 2012"},{"key":"6988_CR18","unstructured":"Mnih V, Badia A.P, Mirza M, Graves A, Lillicrap T.P, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd international conference on machine learning, ICML 2016, New York City, NY, USA, June 19\u201324, 2016"},{"key":"6988_CR19","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. 
In: Dy JG, Krause A (eds) Proceedings of the 35th international conference on machine learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10\u201315, 2018, vol 80 of proceedings of machine learning research, pp 1856\u20131865"},{"issue":"9","key":"6988_CR20","doi-asserted-by":"publisher","first-page":"4227","DOI":"10.1007\/s00521-019-04330-6","volume":"32","author":"N Passalis","year":"2020","unstructured":"Passalis N, Tefas A (2020) Continuous drone control using deep reinforcement learning for frontal view person shooting. Neural Comput Appl 32(9):4227\u20134238","journal-title":"Neural Comput Appl"},{"key":"6988_CR21","doi-asserted-by":"crossref","unstructured":"Aboussalah AM, Lee C (2020) Continuous control with stacked deep dynamic recurrent reinforcement learning for portfolio optimization. Expert Syst Appl 140","DOI":"10.1016\/j.eswa.2019.112891"},{"issue":"11","key":"6988_CR22","doi-asserted-by":"publisher","first-page":"5174","DOI":"10.1109\/TNNLS.2018.2805379","volume":"29","author":"Z Yang","year":"2018","unstructured":"Yang Z, Merrick KE, Jin L, Abbass HA (2018) Hierarchical deep reinforcement learning for continuous action control. IEEE Trans Neural Netw Learn Syst 29(11):5174\u20135184","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"6988_CR23","doi-asserted-by":"publisher","first-page":"1522","DOI":"10.1109\/TIP.2019.2942814","volume":"29","author":"W Xu","year":"2020","unstructured":"Xu W, Miao Z, Yu J, Ji Q (2020) Deep reinforcement learning for weak human activity localization. IEEE Trans Image Process 29:1522\u20131535","journal-title":"IEEE Trans Image Process"},{"key":"6988_CR24","unstructured":"Zhang X, Ma H (2018) Pretraining deep actor-critic reinforcement learning algorithms with expert demonstrations. 
CoRR"},{"issue":"4","key":"6988_CR25","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008) Reinforcement learning of motor skills with policy gradients. Neural Netw 21(4):682\u2013697","journal-title":"Neural Netw"},{"key":"6988_CR26","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1016\/j.neucom.2018.10.072","volume":"330","author":"Z Huang","year":"2019","unstructured":"Huang Z, Zhang Y, Liu Y, Zhang G (2019) Four actor-critic structures and algorithms for nonlinear multi-input multi-output system. Neurocomputing 330:172\u2013187","journal-title":"Neurocomputing"},{"key":"6988_CR27","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1016\/j.neunet.2018.10.007","volume":"109","author":"R Iwaki","year":"2019","unstructured":"Iwaki R, Asada M (2019) Implicit incremental natural actor critic algorithm. Neural Netw 109:103\u2013112","journal-title":"Neural Netw"},{"key":"6988_CR28","unstructured":"Gu S, Lillicrap TP, Ghahramani Z, Turner RE, Levine S (2017) Q-prop: Sample-efficient policy gradient with an off-policy critic. In: 5th International conference on learning representations, ICLR 2017, Toulon, France, April 24\u201326, 2017, conference track proceedings"},{"key":"6988_CR29","unstructured":"O\u2019Donoghue B, Munos R, Kavukcuoglu K, Mnih V (2017) Combining policy gradient and q-learning. In: 5th International conference on learning representations, ICLR 2017, Toulon, France, April 24\u201326, 2017, conference track proceedings"},{"issue":"5","key":"6988_CR30","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1109\/TCYB.2015.2421338","volume":"46","author":"R Song","year":"2016","unstructured":"Song R, Lewis FL, Wei Q, Zhang H (2016) Off-policy actor-critic structure for optimal control of unknown systems with disturbances. 
IEEE Trans Cybern 46(5):1041\u20131050","journal-title":"IEEE Trans Cybern"},{"key":"6988_CR31","unstructured":"Suttle W, Yang Z, Zhang K, Wang Z, Basar T, Liu J. A multi-agent off-policy actor-critic algorithm for distributed reinforcement learning. CoRR arXiv:1903.06372"},{"issue":"3","key":"6988_CR32","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1080\/00207179.2017.1361044","volume":"92","author":"R Vrabel","year":"2019","unstructured":"Vrabel R (2019) Stabilisation and state trajectory tracking problem for nonlinear control systems in the presence of disturbances. Int J Control 92(3):540\u2013548","journal-title":"Int J Control"},{"issue":"1","key":"6988_CR33","first-page":"14","volume":"10","author":"MB Hafez","year":"2019","unstructured":"Hafez MB, Weber C, Kerzel M, Wermter S (2019) Deep intrinsically motivated continuous actor-critic for efficient robotic visuomotor skill learning. Paladyn 10(1):14\u201329","journal-title":"Paladyn"},{"key":"6988_CR34","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Brodley CE (ed.) Machine learning, proceedings of the twenty-first international conference (ICML 2004), Banff, Alberta, Canada, July 4\u20138, 2004, vol 69 of ACM international conference proceeding series","DOI":"10.1145\/1015330.1015430"},{"key":"6988_CR35","doi-asserted-by":"publisher","first-page":"678","DOI":"10.1007\/978-1-4899-7687-1_142","volume-title":"Encyclopedia of machine learning and data mining","author":"P Abbeel","year":"2017","unstructured":"Abbeel P, Ng AY (2017) Inverse reinforcement learning. In: Sammut C, Webb GI (eds) Encyclopedia of machine learning and data mining. 
Springer, Berlin, pp 678\u2013682"},{"key":"6988_CR36","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neucom.2020.01.016","volume":"388","author":"G Zuo","year":"2020","unstructured":"Zuo G, Chen K, Lu J, Huang X (2020) Deterministic generative adversarial imitation learning. Neurocomputing 388:60\u201369","journal-title":"Neurocomputing"},{"key":"6988_CR37","unstructured":"Ho J, Gupta J.K, Ermon S (2016) Model-free imitation learning with policy optimization. In: Balcan M, Weinberger KQ (eds) Proceedings of the 33rd international conference on machine learning, ICML, New York City, NY, USA, June 19\u201324, vol 48 of JMLR workshop and conference proceedings, 2016, pp 2760\u20132769"},{"key":"6988_CR38","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/j.compind.2018.04.021","volume":"105","author":"B Bhattacharya","year":"2019","unstructured":"Bhattacharya B, Winer E (2019) Augmented reality via expert demonstration authoring (AREDA). Comput Ind 105:61\u201379","journal-title":"Comput Ind"},{"key":"6988_CR39","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1016\/j.eswa.2018.06.035","volume":"112","author":"A Ezzeddine","year":"2018","unstructured":"Ezzeddine A, Mourad N, Araabi BN, Ahmadabadi MN (2018) Combination of learning from non-optimal demonstrations and feedbacks using inverse reinforcement learning and bayesian policy improvement. Expert Syst Appl 112:331\u2013341","journal-title":"Expert Syst Appl"},{"key":"6988_CR40","doi-asserted-by":"crossref","unstructured":"Yan T, Zhang W, Yang SX, Yu L (2019) Soft actor-critic reinforcement learning for robotic manipulator with hindsight experience replay. 
Int J Robotics Autom 34(5)","DOI":"10.2316\/J.2019.206-0216"},{"issue":"3\u20134","key":"6988_CR41","doi-asserted-by":"publisher","first-page":"2239","DOI":"10.1007\/s11042-019-08331-4","volume":"79","author":"Y Ming","year":"2020","unstructured":"Ming Y, Zhang Y (2020) Efficient scalable spatiotemporal visual tracking based on recurrent neural networks. Multimed Tools Appl 79(3\u20134):2239\u20132261","journal-title":"Multimed Tools Appl"},{"issue":"4","key":"6988_CR42","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1109\/LGRS.2019.2926776","volume":"17","author":"L Tian","year":"2020","unstructured":"Tian L, Li X, Ye Y, Xie P, Li Y (2020) A generative adversarial gated recurrent unit model for precipitation nowcasting. IEEE Geosci Remote Sens Lett 17(4):601\u2013605","journal-title":"IEEE Geosci Remote Sens Lett"},{"issue":"2","key":"6988_CR43","doi-asserted-by":"publisher","first-page":"1387","DOI":"10.1109\/LRA.2019.2895892","volume":"4","author":"M Pflueger","year":"2019","unstructured":"Pflueger M, Agha-Mohammadi A, Sukhatme GS (2019) Rover-irl: inverse reinforcement learning with soft value iteration networks for planetary rover path planning. IEEE Robotics Autom Lett 4(2):1387\u20131394","journal-title":"IEEE Robotics Autom Lett"},{"key":"6988_CR44","unstructured":"Hausknecht MJ, Stone P (2015) Deep recurrent q-learning for partially observable mdps. 
In: AAAI Fall symposia, Arlington, Virginia, USA, November 12\u201314, 2015, pp 29\u201337"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-06988-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-06988-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-06988-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T06:15:33Z","timestamp":1652681733000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-06988-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,26]]},"references-count":44,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["6988"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-06988-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2022,2,26]]},"assertion":[{"value":"10 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 January 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 February 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of 
interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}