{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:43:32Z","timestamp":1772905412363,"version":"3.50.1"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,10,24]],"date-time":"2023-10-24T00:00:00Z","timestamp":1698105600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,24]],"date-time":"2023-10-24T00:00:00Z","timestamp":1698105600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"The National Nature Science Foundation of China","doi-asserted-by":"crossref","award":["61772120"],"award-info":[{"award-number":["61772120"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"The National Nature Science Foundation of China","doi-asserted-by":"crossref","award":["62276065"],"award-info":[{"award-number":["62276065"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s13042-023-01981-9","type":"journal-article","created":{"date-parts":[[2023,10,24]],"date-time":"2023-10-24T13:03:22Z","timestamp":1698152602000},"page":"1533-1552","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Multi-view reinforcement learning for sequential decision-making with insufficient state information"],"prefix":"10.1007","volume":"15","author":[{"given":"Min","family":"Li","sequence":"first","affiliation":[]},{"given":"William","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Shiping","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,24]]},"reference":[{"key":"1981_CR1","unstructured":"Littman ML, Algorithms for sequential decision-making, Brown University, 1996"},{"key":"1981_CR2","volume-title":"Learning and sequential decision making","author":"AG Barto","year":"1989","unstructured":"Barto AG, Sutton RS, Watkins C (1989) Learning and sequential decision making. University of Massachusetts Amherst, MA"},{"key":"1981_CR3","doi-asserted-by":"crossref","unstructured":"Lample G, Chaplot DS (2017) Playing fps games with deep reinforcement learning, in: Proceedings of AAAI Conference on Artificial Intelligence, San Francisco, California, USA, 2017, pp. 2140\u20132146","DOI":"10.1609\/aaai.v31i1.10827"},{"key":"1981_CR4","doi-asserted-by":"crossref","unstructured":"Littman M\u00a0L (1994) Markov games as a framework for multi-agent reinforcement learning, in: Machine learning proceedings 1994, Elsevier, pp. 157\u2013163","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"1981_CR5","doi-asserted-by":"crossref","unstructured":"Zheng L, Fiez T, Alumbaugh Z, Chasnov B, Ratliff LJ (2022) Stackelberg actor-critic: Game-theoretic reinforcement learning algorithms, in: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 9217\u20139224","DOI":"10.1609\/aaai.v36i8.20908"},{"issue":"6","key":"1981_CR6","doi-asserted-by":"publisher","first-page":"967","DOI":"10.1007\/s13042-014-0300-y","volume":"7","author":"Y Sholeh","year":"2016","unstructured":"Sholeh Y, Mohammad BNS, Ali K (2016) Reinforcement learning and neural networks for multi-agent nonzero-sum games of nonlinear constrained input systems. Int J Mach Learn Cybern 7(6):967\u2013980","journal-title":"Int J Mach Learn Cybern"},{"key":"1981_CR7","doi-asserted-by":"crossref","unstructured":"Johannink T, Bahl S, Nair A, Luo J, Kumar A, Loskyll M, Ojea JA, Solowjow E, Levine S (2019) Residual reinforcement learning for robot control, in: 2019 International Conference on Robotics and Automation, IEEE, Montreal, Canada, pp. 6023\u20136029","DOI":"10.1109\/ICRA.2019.8794127"},{"issue":"11","key":"1981_CR8","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: a survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"key":"1981_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.117389","volume":"203","author":"Y Gui","year":"2022","unstructured":"Gui Y, Hu W, Rahmani A (2022) A reinforcement learning based artificial bee colony algorithm with application in robot path planning. Expert Syst Appl 203:117389","journal-title":"Expert Syst Appl"},{"key":"1981_CR10","first-page":"2025","volume":"2019","author":"A Folkers","year":"2019","unstructured":"Folkers A, Rick M, B\u00fcskens C (2019) Controlling an autonomous vehicle with deep reinforcement learning in, IEEE Intelligent Vehicles Symposium. IEEE, Paris France 2019:2025\u20132031","journal-title":"IEEE, Paris France"},{"key":"1981_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.116830","volume":"199","author":"M Noaeen","year":"2022","unstructured":"Noaeen M, Naik A, Goodman L, Crebo J, Abrar T, Abad ZSH, Bazzan AL, Far B (2022) Reinforcement learning in urban network traffic signal control: A systematic literature review. Expert Syst Appl 199:116830","journal-title":"Expert Syst Appl"},{"issue":"7896","key":"1981_CR12","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1038\/s41586-021-04357-7","volume":"602","author":"PR Wurman","year":"2022","unstructured":"Wurman PR, Barrett S, Kawamoto K, MacGlashan J, Subramanian K, Walsh TJ, Capobianco R, Devlic A, Eckert F, Fuchs F et al (2022) Outracing champion gran turismo drivers with deep reinforcement learning. Nature 602(7896):223\u2013228","journal-title":"Nature"},{"key":"1981_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108823","volume":"130","author":"F Yang","year":"2022","unstructured":"Yang F, Liu Y, Ding X, Ma F, Cao J (2022) Asymmetric cross-modal hashing with high-level semantic similarity. Pattern Recogn 130:108823","journal-title":"Pattern Recogn"},{"issue":"3","key":"1981_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2023.103301","volume":"60","author":"F Yang","year":"2023","unstructured":"Yang F, Ding X, Ma F, Tong D, Cao J (2023) Edmh: efficient discrete matrix factorization hashing for multi-modal similarity retrieval. Inform Process Manage 60(3):103301","journal-title":"Inform Process Manage"},{"key":"1981_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.109176","volume":"251","author":"F Yang","year":"2022","unstructured":"Yang F, Ding X, Liu Y, Ma F, Cao J (2022) Scalable semantic-enhanced supervised hashing for cross-modal retrieval. Knowl-Based Syst 251:109176","journal-title":"Knowl-Based Syst"},{"issue":"1","key":"1981_CR16","first-page":"99","volume":"24","author":"M-C Cristescu","year":"2021","unstructured":"Cristescu M-C (2021) Machine learning techniques for improving the performance metrics of functional verification. Sci Technol 24(1):99\u2013116","journal-title":"Sci Technol"},{"key":"1981_CR17","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1016\/j.neucom.2021.10.086","volume":"472","author":"J Li","year":"2022","unstructured":"Li J, Sun A, Guan Z, Cheema MA, Min G (2022) Real-time dynamic network learning for location inference modelling and computing. Neurocomputing 472:198\u2013200","journal-title":"Neurocomputing"},{"key":"1981_CR18","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1016\/j.ins.2021.11.051","volume":"585","author":"IA Zamfirache","year":"2022","unstructured":"Zamfirache IA, Precup R-E, Roman R-C, Petriu EM (2022) Policy iteration reinforcement learning-based control using a grey wolf optimizer algorithm. Inf Sci 585:162\u2013175","journal-title":"Inf Sci"},{"key":"1981_CR19","unstructured":"Sutton RS, Barto AG, Reinforcement learning: An introduction, MIT press, 2018"},{"key":"1981_CR20","doi-asserted-by":"publisher","first-page":"2935","DOI":"10.1109\/TSG.2022.3154718","volume":"13","author":"X Chen","year":"2022","unstructured":"Chen X, Qu G, Tang Y, Low S, Li N (2022) Reinforcement learning for selective key applications in power systems: recent advances and future challenges. IEEE Trans Smart Grid 13:2935","journal-title":"IEEE Trans Smart Grid"},{"key":"1981_CR21","first-page":"331","volume":"2","author":"ML Puterman","year":"1990","unstructured":"Puterman ML (1990) Markov decision processes. Handb Oper Res Manage Sci 2:331\u2013434","journal-title":"Handb Oper Res Manage Sci"},{"key":"1981_CR22","doi-asserted-by":"crossref","unstructured":"Otterlo MV, Wiering M (2012) Reinforcement learning and markov decision processes, in: Reinforcement learning, Springer, pp. 3\u201342","DOI":"10.1007\/978-3-642-27645-3_1"},{"key":"1981_CR23","unstructured":"Daswani M, Sunehag P, Hutter M (2013) Q-learning for history-based reinforcement learning, in: Asian Conference on Machine Learning, Canberra, Australia, pp. 213\u2013228"},{"key":"1981_CR24","unstructured":"Leike J (2016) Nonparametric general reinforcement learning, Ph.D. thesis, Australian National University"},{"issue":"1","key":"1981_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1287\/mnsc.28.1.1","volume":"28","author":"GE Monahan","year":"1982","unstructured":"Monahan GE (1982) State of the art - a survey of partially observable markov decision processes: theory, models, and algorithms. Manage Sci 28(1):1\u201316","journal-title":"Manage Sci"},{"key":"1981_CR26","doi-asserted-by":"crossref","unstructured":"Majeed SJ, Hutter M (2018) On q-learning convergence for non-markov decision processes, in: Proceedings of the 27th International Joint Conference on Artificial Intelligence, Stockholm, Sweden, pp. 2546\u20132552","DOI":"10.24963\/ijcai.2018\/353"},{"key":"1981_CR27","doi-asserted-by":"crossref","unstructured":"Bellemare MG, Ostrovski G, Guez A, Thomas P, Munos R (2016) Increasing the action gap: New operators for reinforcement learning, in: Proceedings of the AAAI Conference on Artificial Intelligence, Phoenix, Arizona USA","DOI":"10.1609\/aaai.v30i1.10303"},{"key":"1981_CR28","unstructured":"Melo FS (2001)Convergence of q-learning: A simple proof. Instit Syst Robot, Tech Rep 1\u20134"},{"key":"1981_CR29","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning, in: International Conference on Learning Representations, San Juan, Puerto Rico,"},{"key":"1981_CR30","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods, in: International Conference on Machine Learning, Stockholm, Sweden, pp. 1587\u20131596"},{"key":"1981_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108875","volume":"131","author":"M Li","year":"2022","unstructured":"Li M, Huang T, Zhu W (2022) Clustering experience replay for the effective exploitation in reinforcement learning. Pattern Recogn 131:108875","journal-title":"Pattern Recogn"},{"issue":"12","key":"1981_CR32","doi-asserted-by":"publisher","first-page":"3491","DOI":"10.1007\/s13042-021-01387-5","volume":"12","author":"M Li","year":"2021","unstructured":"Li M, Huang T, Zhu W (2021) Adaptive exploration policy for exploration-exploitation tradeoff in continuous action control optimization. Int J Mach Learn Cybern 12(12):3491\u20133501","journal-title":"Int J Mach Learn Cybern"},{"key":"1981_CR33","unstructured":"Konda VR, Tsitsiklis JN (2000) Actor-critic algorithms, in: Advances in Neural Information Processing Systems, Denver, CO, USA, pp. 1008\u20131014"},{"issue":"4","key":"1981_CR34","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1109\/TCCN.2019.2952909","volume":"5","author":"C Zhong","year":"2019","unstructured":"Zhong C, Lu Z, Gursoy MC, Velipasalar S (2019) A deep actor-critic reinforcement learning framework for dynamic multichannel access. IEEE Trans Cogn Commun Netw 5(4):1125\u20131139","journal-title":"IEEE Trans Cogn Commun Netw"},{"issue":"6","key":"1981_CR35","doi-asserted-by":"publisher","first-page":"1291","DOI":"10.1109\/TSMCC.2012.2218595","volume":"42","author":"I Grondman","year":"2012","unstructured":"Grondman I, Busoniu L, Lopes GA, Babuska R (2012) A survey of actor-critic reinforcement learning: standard and natural policy gradients. IEEE Trans Syst Man Cybern Part C (Appl Rev) 42(6):1291\u20131307","journal-title":"IEEE Trans Syst Man Cybern Part C (Appl Rev)"},{"key":"1981_CR36","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms, in: International Conference on Machine Learning, Beijing, China, , pp. 1387\u20131395"},{"key":"1981_CR37","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (2000) Policy gradient methods for reinforcement learning with function approximation, in: Advances in Neural Information Processing Systems, Denver, CO, USA, pp. 1057\u20131063"},{"key":"1981_CR38","unstructured":"Hasselt HV (2010) Double q-learning, in: Advances in Neural Information Processing Systems, Vancouver, Canada, pp. 2613\u20132621"},{"key":"1981_CR39","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning, in: Proceedings of the AAAI Conference on Artificial Intelligence, New York, USA","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"1981_CR40","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1016\/j.neucom.2021.10.004","volume":"468","author":"T Huang","year":"2022","unstructured":"Huang T, Li M, Qin X, Zhu W (2022) A cnn-based policy for optimizing continuous action control by learning state sequences. Neurocomputing 468:286\u2013295","journal-title":"Neurocomputing"},{"key":"1981_CR41","first-page":"4517","volume":"22","author":"J Zhao","year":"2022","unstructured":"Zhao J, Guan Z, Xu C, Zhao W, Chen E (2022) Charge prediction by constitutive elements matching of crimes. Proceed Thirty-First Int Joint Conf Artif Intell IJCAI 22:4517\u20134523","journal-title":"Proceed Thirty-First Int Joint Conf Artif Intell IJCAI"},{"issue":"2","key":"1981_CR42","doi-asserted-by":"publisher","first-page":"1456","DOI":"10.1109\/TII.2022.3206343","volume":"19","author":"C Xu","year":"2022","unstructured":"Xu C, Zhao W, Zhao J, Guan Z, Song X, Li J (2022) Uncertainty-aware multiview deep learning for internet of things applications. IEEE Trans Industr Inf 19(2):1456\u20131466","journal-title":"IEEE Trans Industr Inf"},{"key":"1981_CR43","doi-asserted-by":"crossref","unstructured":"Xu C, Guan Z, Zhao W, Niu Y, Wang Q, Wang Z (2018) Deep multi-view concept learning., in: IJCAI, Stockholm, pp. 2898\u20132904","DOI":"10.24963\/ijcai.2018\/402"},{"issue":"2","key":"1981_CR44","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1109\/TNNLS.2020.2979532","volume":"32","author":"W Zhao","year":"2020","unstructured":"Zhao W, Xu C, Guan Z, Liu Y (2020) Multiview concept learning via deep matrix factorization. IEEE Trans Neural Netw Learn Syst 32(2):814\u2013825","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1981_CR45","first-page":"3933","volume":"7","author":"C Xu","year":"2019","unstructured":"Xu C, Guan Z, Zhao W, Wu H, Niu Y, Ling B (2019) Adversarial incomplete multi-view clustering. IJCAI 7:3933\u20133939","journal-title":"IJCAI"},{"issue":"10","key":"1981_CR46","doi-asserted-by":"publisher","first-page":"10490","DOI":"10.1109\/TCYB.2021.3062830","volume":"52","author":"C Xu","year":"2021","unstructured":"Xu C, Liu H, Guan Z, Wu X, Tan J, Ling B (2021) Adversarial incomplete multiview subspace clustering networks. IEEE Trans Cybern 52(10):10490\u201310503","journal-title":"IEEE Trans Cybern"},{"key":"1981_CR47","unstructured":"Li M, Wu L, Wang J, Bou\u00a0Ammar H (2019) Multi-view reinforcement learning, Advances in neural information processing systems 32 (2019)"},{"key":"1981_CR48","doi-asserted-by":"publisher","first-page":"2461","DOI":"10.1007\/s13042-020-01130-6","volume":"11","author":"Y Hu","year":"2020","unstructured":"Hu Y, Sun S, Xu X, Zhao J (2020) Attentive multi-view reinforcement learning. Int J Mach Learn Cybern 11:2461\u20132474","journal-title":"Int J Mach Learn Cybern"},{"key":"1981_CR49","unstructured":"Fan J, Li W, (2022) Dribo: Robust deep reinforcement learning via multi-view information bottleneck, in: International Conference on Machine Learning, PMLR, pp. 6074\u20136102"},{"key":"1981_CR50","unstructured":"Goodfellow I, Bengio Y. a Courville (2016) A, Deep learning, Vol.\u00a01, MIT press Cambridge"},{"key":"1981_CR51","unstructured":"Cai X, Nie F, Huang H (2013) Multi-view k-means clustering on big data, in: Proceedings of the 23th International Joint conference on artificial intelligence, Beijing China, pp. 2598\u20132604"},{"issue":"2","key":"1981_CR52","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1109\/TKDE.2020.2986201","volume":"34","author":"J Han","year":"2020","unstructured":"Han J, Xu J, Nie F, Li X (2020) Multi-view k-means clustering with adaptive sparse memberships and weight allocation. IEEE Trans Knowl Data Eng 34(2):816\u2013827","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1981_CR53","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1016\/j.neucom.2020.02.104","volume":"402","author":"L Fu","year":"2020","unstructured":"Fu L, Lin P, Vasilakos AV, Wang S (2020) An overview of recent multi-view clustering. Neurocomputing 402:148\u2013161","journal-title":"Neurocomputing"},{"key":"1981_CR54","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Mujoco Y(2012) A physics engine for model-based control, in: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Algarve, Portugal, pp. 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"1981_CR55","unstructured":"Palanisamy P (2018) Hands-On Intelligent Agents with OpenAI Gym: Your guide to developing AI agents using deep reinforcement learning, Packt Publishing Ltd"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-023-01981-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-023-01981-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-023-01981-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,10]],"date-time":"2024-03-10T17:25:01Z","timestamp":1710091501000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-023-01981-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,24]]},"references-count":55,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["1981"],"URL":"https:\/\/doi.org\/10.1007\/s13042-023-01981-9","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,24]]},"assertion":[{"value":"12 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}