{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T23:43:08Z","timestamp":1764027788883,"version":"3.37.3"},"reference-count":119,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2023,3,22]],"date-time":"2023-03-22T00:00:00Z","timestamp":1679443200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,22]],"date-time":"2023-03-22T00:00:00Z","timestamp":1679443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10462-023-10450-2","type":"journal-article","created":{"date-parts":[[2023,3,22]],"date-time":"2023-03-22T03:03:11Z","timestamp":1679454191000},"page":"12655-12688","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Reinforcement learning architecture for cyber\u2013physical\u2013social AI: state-of-the-art and perspectives"],"prefix":"10.1007","volume":"56","author":[{"given":"Xue","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1261-8687","authenticated-orcid":false,"given":"Puming","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Qian","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Saowen","family":"Yao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,22]]},"reference":[{"key":"10450_CR1","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1016\/j.comcom.2022.09.029","volume":"196","author":"P Almasan","year":"2022","unstructured":"Almasan P, Su\u00e1rez-Varela J, Rusek K, Barlet-Ros P, Cabellos-Aparicio A (2022) Deep reinforcement learning meets graph neural networks: exploring a routing optimization use case. Comput Commun 196:184\u2013194","journal-title":"Comput Commun"},{"issue":"6","key":"10450_CR2","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran K, Deisenroth MP, Brundage M, Bharath AA (2017) Deep reinforcement learning: a brief survey. IEEE Signal Process Mag 34(6):26\u201338","journal-title":"IEEE Signal Process Mag"},{"key":"10450_CR4","doi-asserted-by":"crossref","unstructured":"Barto AG (2018) Some learning tasks from a control perspective. In: 1990 Lectures in complex systems. CRC Press, pp 195\u2013224","DOI":"10.1201\/9780429503573-7"},{"key":"10450_CR3","volume-title":"Neuronlike adaptive elements that can solve difficult learning control problems","author":"AG Barto","year":"1988","unstructured":"Barto AG, Sutton RS, Anderson CW (1988) Neuronlike adaptive elements that can solve difficult learning control problems. MIT Press, Cambridge"},{"issue":"1","key":"10450_CR5","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett PL (2001) Infinite-horizon policy-gradient estimation. J Artif Intell Res 15(1):319\u2013350","journal-title":"J Artif Intell Res"},{"key":"10450_CR6","doi-asserted-by":"crossref","unstructured":"Bellemare MG, Veness J, Bowling M (2012) Investigating contingency awareness using Atari 2600 games. In: Twenty-sixth AAAI conference on artificial intelligence, 2012, pp 864\u2013871","DOI":"10.1609\/aaai.v26i1.8321"},{"issue":"3731","key":"10450_CR7","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1126\/science.153.3731.34","volume":"153","author":"R Bellman","year":"1966","unstructured":"Bellman R (1966) Dynamic programming. Science 153(3731):34\u201337","journal-title":"Science"},{"key":"10450_CR8","doi-asserted-by":"crossref","first-page":"125290","DOI":"10.1016\/j.energy.2022.125290","volume":"261","author":"C Blad","year":"2022","unstructured":"Blad C, Bogh S, Kallesoe CS (2022) Data-driven offline reinforcement learning for HVAC-systems. Energy 261:125290","journal-title":"Energy"},{"issue":"2","key":"10450_CR9","doi-asserted-by":"crossref","first-page":"542","DOI":"10.1137\/100804577","volume":"34","author":"M Brazell","year":"2013","unstructured":"Brazell M, Li N, Navasca C, Tamon C (2013) Solving multilinear systems via tensor inversion. SIAM J Matrix Anal Appl 34(2):542\u2013570","journal-title":"SIAM J Matrix Anal Appl"},{"issue":"2","key":"10450_CR10","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu L, Babuska R, De Schutter B (2008) A comprehensive survey of multiagent reinforcement learning. IEEE Trans Syst Man Cybern C 38(2):156\u2013172","journal-title":"IEEE Trans Syst Man Cybern C"},{"key":"10450_CR11","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1016\/j.arcontrol.2018.09.005","volume":"46","author":"L Bu\u015foniu","year":"2018","unstructured":"Bu\u015foniu L, de Bruin T, Toli\u0107 D, Kober J, Palunko I (2018) Reinforcement learning for control: performance, stability, and deep approximators. Annu Rev Control 46:8\u201328","journal-title":"Annu Rev Control"},{"issue":"7","key":"10450_CR12","doi-asserted-by":"crossref","first-page":"1759","DOI":"10.1175\/1520-0442(1997)010<1759:SCOSVD>2.0.CO;2","volume":"10","author":"S Cherry","year":"1997","unstructured":"Cherry S (1997) Some comments on singular value decomposition analysis. J Clim 10(7):1759\u20131761","journal-title":"J Clim"},{"key":"10450_CR13","doi-asserted-by":"crossref","unstructured":"Curran W, Brys T, Aha D, Taylor M, Smart WD (2016) Dimensionality reduced reinforcement learning for assistive robots. In: Proceedings of artificial intelligence for human\u2013robot interaction at AAAI fall symposium series, 2016","DOI":"10.1109\/IROS.2017.8205962"},{"key":"10450_CR14","doi-asserted-by":"crossref","unstructured":"D\u00f6nderler ME, Ulusoy \u00d6, G\u00fcd\u00fckbay U (2000) A rule-based approach to represent spatio-temporal relations in video data. In: International conference on advances in information systems, 2000. Springer, pp 409\u2013418","DOI":"10.1007\/3-540-40888-6_39"},{"key":"10450_CR15","volume-title":"A local graph clustering algorithm for discovering subgoals in reinforcement learning","author":"N Entezari","year":"2010","unstructured":"Entezari N, Shiri ME, Moradi P (2010) A local graph clustering algorithm for discovering subgoals in reinforcement learning. Springer, Berlin"},{"issue":"12","key":"10450_CR16","doi-asserted-by":"crossref","first-page":"4903","DOI":"10.1109\/TPDS.2022.3205325","volume":"33","author":"Y Fan","year":"2022","unstructured":"Fan Y, Li B, Favorite D, Singh N, Childers T, Rich P, Allcock W, Papka ME, Lan Z (2022) DRAS: deep reinforcement learning for cluster scheduling in high performance computing. IEEE Trans Parallel Distrib Syst 33(12):4903\u20134917","journal-title":"IEEE Trans Parallel Distrib Syst"},{"issue":"3","key":"10450_CR17","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1109\/TBDATA.2018.2803841","volume":"5","author":"J Feng","year":"2019","unstructured":"Feng J, Yang LT, Dai G, Wang W, Zou D (2019) A secure high-order Lanczos-based orthogonal tensor SVD for big data reduction in cloud environment. IEEE Trans Big Data 5(3):355\u2013367","journal-title":"IEEE Trans Big Data"},{"key":"10450_CR18","unstructured":"Fujita Y, Maeda S-I (n.d.) Clipped action policy gradient. arXiv preprint. arXiv:1802.07564"},{"key":"10450_CR19","doi-asserted-by":"crossref","first-page":"112542","DOI":"10.1016\/j.enbuild.2022.112542","volume":"277","author":"Y Fulpagare","year":"2022","unstructured":"Fulpagare Y, Huang K-R, Liao Y-H, Wang C-C (2022) Optimal energy management for air cooled server fans using deep reinforcement learning control method. Energy Build 277:112542","journal-title":"Energy Build"},{"issue":"1","key":"10450_CR20","first-page":"86","volume":"30","author":"Y Gao","year":"2004","unstructured":"Gao Y, Chen S-F, Lu X (2004) Research on reinforcement learning technology: a review. Acta Autom Sin 30(1):86\u2013100","journal-title":"Acta Autom Sin"},{"issue":"1","key":"10450_CR21","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a J, Fern\u00e1ndez F (2015) A comprehensive survey on safe reinforcement learning. J Mach Learn Res 16(1):1437\u20131480","journal-title":"J Mach Learn Res"},{"key":"10450_CR22","volume-title":"Game theory for applied economists","author":"RS Gibbons","year":"1992","unstructured":"Gibbons RS (1992) Game theory for applied economists. Princeton University Press, Princeton"},{"issue":"5","key":"10450_CR23","doi-asserted-by":"crossref","first-page":"1256","DOI":"10.1109\/TSMCB.2007.899419","volume":"37","author":"S Girgin","year":"2007","unstructured":"Girgin S, Polat F, Alhajj R (2007) Positive impact of state similarity on reinforcement learning performance. IEEE Trans Syst Man Cybern B 37(5):1256\u20131270","journal-title":"IEEE Trans Syst Man Cybern B"},{"issue":"2","key":"10450_CR24","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1287\/ijoc.1080.0305","volume":"21","author":"A Gosavi","year":"2009","unstructured":"Gosavi A (2009) Reinforcement learning: a tutorial survey and recent advances. INFORMS J Comput 21(2):178\u2013192","journal-title":"INFORMS J Comput"},{"issue":"6","key":"10450_CR25","doi-asserted-by":"crossref","first-page":"1291","DOI":"10.1109\/TSMCC.2012.2218595","volume":"42","author":"I Grondman","year":"2012","unstructured":"Grondman I, Busoniu L, Lopes GA, Babuska R (2012) A survey of actor\u2013critic reinforcement learning: standard and natural policy gradients. IEEE Trans Syst Man Cybern C 42(6):1291\u20131307","journal-title":"IEEE Trans Syst Man Cybern C"},{"issue":"1","key":"10450_CR26","first-page":"7","volume":"48","author":"B Guo","year":"2015","unstructured":"Guo B, Wang Z, Yu Z, Wang Y, Yen NY, Huang R, Zhou X (2015) Mobile crowd sensing and computing: the review of an emerging human-powered sensing paradigm. ACM Comput Surv 48(1):7","journal-title":"ACM Comput Surv"},{"key":"10450_CR27","unstructured":"Hausknecht M, Stone P (2015) Deep recurrent Q-learning for partially observable MDPS. In: AAAI fall symposium on sequential decision making for intelligent agents, 2015"},{"issue":"6","key":"10450_CR28","doi-asserted-by":"crossref","first-page":"750","DOI":"10.1007\/s10458-019-09421-1","volume":"33","author":"P Hernandez-Leal","year":"2019","unstructured":"Hernandez-Leal P, Kartal B, Taylor ME (2019) A survey and critique of multiagent deep reinforcement learning. Auton Agents Multi-agent Syst 33(6):750\u2013797","journal-title":"Auton Agents Multi-agent Syst"},{"key":"10450_CR29","doi-asserted-by":"crossref","unstructured":"Huang BQ, Cao GY, Guo M (2005) Reinforcement learning neural network to the problem of autonomous mobile robot obstacle avoidance. In: Proceedings of 2005 international conference on machine learning and cybernetics, 2005, vol 1. IEEE, pp 85\u201389","DOI":"10.1109\/ICMLC.2005.1526924"},{"key":"10450_CR30","doi-asserted-by":"crossref","unstructured":"Jing Y, Jiang W, Su G, Zhou Z, Wang Y (2014) A learning automata-based singular value decomposition and its application in recommendation system. In: International conference on intelligent computing, 2014. Springer","DOI":"10.1007\/978-3-319-09339-0_3"},{"key":"10450_CR31","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Intell Res 4:237\u2013285","journal-title":"J Artif Intell Res"},{"issue":"Special Centenn","key":"10450_CR32","first-page":"1287","volume":"100","author":"KD Kim","year":"2012","unstructured":"Kim KD, Kumar PR (2012) Cyber\u2013physical systems: a perspective at the centennial. Proc IEEE 100(Special Centennial Issue):1287\u20131308","journal-title":"Proc IEEE"},{"key":"10450_CR33","unstructured":"Kious D, Tarrs P (n.d.) Reinforcement learning in social networks. arXiv preprint. arXiv:1601.00667"},{"issue":"11","key":"10450_CR34","doi-asserted-by":"crossref","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: a survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"issue":"4","key":"10450_CR35","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V Konda","year":"1999","unstructured":"Konda V (1999) Actor\u2013critic algorithms. SIAM J Control Optim 42(4):1143\u20131166","journal-title":"SIAM J Control Optim"},{"key":"10450_CR37","doi-asserted-by":"crossref","unstructured":"Kov\u00e1cs S (2001) SVD reduction in continuous environment reinforcement learning. In: International conference on computational intelligence, 2001. Springer, pp 719\u2013738","DOI":"10.1007\/3-540-45493-4_71"},{"key":"10450_CR36","first-page":"107","volume":"1","author":"S Kov\u00e1cs","year":"2003","unstructured":"Kov\u00e1cs S, Baranyi PZ (2003) Fuzzy q-learning in SVD reduced dynamic state-space. Prod Syst Inf Eng Publ Univ Miskolc 1:107\u2013124","journal-title":"Prod Syst Inf Eng Publ Univ Miskolc"},{"key":"10450_CR38","doi-asserted-by":"crossref","unstructured":"Krodel M, Kuhnert KD (2002) Reinforcement learning to drive a car by pattern matching. In: IECON, 2002, pp 1728\u20131733","DOI":"10.1007\/3-540-45783-6_39"},{"issue":"3","key":"10450_CR39","doi-asserted-by":"crossref","first-page":"280","DOI":"10.1109\/TETC.2014.2330516","volume":"2","author":"L Kuang","year":"2014","unstructured":"Kuang L, Hao F, Yang LT, Lin M (2014) A tensor-based approach for big data representation and dimensionality reduction. IEEE Trans Emerg Top Comput 2(3):280\u2013291","journal-title":"IEEE Trans Emerg Top Comput"},{"key":"10450_CR40","doi-asserted-by":"crossref","unstructured":"Lahat D, Adali T, Jutten C (2015) Multimodal data fusion: an overview of methods, challenges and prospects. In: Proceedings of the IEEE, 2015, pp 1449\u20131477","DOI":"10.1109\/JPROC.2015.2460697"},{"issue":"3","key":"10450_CR41","doi-asserted-by":"crossref","first-page":"1722","DOI":"10.1109\/COMST.2020.2988367","volume":"22","author":"L Lei","year":"2020","unstructured":"Lei L, Tan Y, Zheng K, Liu S, Zhang K, Shen X (2020) Deep reinforcement learning for autonomous internet of things: model, applications and challenges. IEEE Commun Surv Tutor 22(3):1722\u20131760","journal-title":"IEEE Commun Surv Tutor"},{"key":"10450_CR43","doi-asserted-by":"crossref","unstructured":"Levine S, Wagener N, Abbeel P (2015) Learning contact-rich manipulation skills with guided policy search. In: IEEE international conference on robotics and automation, 2015, pp 156\u2013163","DOI":"10.1109\/ICRA.2015.7138994"},{"issue":"4\u20135","key":"10450_CR42","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1177\/0278364917710318","volume":"37","author":"S Levine","year":"2018","unstructured":"Levine S, Pastor P, Krizhevsky A, Ibarz J, Quillen D (2018) Learning hand\u2013eye coordination for robotic grasping with deep learning and large-scale data collection. Int J Robot Res 37(4\u20135):421\u2013436","journal-title":"Int J Robot Res"},{"key":"10450_CR44","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (n.d.) Continuous control with deep reinforcement learning. arXiv preprint. arXiv:1509.02971"},{"issue":"4","key":"10450_CR46","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1109\/MIS.2011.69","volume":"26","author":"Z Liu","year":"2011","unstructured":"Liu Z, Yang DS, Wen D, Zhang WM, Mao W (2011) Cyber\u2013physical\u2013social systems for command and control. IEEE Intell Syst 26(4):92\u201396","journal-title":"IEEE Intell Syst"},{"issue":"1","key":"10450_CR47","first-page":"1","volume":"41","author":"Q Liu","year":"2018","unstructured":"Liu Q, Zhai JW, Zhang Z-Z, Zhong S, Zhou Q, Zhang P, Xu J (2018) A survey on deep reinforcement learning. Chin J Comput 41(1):1\u201327","journal-title":"Chin J Comput"},{"key":"10450_CR45","doi-asserted-by":"crossref","first-page":"118926","DOI":"10.1016\/j.eswa.2022.118926","volume":"213","author":"H Liu","year":"2023","unstructured":"Liu H, Cai K, Li P, Qian C, Zhao P, Wu X (2023) REDRL: a review-enhanced deep reinforcement learning model for interactive recommendation. Expert Syst Appl 213:118926","journal-title":"Expert Syst Appl"},{"issue":"4","key":"10450_CR48","doi-asserted-by":"crossref","first-page":"3133","DOI":"10.1109\/COMST.2019.2916583","volume":"21","author":"NC Luong","year":"2019","unstructured":"Luong NC, Hoang DT, Gong S, Niyato D, Wang P, Liang Y-C, Kim DI (2019) Applications of deep reinforcement learning in communications and networking: a survey. IEEE Commun Surv Tutor 21(4):3133\u20133174","journal-title":"IEEE Commun Surv Tutor"},{"issue":"1","key":"10450_CR49","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1108\/17427370580000113","volume":"1","author":"J Ma","year":"2005","unstructured":"Ma J, Yang LT, Apduhan BO, Huang R, Barolli L, Takizawa M (2005) Towards a smart world and ubiquitous intelligence: a walkthrough from smart things to smart hyperspaces and UbicKids. Int J Pervasive Comput Commun 1(1):53\u201368","journal-title":"Int J Pervasive Comput Commun"},{"key":"10450_CR50","unstructured":"Machado MC, Bowling M (n.d.) Learning purposeful behaviour in the absence of rewards. arXiv preprint. arXiv:1605.07700"},{"issue":"1\u20133","key":"10450_CR51","first-page":"159","volume":"22","author":"S Mahadevan","year":"1996","unstructured":"Mahadevan S (1996) Average reward reinforcement learning: foundations, algorithms, and empirical results. Mach Learn 22(1\u20133):159\u2013195","journal-title":"Mach Learn"},{"issue":"1","key":"10450_CR52","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1017\/S0269888912000057","volume":"27","author":"L Matignon","year":"2012","unstructured":"Matignon L, Laurent GJ, Le Fort-Piat N (2012) Independent reinforcement learners in cooperative Markov games: a survey regarding coordination problems. Knowl Eng Rev 27(1):1\u201331","journal-title":"Knowl Eng Rev"},{"issue":"6","key":"10450_CR53","doi-asserted-by":"crossref","first-page":"1087","DOI":"10.1063\/1.1699114","volume":"21","author":"N Metropolis","year":"1953","unstructured":"Metropolis N, Rosenbluth AW, Rosenbluth MN, Teller AH, Teller E (1953) Equation of state by fast computing machines. J Chem Phys 21(6):1087\u20131092","journal-title":"J Chem Phys"},{"issue":"2","key":"10450_CR54","first-page":"137","volume":"2","author":"D Michie","year":"1968","unstructured":"Michie D, Chambers RA (1968) BOXES: an experiment in adaptive control. Mach Intell 2(2):137\u2013152","journal-title":"Mach Intell"},{"key":"10450_CR55","doi-asserted-by":"crossref","unstructured":"Milner EC (n.d.) The theory of graphs and its applications. J Lond Math Soc s1-39(1)","DOI":"10.1112\/jlms\/s1-39.1.186"},{"issue":"1","key":"10450_CR56","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TCSS.2016.2561200","volume":"3","author":"S Misra","year":"2016","unstructured":"Misra S, Goswami S, Taneja C (2016) Multivariate data fusion-based learning of video content and service distribution for cyber physical social systems. IEEE Trans Comput Soc Syst 3(1):1\u201312","journal-title":"IEEE Trans Comput Soc Syst"},{"issue":"7540","key":"10450_CR57","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529","journal-title":"Nature"},{"issue":"1","key":"10450_CR58","first-page":"253","volume":"47","author":"Y Naddaf","year":"2013","unstructured":"Naddaf Y, Naddaf Y, Veness J, Bowling M (2013) The Arcade learning environment: an evaluation platform for general agents. J Artif Intell Res 47(1):253\u2013279","journal-title":"J Artif Intell Res"},{"key":"10450_CR59","doi-asserted-by":"crossref","first-page":"27091","DOI":"10.1109\/ACCESS.2017.2777827","volume":"5","author":"ND Nguyen","year":"2017","unstructured":"Nguyen ND, Nguyen T, Nahavandi S (2017) System design perspective for human-level agents using deep reinforcement learning: a survey. IEEE Access 5:27091\u201327102","journal-title":"IEEE Access"},{"issue":"9","key":"10450_CR60","doi-asserted-by":"crossref","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen TT, Nguyen ND, Nahavandi S (2020) Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications. IEEE Trans Cybern 50(9):3826\u20133839","journal-title":"IEEE Trans Cybern"},{"issue":"6","key":"10450_CR61","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1109\/72.809089","volume":"10","author":"J Nie","year":"1999","unstructured":"Nie J, Haykin S (1999) A dynamic channel assignment policy through Q-learning. IEEE Trans Neural Netw 10(6):1443\u20131455","journal-title":"IEEE Trans Neural Netw"},{"key":"10450_CR62","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1016\/j.future.2015.07.012","volume":"56","author":"H Ning","year":"2015","unstructured":"Ning H, Liu H, Ma J, Yang LT, Huang R (2015) Cybermatics: cyber\u2013physical\u2013social-thinking hyperspace based science and technology. Future Gener Comput Syst 56:504\u2013522","journal-title":"Future Gener Comput Syst"},{"key":"10450_CR63","doi-asserted-by":"crossref","unstructured":"Notsu A, Honda K, Ichihashi H, Ido A, Komori Y (2013) Information compression effect based on PCA for reinforcement learning agents\u2019 communication. In: Joint international conference on soft computing and intelligent systems, 2013, pp 1318\u20131321","DOI":"10.1109\/SCIS-ISIS.2012.6504999"},{"issue":"4","key":"10450_CR64","doi-asserted-by":"crossref","first-page":"272","DOI":"10.1016\/j.trc.2005.07.005","volume":"13","author":"S Ossowski","year":"2005","unstructured":"Ossowski S, Hern\u00e1ndez JZ, Belmonte MV et al (2005) Decision support for traffic management based on organisational and communicative multiagent abstractions. Transp Res C 13(4):272\u2013298","journal-title":"Transp Res C"},{"issue":"3","key":"10450_CR65","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1007\/s10458-005-2631-2","volume":"11","author":"L Panait","year":"2005","unstructured":"Panait L, Luke S (2005) Cooperative multi-agent learning: the state of the art. Auton Agents Multi-agent Syst 11(3):387\u2013434","journal-title":"Auton Agents Multi-agent Syst"},{"issue":"1\u20133","key":"10450_CR66","first-page":"283","volume":"22","author":"J Peng","year":"1996","unstructured":"Peng J, Williams RJ (1996) Incremental multi-step Q-learning. Mach Learn 22(1\u20133):283\u2013290","journal-title":"Mach Learn"},{"issue":"8","key":"10450_CR67","doi-asserted-by":"crossref","first-page":"732","DOI":"10.1016\/j.datak.2011.03.004","volume":"70","author":"N Prat","year":"2011","unstructured":"Prat N, Comyn-Wattiau I, Akoka J (2011) Combining objects with rules to represent aggregation knowledge in data warehouse and OLAP systems. Data Knowl Eng 70(8):732\u2013752","journal-title":"Data Knowl Eng"},{"issue":"3","key":"10450_CR68","first-page":"353","volume":"37","author":"ML Puterman","year":"2009","unstructured":"Puterman ML (2009) Markov decision processes: discrete stochastic dynamic programming. Technometrics 37(3):353\u2013353","journal-title":"Technometrics"},{"key":"10450_CR69","doi-asserted-by":"crossref","unstructured":"Rajkumar R, Lee I, Sha L, Stankovic J (2010) Cyber\u2013physical systems: the next computing revolution. In: Design automation conference, 2010, pp 731\u2013736","DOI":"10.1145\/1837274.1837461"},{"key":"10450_CR70","volume-title":"Game theory: introduction and applications","author":"G Romp","year":"1997","unstructured":"Romp G (1997) Game theory: introduction and applications. Oxford University Press, Oxford"},{"key":"10450_CR71","unstructured":"Rosenfeld A, Taylor ME, Kraus S (2017) Speeding up tabular reinforcement learning using state\u2013action similarities. In: Conference on autonomous agents and multiagent systems, 2017, pp 1722\u20131724"},{"key":"10450_CR72","unstructured":"Roy N, Gordon GJ (2003) Exponential family PCA for belief compression in POMDPS. In: Advances in neural information processing systems, 2003, pp 1667\u20131674"},{"key":"10450_CR74","unstructured":"Rummery GA (n.d.) Problem solving with reinforcement learning. PhD Thesis, University of Cambridge"},{"key":"10450_CR73","volume-title":"On-line Q-learning using connectionist systems","author":"GA Rummery","year":"1994","unstructured":"Rummery GA, Niranjan M (1994) On-line Q-learning using connectionist systems. University of Cambridge, Department of Engineering, Cambridge"},{"key":"10450_CR75","volume-title":"Dynamic macroeconomic theory","author":"TJ Sargent","year":"1987","unstructured":"Sargent TJ (1987) Dynamic macroeconomic theory. Harvard University Press, Cambridge"},{"key":"10450_CR76","unstructured":"Schraudolph NN, Yu J, Aberdeen D (2006) Fast online policy gradient learning with SMD gain vector adaptation. In: Advances in neural information processing systems, 2006, pp 1185\u20131192"},{"key":"10450_CR77","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (n.d.) Proximal policy optimization algorithms. arXiv preprint. arXiv:1707.06347"},{"issue":"5306","key":"10450_CR78","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W Schultz","year":"1997","unstructured":"Schultz W, Dayan P, Montague PR (1997) A neural substrate of prediction and reward. Science 275(5306):1593\u20131599","journal-title":"Science"},{"key":"10450_CR79","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1109\/MIS.2013.20","volume":"1","author":"A Sheth","year":"2013","unstructured":"Sheth A, Anantharam P, Henson C (2013) Physical\u2013cyber\u2013social computing: an early 21st century approach. IEEE Intell Syst 1:78\u201382","journal-title":"IEEE Intell Syst"},{"key":"10450_CR80","doi-asserted-by":"crossref","unstructured":"Shi J, Wan J, Yan H, Hui S (2011) A survey of cyber physical systems. In: 2011 International conference on wireless communications and signal processing (WCSP), 2011","DOI":"10.1109\/WCSP.2011.6096958"},{"key":"10450_CR81","doi-asserted-by":"crossref","first-page":"104","DOI":"10.1016\/j.patrec.2016.08.009","volume":"87","author":"F Shoeleh","year":"2017","unstructured":"Shoeleh F, Asadpour M (2017) Graph based skill acquisition and transfer learning for continuous reinforcement learning domains. Pattern Recognit Lett 87:104\u2013116","journal-title":"Pattern Recognit Lett"},{"key":"10450_CR82","doi-asserted-by":"crossref","unstructured":"Skyrms B, Pemantle R (2009) A dynamic model of social network formation. In: Adaptive networks. Springer, pp 231\u2013251","DOI":"10.1007\/978-3-642-01284-6_11"},{"key":"10450_CR83","unstructured":"Sorber L (n.d.) Data fusion: tensor factorizations by complex optimization. PhD Thesis, Faculty of Engineering, KU Leuven, Leuven"},{"issue":"12","key":"10450_CR84","doi-asserted-by":"crossref","first-page":"8812","DOI":"10.1109\/TPAMI.2021.3119645","volume":"44","author":"T Sun","year":"2022","unstructured":"Sun T, Shen H, Chen T, Li D (2022) Adaptive temporal difference learning with linear function approximation. IEEE Trans Pattern Anal Mach Intell 44(12):8812\u20138824","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10450_CR87","doi-asserted-by":"crossref","unstructured":"Sutton RS (1990) Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the seventh international conference, 1990","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"10450_CR85","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"issue":"1","key":"10450_CR86","first-page":"285","volume":"16","author":"RS Sutton","year":"2005","unstructured":"Sutton RS, Barto AG (2005) Reinforcement learning: an introduction, Bradford book. Mach Learn 16(1):285\u2013286","journal-title":"Mach Learn"},{"key":"10450_CR88","unstructured":"Sutton RS, McAllester DA, Singh SP, Mansour Y (2000) Policy gradient methods for reinforcement learning with function approximation. In: Advances in neural information processing systems, 2000, pp 1057\u20131063"},{"issue":"3","key":"10450_CR89","doi-asserted-by":"crossref","first-page":"6002","DOI":"10.1016\/j.sbspro.2010.04.014","volume":"2","author":"D Tamagawa","year":"2010","unstructured":"Tamagawa D, Taniguchi E, Yamada T (2010) Evaluating city logistics measures using a multi-agent model. Procedia Soc Behav Sci 2(3):6002\u20136012","journal-title":"Procedia Soc Behav Sci"},{"key":"10450_CR90","doi-asserted-by":"crossref","unstructured":"Tan Y, Liu W, Qiu Q (2009) Adaptive power management using reinforcement learning. In: Proceedings of the 2009 international conference on computer-aided design, 2009. ACM, pp 461\u2013467","DOI":"10.1145\/1687399.1687486"},{"issue":"July","key":"10450_CR91","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor ME, Stone P (2009) Transfer learning for reinforcement learning domains: a survey. J Mach Learn Res 10(July):1633\u20131685","journal-title":"J Mach Learn Res"},{"key":"10450_CR92","doi-asserted-by":"crossref","unstructured":"Tesauro G (1995) TD-Gammon: a self-teaching backgammon program. In: Applications of neural networks. Springer, Boston, pp 267\u2013285","DOI":"10.1007\/978-1-4757-2379-3_11"},{"issue":"10","key":"10450_CR93","doi-asserted-by":"crossref","first-page":"1125","DOI":"10.1037\/0003-066X.53.10.1125","volume":"53","author":"EL Thorndike","year":"1998","unstructured":"Thorndike EL (1998) Animal intelligence: an experimental study of the associative processes in animals. Am Psychol 53(10):1125\u20131127","journal-title":"Am Psychol"},{"key":"10450_CR94","doi-asserted-by":"crossref","unstructured":"Tousi M, Hosseinian S, Jadidinejad A, Menhaj M (2008) Application of Saras learning algorithm for reactive power control in power system. In: Proceedings of the 2nd international conference on power and energy, 2008. IEEE, pp 1198\u20131202","DOI":"10.1109\/PECON.2008.4762658"},{"key":"10450_CR95","unstructured":"Vogel A, Jurafsky D (2010) Learning to follow navigational directions. In: Proceedings of the 48th annual meeting of the Association for Computational Linguistics, 2010. Association for Computational Linguistics, pp 806\u2013814"},{"issue":"4","key":"10450_CR96","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1109\/MIS.2010.104","volume":"25","author":"FY Wang","year":"2010","unstructured":"Wang FY (2010) The emergence of intelligent enterprises: from CPS to CPSS. IEEE Intell Syst 25(4):85\u201388","journal-title":"IEEE Intell Syst"},{"issue":"6","key":"10450_CR97","doi-asserted-by":"crossref","first-page":"36","DOI":"10.1109\/MCC.2016.130","volume":"3","author":"X Wang","year":"2016","unstructured":"Wang X, Yang LT, Feng J, Chen X, Deen MJ (2016) A tensor-based big service framework for enhanced living environments. IEEE Cloud Comput Mag 3(6):36\u201343","journal-title":"IEEE Cloud Comput Mag"},{"issue":"5","key":"10450_CR98","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1109\/MCC.2018.053711665","volume":"5","author":"P Wang","year":"2018","unstructured":"Wang P, Yang LT, Li J (2018) An edge cloud-assisted CPSS framework for smart city. IEEE Cloud Comput 5(5):37\u201346","journal-title":"IEEE Cloud Comput"},{"key":"10450_CR99","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1016\/j.inffus.2018.11.002","volume":"51","author":"P Wang","year":"2019","unstructured":"Wang P, Yang LT, Li J, Chen J, Hu S (2019) Data fusion in cyber\u2013physical\u2013social systems: state-of-the-art and perspectives. Inf Fusion 51:42\u201357","journal-title":"Inf Fusion"},{"issue":"4","key":"10450_CR100","doi-asserted-by":"crossref","first-page":"675","DOI":"10.1109\/TSC.2020.2964663","volume":"13","author":"P Wang","year":"2020","unstructured":"Wang P, Yang LT, Li J, Zhou X (2020a) MMDP: a mobile-IoT based multi-modal reinforcement learning service framework. IEEE Trans Serv Comput 13(4):675\u2013684","journal-title":"IEEE Trans Serv Comput"},{"key":"10450_CR101","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1016\/j.ins.2019.08.047","volume":"513","author":"P Wang","year":"2020","unstructured":"Wang P, Yang LT, Nie X, Ren Z, Li J, Kuang L (2020b) Data-driven software defined network attack detection: state-of-the-art and perspectives. Inf Sci 513:65\u201383","journal-title":"Inf Sci"},{"issue":"2","key":"10450_CR102","doi-asserted-by":"crossref","first-page":"907","DOI":"10.1109\/TNSE.2019.2913669","volume":"7","author":"P Wang","year":"2020","unstructured":"Wang P, Yang LT, Peng Y, Li J, Xie X (2020c) $${M^2}{T^2}$$: the multivariate multi-step transition tensor for user mobility pattern prediction. IEEE Trans Netw Sci Eng 7(2):907\u2013917","journal-title":"IEEE Trans Netw Sci Eng"},{"issue":"2","key":"10450_CR103","doi-asserted-by":"crossref","first-page":"713","DOI":"10.1109\/TNSE.2019.2929155","volume":"7","author":"P Wang","year":"2020","unstructured":"Wang P, Yang LT, Qian G, Li J, Yan Z (2020d) HO-OTSVD: a novel tensor decomposition and its incremental computation for cyber\u2013physical\u2013social networks (CPSN). IEEE Trans Netw Sci Eng 7(2):713\u2013725","journal-title":"IEEE Trans Netw Sci Eng"},{"issue":"3","key":"10450_CR104","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1109\/TSUSC.2018.2799940","volume":"6","author":"P Wang","year":"2021","unstructured":"Wang P, Yang LT, Qian G, Lu F (2021) The cyber\u2013physical\u2013social transition tensor service framework. IEEE Trans Sustain Comput 6(3):481\u2013492","journal-title":"IEEE Trans Sustain Comput"},{"key":"10450_CR105","unstructured":"Wang Z, Schaul T, Hessel M, Van Hasselt H, Lanctot M, De Freitas N (n.d.) Dueling network architectures for deep reinforcement learning. arXiv preprint. arXiv:1511.06581"},{"issue":"4","key":"10450_CR106","first-page":"233","volume":"15","author":"C Watkins","year":"1989","unstructured":"Watkins C (1989a) Learning from delayed rewards. Robot Auton Syst 15(4):233\u2013235","journal-title":"Robot Auton Syst"},{"key":"10450_CR107","unstructured":"Watkins C (1989b) Learning from delayed rewards. PhD Thesis, Cambridge University"},{"issue":"4","key":"10450_CR108","doi-asserted-by":"crossref","first-page":"2509","DOI":"10.1109\/TIE.2014.2361485","volume":"62","author":"Q Wei","year":"2015","unstructured":"Wei Q, Liu D, Shi G (2015) A novel dual iterative Q-learning method for optimal battery management in smart residential environments. IEEE Trans Ind Electron 62(4):2509\u20132518","journal-title":"IEEE Trans Ind Electron"},{"key":"10450_CR109","unstructured":"Whong C (n.d.) Foiling NYC\u2019s taxi trip data. http:\/\/chriswhong.com\/opendata\/foil-nyc-taxi\/"},{"issue":"3\u20134","key":"10450_CR110","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3\u20134):229\u2013256","journal-title":"Mach Learn"},{"key":"10450_CR111","first-page":"2991","volume":"6","author":"W Wisetjindawat","year":"2005","unstructured":"Wisetjindawat W, Sano K, Matsumoto S (2005) Supply chain simulation for modeling the interactions in freight movement. J East Asia Soc Transp Stud 6:2991\u20133004","journal-title":"J East Asia Soc Transp Stud"},{"key":"10450_CR112","doi-asserted-by":"crossref","unstructured":"Xiang Y, Niu W, Liu J, Chen T, Han Z (2018a) A PCA-based model to predict adversarial examples on Q-learning of path finding. In: IEEE third international conference on data science in cyberspace, 2018, pp 773\u2013780","DOI":"10.1109\/DSC.2018.00125"},{"key":"10450_CR113","doi-asserted-by":"crossref","unstructured":"Xiang Y, Niu W, Liu J, Chen T, Han Z (2018b) A PCA-based model to predict adversarial examples on q-learning of path finding. In: 2018 IEEE third international conference on data science in cyberspace (DSC), 2018, pp 773\u2013780","DOI":"10.1109\/DSC.2018.00125"},{"key":"10450_CR115","unstructured":"Xu X, He H-G (2002) Residual-gradient-based neural reinforcement learning for the optimal control of an acrobat. In: Proceedings of the international symposium on intelligent control, 2002. IEEE, pp 758\u2013763"},{"key":"10450_CR114","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.ins.2013.08.037","volume":"261","author":"X Xu","year":"2014","unstructured":"Xu X, Zuo L, Huang Z (2014) Reinforcement learning algorithms with function approximation: recent advances and applications. Inf Sci 261:1\u201331","journal-title":"Inf Sci"},{"issue":"3","key":"10450_CR116","first-page":"1","volume":"50","author":"K-LA Yau","year":"2017","unstructured":"Yau K-LA, Qadir J, Khoo HL, Ling MH, Komisarczuk P (2017) A survey on reinforcement learning models and algorithms for traffic signal control. ACM Comput Surv (CSUR) 50(3):1\u201338","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"2","key":"10450_CR117","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2834119","volume":"15","author":"J Zeng","year":"2016","unstructured":"Zeng J, Yang LT, Ma J (2016) A system-level modeling and design for cyber\u2013physical\u2013social systems. ACM Trans Embed Comput Syst 15(2):1\u201326","journal-title":"ACM Trans Embed Comput Syst"},{"issue":"6","key":"10450_CR118","first-page":"701","volume":"33","author":"D Zhao","year":"2016","unstructured":"Zhao D, Shao K, Zhu Y, Li D, Chen Y, Wang H, Liu D-R, Zhou T, Wang C-H (2016) Review of deep reinforcement learning and discussions on the development of computer go. Control Theory Appl 33(6):701\u2013717","journal-title":"Control Theory Appl"},{"key":"10450_CR119","unstructured":"Zhou Y (n.d.) Data-driven cyber\u2013physical\u2013social system for knowledge discovery in smart cities. PhD Thesis, University of Surrey"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-023-10450-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-023-10450-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-023-10450-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T19:58:46Z","timestamp":1729108726000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-023-10450-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,22]]},"references-count":119,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["10450"],"URL":"https:\/\/doi.org\/10.1007\/s10462-023-10450-2","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"type":"print","value":"0269-2821"},{"type":"electronic","value":"1573-7462"}],"subject":[],"published":{"date-parts":[[2023,3,22]]},"assertion":[{"value":"22 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}