{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T16:56:52Z","timestamp":1778691412186,"version":"3.51.4"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T00:00:00Z","timestamp":1686700800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T00:00:00Z","timestamp":1686700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61921004"],"award-info":[{"award-number":["61921004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62173251"],"award-info":[{"award-number":["62173251"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1713209"],"award-info":[{"award-number":["U1713209"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s00521-023-08385-4","type":"journal-article","created":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T13:02:05Z","timestamp":1686747725000},"page":"16247-16265","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Multi-objective deep reinforcement learning for crowd-aware robot navigation with dynamic human preference"],"prefix":"10.1007","volume":"35","author":[{"given":"Guangran","family":"Cheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuanda","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lu","family":"Dong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenzhe","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9269-334X","authenticated-orcid":false,"given":"Changyin","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,6,14]]},"reference":[{"issue":"3","key":"8385_CR1","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1038\/s42256-021-00324-z","volume":"3","author":"H Su","year":"2021","unstructured":"Su H, Lallo AD, Murphy RR, Taylor RH, Krieger A (2021) Physical human-robot interaction for clinical care in infectious environments. Nat Mach Intell 3(3):184\u2013186","journal-title":"Nat Mach Intell"},{"key":"8385_CR2","doi-asserted-by":"crossref","unstructured":"Chen C, Liu Y, Kreiss S, Alahi A (2019) Crowd-robot interaction: Crowd-aware robot navigation with attention-based deep reinforcement learning. In: International conference on robotics and automation, pp 6015\u20136022","DOI":"10.1109\/ICRA.2019.8794134"},{"issue":"2","key":"8385_CR3","doi-asserted-by":"publisher","first-page":"1178","DOI":"10.1109\/LRA.2019.2891491","volume":"4","author":"T Fan","year":"2019","unstructured":"Fan T, Cheng X, Pan J, Long P, Liu W, Yang R, Manocha D (2019) Getting robots unfrozen and unlost in dense pedestrian crowds. IEEE Robot Autom Lett 4(2):1178\u20131185","journal-title":"IEEE Robot Autom Lett"},{"issue":"3","key":"8385_CR4","doi-asserted-by":"publisher","first-page":"4352","DOI":"10.1109\/LRA.2020.2996593","volume":"5","author":"AJ Sathyamoorthy","year":"2020","unstructured":"Sathyamoorthy AJ, Patel U, Guan T, Manocha D (2020) Frozone: freezing-free, pedestrian-friendly navigation in human crowds. IEEE Robot Autom Lett 5(3):4352\u20134359","journal-title":"IEEE Robot Autom Lett"},{"key":"8385_CR5","doi-asserted-by":"crossref","unstructured":"Trautman P, Krause A (2010) Unfreezing the robot: navigation in dense, interacting crowds. In: 2010 IEEE\/RSJ international conference on intelligent robots and systems, pp 797\u2013803","DOI":"10.1109\/IROS.2010.5654369"},{"key":"8385_CR6","doi-asserted-by":"crossref","unstructured":"Kayukawa S, Higuchi K, Guerreiro J, Morishima S, Sato Y, Kitani K, Asakawa C (2019) Bbeep: A sonic collision avoidance system for blind travellers and nearby pedestrians. In: CHI conference on human factors in computing systems, pp 1\u201312","DOI":"10.1145\/3290605.3300282"},{"key":"8385_CR7","doi-asserted-by":"crossref","unstructured":"Watanabe A, Ikeda T, Morales Y, Shinozawa K, Miyashita T, Hagita N (2015) Communicating robotic navigational intentions. In: 2015 IEEE\/RSJ international conference on intelligent robots and systems, pp 5763\u20135769","DOI":"10.1109\/IROS.2015.7354195"},{"issue":"4","key":"8385_CR8","doi-asserted-by":"publisher","first-page":"775","DOI":"10.1007\/s10514-016-9584-y","volume":"41","author":"G Ferrer","year":"2017","unstructured":"Ferrer G, Zulueta AG, Cotarelo FH, Sanfeliu A (2017) Robot social-aware navigation framework to accompany people walking side-by-side. Auton Robot 41(4):775\u2013793","journal-title":"Auton Robot"},{"key":"8385_CR9","doi-asserted-by":"crossref","unstructured":"Van\u00a0den Berg J, Lin M, Manocha D (2008) Reciprocal velocity obstacles for real-time multi-agent navigation. In: IEEE international conference on robotics and automation, pp 1928\u20131935","DOI":"10.1109\/ROBOT.2008.4543489"},{"key":"8385_CR10","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-642-19457-3_1","volume":"1","author":"J Van Den Berg","year":"2011","unstructured":"Van Den Berg J, Guy SJ, Lin M, Manocha D (2011) Reciprocal n-body collision avoidance. Robot Res 1:3\u201319","journal-title":"Robot Res"},{"key":"8385_CR11","doi-asserted-by":"crossref","unstructured":"Trautman P, Ma J, Murray RM, Krause A (2013) Robot navigation in dense human crowds: the case for cooperation. In: IEEE international conference on robotics and automation, pp 2153\u20132160","DOI":"10.1109\/ICRA.2013.6630866"},{"issue":"22","key":"8385_CR12","doi-asserted-by":"publisher","first-page":"16875","DOI":"10.1007\/s00521-018-03976-y","volume":"32","author":"X Yao","year":"2020","unstructured":"Yao X, Wang X, Zhang L, Jiang X (2020) Model predictive and adaptive neural sliding mode control for three-dimensional path following of autonomous underwater vehicle with input saturation. Neural Comput Appl 32(22):16875\u201316889","journal-title":"Neural Comput Appl"},{"key":"8385_CR13","first-page":"1","volume":"1","author":"J Wei","year":"2022","unstructured":"Wei J, Zhu B (2022) Model predictive control for trajectory-tracking and formation of wheeled mobile robots. Neural Comput Appl 1:1\u201315","journal-title":"Neural Comput Appl"},{"key":"8385_CR14","doi-asserted-by":"crossref","unstructured":"Chen YF, Liu M, Everett M, How JP (2017) Decentralized non-communicating multiagent collision avoidance with deep reinforcement learning. In: 2017 IEEE international conference on robotics and automation, pp 285\u2013292","DOI":"10.1109\/ICRA.2017.7989037"},{"key":"8385_CR15","doi-asserted-by":"crossref","unstructured":"Everett M, Chen YF, How JP (2018) Motion planning among dynamic, decision-making agents with deep reinforcement learning. In: 2018 IEEE\/RSJ international conference on intelligent robots and systems, pp 3052\u20133059","DOI":"10.1109\/IROS.2018.8593871"},{"issue":"2","key":"8385_CR16","doi-asserted-by":"publisher","first-page":"2754","DOI":"10.1109\/LRA.2020.2972868","volume":"5","author":"Y Chen","year":"2020","unstructured":"Chen Y, Liu C, Shi BE, Liu M (2020) Robot navigation in crowds by graph convolutional networks with attention learned from human gaze. IEEE Robot Autom Lett 5(2):2754\u20132761","journal-title":"IEEE Robot Autom Lett"},{"issue":"3","key":"8385_CR17","doi-asserted-by":"publisher","first-page":"4352","DOI":"10.1109\/LRA.2020.2996593","volume":"5","author":"AJ Sathyamoorthy","year":"2020","unstructured":"Sathyamoorthy AJ, Patel U, Guan T, Manocha D (2020) Frozone: freezing-free, pedestrian-friendly navigation in human crowds. IEEE Robot Autom Lett 5(3):4352\u20134359","journal-title":"IEEE Robot Autom Lett"},{"issue":"3","key":"8385_CR18","doi-asserted-by":"publisher","first-page":"5223","DOI":"10.1109\/LRA.2021.3071954","volume":"6","author":"SS Samsani","year":"2021","unstructured":"Samsani SS, Muhammad MS (2021) Socially compliant robot navigation in crowded environment by human behavior resemblance using deep reinforcement learning. IEEE Robot Autom Lett 6(3):5223\u20135230","journal-title":"IEEE Robot Autom Lett"},{"key":"8385_CR19","doi-asserted-by":"crossref","unstructured":"Nishimura M, Yonetani R (2020) L2b: learning to balance the safety-efficiency trade-off in interactive crowd-aware robot navigation. In: 2020 IEEE\/RSJ international conference on intelligent robots and systems, pp 11004\u201311010","DOI":"10.1109\/IROS45743.2020.9341519"},{"key":"8385_CR20","doi-asserted-by":"crossref","unstructured":"Jain A, Chen D, Bansal D, Scheele S, Kishore M, Sapra H, Kent D, Ravichandar H, Chernova S (2020) Anticipatory human-robot collaboration via multi-objective trajectory optimization. In: 2020 IEEE\/RSJ international conference on intelligent robots and systems, pp 11052\u201311057","DOI":"10.1109\/IROS45743.2020.9341058"},{"issue":"3","key":"8385_CR21","doi-asserted-by":"publisher","first-page":"1783","DOI":"10.1007\/s00521-021-05859-1","volume":"34","author":"P Vamplew","year":"2022","unstructured":"Vamplew P, Foale C, Dazeley R (2022) The impact of environmental stochasticity on value-based multiobjective reinforcement learning. Neural Comput Appl 34(3):1783\u20131799","journal-title":"Neural Comput Appl"},{"key":"8385_CR22","unstructured":"Xu J, Tian Y, Ma P, Rus D, Sueda S, Matusik W (2020) Prediction-guided multi-objective reinforcement learning for continuous robot control. In: International conference on machine learning, pp 10607\u201310616"},{"issue":"6","key":"8385_CR23","doi-asserted-by":"publisher","first-page":"1473","DOI":"10.1007\/s10514-018-9806-6","volume":"43","author":"G Ferrer","year":"2019","unstructured":"Ferrer G, Sanfeliu A (2019) Anticipative kinodynamic planning: multi-objective robot navigation in urban and dynamic environments. Auton Robot 43(6):1473\u20131488","journal-title":"Auton Robot"},{"key":"8385_CR24","doi-asserted-by":"publisher","first-page":"41466","DOI":"10.1109\/ACCESS.2020.2976586","volume":"8","author":"E Meyer","year":"2020","unstructured":"Meyer E, Robinson H, Rasheed A, San O (2020) Taming an autonomous surface vehicle for path following and collision avoidance using deep reinforcement learning. IEEE Access 8:41466\u201341481","journal-title":"IEEE Access"},{"key":"8385_CR25","unstructured":"Mannor S, Shimkin N (2001) The steering approach for multi-criteria reinforcement learning. In: Advances in neural information processing systems, pp 1563\u20131570"},{"key":"8385_CR26","doi-asserted-by":"crossref","unstructured":"Natarajan S, Tadepalli P (2005) Dynamic preferences in multi-criteria reinforcement learning. In: International conference on machine learning, pp 601\u2013608","DOI":"10.1145\/1102351.1102427"},{"key":"8385_CR27","doi-asserted-by":"crossref","unstructured":"Van\u00a0Moffaert K, Drugan MM, Now\u00e9 A (2013) Scalarized multi-objective reinforcement learning: Novel design techniques. In: 2013 IEEE symposium on adaptive dynamic programming and reinforcement learning (ADPRL), pp 191\u2013199","DOI":"10.1109\/ADPRL.2013.6615007"},{"key":"8385_CR28","unstructured":"Mossalam H, Assael YM, Roijers DM, Whiteson S (2016) Multi-objective deep reinforcement learning. arXiv preprint arXiv:1610.02707"},{"key":"8385_CR29","unstructured":"Abels A, Roijers D, Lenaerts T, Now\u00e9 A, Steckelmacher D (2019) Dynamic weights in multi-objective deep reinforcement learning. In: International conference on machine learning, pp 11\u201320"},{"key":"8385_CR30","first-page":"1","volume":"32","author":"R Yang","year":"2019","unstructured":"Yang R, Sun X, Narasimhan K (2019) A generalized algorithm for multi-objective reinforcement learning and policy adaptation. Adv Neural Inf Process Syst 32:1","journal-title":"Adv Neural Inf Process Syst"},{"key":"8385_CR31","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers DM, Vamplew P, Whiteson S, Dazeley R (2013) A survey of multi-objective sequential decision-making. J Artif Intell Res 48:67\u2013113","journal-title":"J Artif Intell Res"},{"issue":"7","key":"8385_CR32","doi-asserted-by":"publisher","first-page":"2869","DOI":"10.1109\/TAC.2018.2869462","volume":"64","author":"VG Lopez","year":"2018","unstructured":"Lopez VG, Lewis FL (2018) Dynamic multiobjective control for continuous-time systems using reinforcement learning. IEEE Trans Autom Control 64(7):2869\u20132874","journal-title":"IEEE Trans Autom Control"},{"key":"8385_CR33","doi-asserted-by":"crossref","unstructured":"Hayes CF, R\u0103dulescu R, Bargiacchi E, K\u00e4llstr\u00f6m J, Macfarlane M, Reymond M, Verstraeten T, Zintgraf LM, Dazeley R, Heintz F et al (2021) A practical guide to multi-objective reinforcement learning and planning. arXiv preprint arXiv:2103.09568","DOI":"10.1007\/s10458-022-09552-y"},{"key":"8385_CR34","doi-asserted-by":"crossref","unstructured":"Nishimura M, Yonetani R (2020) L2b: learning to balance the safety-efficiency trade-off in interactive crowd-aware robot navigation. In: 2020 IEEE\/RSJ international conference on intelligent robots and systems, pp 11004\u201311010","DOI":"10.1109\/IROS45743.2020.9341519"},{"key":"8385_CR35","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199"},{"issue":"4","key":"8385_CR36","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1109\/TG.2018.2849942","volume":"10","author":"Y Wang","year":"2018","unstructured":"Wang Y, He H, Sun C (2018) Learning to navigate through complex dynamic environment with modular deep reinforcement learning. IEEE Trans Games 10(4):400\u2013412","journal-title":"IEEE Trans Games"},{"issue":"1","key":"8385_CR37","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/0166-2236(92)90344-8","volume":"15","author":"MA Goodale","year":"1992","unstructured":"Goodale MA, Milner AD (1992) Separate visual pathways for perception and action. Trends Neurosci 15(1):20\u201325","journal-title":"Trends Neurosci"},{"issue":"1","key":"8385_CR38","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.automatica.2010.10.003","volume":"47","author":"AS Matveev","year":"2011","unstructured":"Matveev AS, Teimoori H, Savkin AV (2011) Navigation of a unicycle-like mobile robot for environmental extremum seeking. Automatica 47(1):85\u201391","journal-title":"Automatica"},{"issue":"2","key":"8385_CR39","doi-asserted-by":"publisher","first-page":"2007","DOI":"10.1109\/LRA.2019.2899918","volume":"4","author":"H-TL Chiang","year":"2019","unstructured":"Chiang H-TL, Faust A, Fiser M, Francis A (2019) Learning navigation behaviors end-to-end with autorl. IEEE Robot Autom Lett 4(2):2007\u20132014","journal-title":"IEEE Robot Autom Lett"},{"key":"8385_CR40","unstructured":"Schaul T, Quan J, Antonoglou I, Silver D (2015) Prioritized experience replay. arXiv preprint arXiv:1511.05952"},{"key":"8385_CR41","doi-asserted-by":"crossref","unstructured":"Arzate\u00a0Cruz C, Igarashi T (2020) A survey on interactive reinforcement learning: design principles and open challenges. In: Proceedings of the 2020 ACM designing interactive systems conference, pp 1195\u20131209","DOI":"10.1145\/3357236.3395525"},{"key":"8385_CR42","unstructured":"Thomaz AL, Hoffman G, Breazeal C (2005) Real-time interactive reinforcement learning for robots. In: AAAI 2005 workshop on human comprehensible machine learning, pp 9\u201313"},{"issue":"4","key":"8385_CR43","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.76.046105","volume":"76","author":"W Yu","year":"2007","unstructured":"Yu W, Johansson A (2007) Modeling crowd turbulence by many-particle simulations. Phys Rev E 76(4):046105","journal-title":"Phys Rev E"},{"issue":"1","key":"8385_CR44","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1287\/trsc.1040.0108","volume":"39","author":"D Helbing","year":"2005","unstructured":"Helbing D, Buzna L, Johansson A, Werner T (2005) Self-organized pedestrian crowd dynamics: experiments, simulations, and design solutions. Transp Sci 39(1):1\u201324","journal-title":"Transp Sci"},{"issue":"4","key":"8385_CR45","doi-asserted-by":"publisher","first-page":"797","DOI":"10.1109\/TSMC.2017.2725300","volume":"49","author":"C Jiang","year":"2017","unstructured":"Jiang C, Ni Z, Guo Y, He H (2017) Learning human-robot interaction for robot-assisted pedestrian flow optimization. IEEE Trans Syst Man Cybern: Syst 49(4):797\u2013813","journal-title":"IEEE Trans Syst Man Cybern: Syst"},{"issue":"4","key":"8385_CR46","doi-asserted-by":"publisher","first-page":"1669","DOI":"10.1109\/TCYB.2018.2878977","volume":"50","author":"Z Wan","year":"2018","unstructured":"Wan Z, Jiang C, Fahad M, Ni Z, Guo Y, He H (2018) Robot-assisted pedestrian regulation based on deep reinforcement learning. IEEE Trans Cybern 50(4):1669\u20131682","journal-title":"IEEE Trans Cybern"},{"issue":"7540","key":"8385_CR47","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"8385_CR48","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-08385-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-023-08385-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-08385-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,12]],"date-time":"2023-07-12T21:00:21Z","timestamp":1689195621000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-023-08385-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,14]]},"references-count":48,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["8385"],"URL":"https:\/\/doi.org\/10.1007\/s00521-023-08385-4","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,14]]},"assertion":[{"value":"16 July 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 February 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}