{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T03:30:55Z","timestamp":1742959855852,"version":"3.40.3"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781094"},{"type":"electronic","value":"9783031781100"}],"license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78110-0_1","type":"book-chapter","created":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T21:53:12Z","timestamp":1733089992000},"page":"1-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Personalized Robot Actions with\u00a0Ranking of\u00a0Trajectories"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9131-5854","authenticated-orcid":false,"given":"Hao","family":"Huang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1541-2171","authenticated-orcid":false,"given":"Yiyun","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7092-7966","authenticated-orcid":false,"given":"Shuaihang","family":"Yuan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6448-003X","authenticated-orcid":false,"given":"Congcong","family":"Wen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9119-6114","authenticated-orcid":false,"given":"Yu","family":"Hao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9427-3883","authenticated-orcid":false,"given":"Yi","family":"Fang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Akrour, R., Schoenauer, M., Sebag, M.: April: active preference learning-based reinforcement learning. In: European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 116\u2013131. Springer (2012)","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"1_CR2","unstructured":"Archambeau, C., Caron, F.: Plackett-luce regression: a new Bayesian model for polychotomous data. In: Conference on Uncertainty in Artificial Intelligence (2012)"},{"issue":"01","key":"1_CR3","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1142\/S0219843608001303","volume":"5","author":"A Bauer","year":"2008","unstructured":"Bauer, A., Wollherr, D., Buss, M.: Human-robot collaboration: a survey. Int. J. Humanoid Rob. 5(01), 47\u201366 (2008)","journal-title":"Int. J. Humanoid Rob."},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Biyik, E., Sadigh, D.: Batch active preference-based learning of reward functions. In: Conference on Robot Learning, pp. 519\u2013528. PMLR (2018)","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Bradley, R.A., Terry, M.E.: Rank analysis of incomplete block designs: I. the method of paired comparisons. Biometrika 39(3\/4), 324\u2013345 (1952)","DOI":"10.1093\/biomet\/39.3-4.324"},{"key":"1_CR6","unstructured":"Brown, D., Goo, W., Nagarajan, P., Niekum, S.: Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations. In: International Conference on Machine Learning, pp. 783\u2013792. PMLR (2019)"},{"key":"1_CR7","unstructured":"Christiano, P.F., Leike, J., Brown, T., Martic, M., Legg, S., Amodei, D.: Deep reinforcement learning from human preferences. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Clabaugh, C., Matari\u0107, M.: Robots for the people, by the people: personalizing human-machine interaction. Sci. Robot. 3(21), eaat7451 (2018)","DOI":"10.1126\/scirobotics.aat7451"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Claure, H., et al.: Fairness and transparency in human-robot interaction. In: ACM\/IEEE International Conference on Human-Robot Interaction, pp. 1244\u20131246. IEEE (2022)","DOI":"10.1109\/HRI53351.2022.9889421"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"El-Shamouty, M., Wu, X., Yang, S., Albus, M., Huber, M.F.: Towards safe human-robot collaboration using deep reinforcement learning. In: IEEE International Conference on Robotics and Automation, pp. 4899\u20134905. IEEE (2020)","DOI":"10.1109\/ICRA40945.2020.9196924"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Erickson, Z., Gangaram, V., Kapusta, A., Liu, C.K., Kemp, C.C.: Assistive gym: a physics simulation framework for assistive robotics. In: IEEE International Conference on Robotics and Automation, pp. 10169\u201310176. IEEE (2020)","DOI":"10.1109\/ICRA40945.2020.9197411"},{"key":"1_CR12","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/s10994-012-5313-8","volume":"89","author":"J F\u00fcrnkranz","year":"2012","unstructured":"F\u00fcrnkranz, J., H\u00fcllermeier, E., Cheng, W., Park, S.H.: Preference-based reinforcement learning: a formal framework and a policy iteration algorithm. Mach. Learn. 89, 123\u2013156 (2012)","journal-title":"Mach. Learn."},{"key":"1_CR13","unstructured":"Ge, L., et al.: Axioms for AI alignment from human feedback. arXiv preprint arXiv:2405.14758 (2024)"},{"issue":"11\u201312","key":"1_CR14","doi-asserted-by":"publisher","first-page":"1507","DOI":"10.1177\/0278364909343970","volume":"28","author":"S Haddadin","year":"2009","unstructured":"Haddadin, S., Albu-Sch\u00e4ffer, A., Hirzinger, G.: Requirements for safe robots: measurements, analysis and new insights. Int. J. Robot. Res. 28(11\u201312), 1507\u20131527 (2009)","journal-title":"Int. J. Robot. Res."},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"issue":"5","key":"1_CR16","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","volume":"2","author":"K Hornik","year":"1989","unstructured":"Hornik, K., Stinchcombe, M., White, H.: Multilayer feedforward networks are universal approximators. Neural Netw. 2(5), 359\u2013366 (1989)","journal-title":"Neural Netw."},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Howard, A.: Are we trusting AI too much? Examining human-robot interactions in the real world. In: Proceedings of the ACM\/IEEE International Conference on Human-Robot Interaction, p. 1 (2020)","DOI":"10.1145\/3319502.3374842"},{"key":"1_CR18","unstructured":"Ibarz, B., Leike, J., Pohlen, T., Irving, G., Legg, S., Amodei, D.: Reward learning from human preferences and demonstrations in atari. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"1_CR19","doi-asserted-by":"publisher","first-page":"2183","DOI":"10.1007\/s10514-019-09865-0","volume":"43","author":"A Kapusta","year":"2019","unstructured":"Kapusta, A., et al.: Personalized collaborative plans for robot-assisted dressing via optimization and simulation. Auton. Robot. 43, 2183\u20132207 (2019)","journal-title":"Auton. Robot."},{"key":"1_CR20","first-page":"18661","volume":"33","author":"P Khosla","year":"2020","unstructured":"Khosla, P., et al.: Supervised contrastive learning. Adv. Neural. Inf. Process. Syst. 33, 18661\u201318673 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR21","unstructured":"Lee, K., Smith, L.M., Abbeel, P.: Pebble: feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. In: International Conference on Machine Learning, pp. 6152\u20136163. PMLR (2021)"},{"issue":"4","key":"1_CR22","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1109\/THMS.2019.2912447","volume":"49","author":"G Li","year":"2019","unstructured":"Li, G., Gomez, R., Nakamura, K., He, B.: Human-centered reinforcement learning: a survey. IEEE Trans. Hum.-Mach. Syst. 49(4), 337\u2013349 (2019)","journal-title":"IEEE Trans. Hum.-Mach. Syst."},{"key":"1_CR23","unstructured":"Liang, X., Shu, K., Lee, K., Abbeel, P.: Reward uncertainty for exploration in preference-based reinforcement learning. In: International Conference on Learning Representations (2021)"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Liu, M., Chen, C.: Task decoupling in preference-based reinforcement learning for personalized human-robot interaction. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 848\u2013855. IEEE (2022)","DOI":"10.1109\/IROS47612.2022.9981076"},{"issue":"12","key":"1_CR25","doi-asserted-by":"publisher","first-page":"7654","DOI":"10.1109\/TSMC.2022.3161588","volume":"52","author":"M Liu","year":"2022","unstructured":"Liu, M., Xiao, C., Chen, C.: Perspective-corrected spatial referring expression generation for human-robot interaction. IEEE Trans. Syst. Man Cybern. Syst. 52(12), 7654\u20137666 (2022)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"key":"1_CR26","volume-title":"Individual Choice Behavior","author":"RD Luce","year":"1959","unstructured":"Luce, R.D.: Individual Choice Behavior, vol. 4. Wiley, New York (1959)"},{"key":"1_CR27","unstructured":"Maystre, L., Grossglauser, M.: Fast and accurate inference of plackett\u2013luce models. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"issue":"7540","key":"1_CR28","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"1_CR29","doi-asserted-by":"crossref","unstructured":"Munzer, T., Toussaint, M., Lopes, M.: Preference learning on the execution of collaborative human-robot tasks. In: 2017 IEEE International Conference on Robotics and Automation, pp. 879\u2013885. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989108"},{"key":"1_CR30","unstructured":"Myers, V., Biyik, E., Anari, N., Sadigh, D.: Learning multimodal rewards from rankings. In: Conference on Robot Learning, pp. 342\u2013352. PMLR (2022)"},{"issue":"2","key":"1_CR31","doi-asserted-by":"publisher","first-page":"321","DOI":"10.30574\/gscarr.2024.18.2.0070","volume":"18","author":"A Obaigbena","year":"2024","unstructured":"Obaigbena, A., Lottu, O.A., Ugwuanyi, E.D., Jacks, B.S., Sodiya, E.O., Daraojimba, O.D.: Ai and human-robot interaction: a review of recent advances and challenges. GSC Adv. Res. Rev. 18(2), 321\u2013330 (2024)","journal-title":"GSC Adv. Res. Rev."},{"key":"1_CR32","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1016\/j.jmsy.2020.06.018","volume":"56","author":"H Oliff","year":"2020","unstructured":"Oliff, H., Liu, Y., Kumar, M., Williams, M., Ryan, M.: Reinforcement learning for facilitating human-robot-interaction in manufacturing. J. Manuf. Syst. 56, 326\u2013340 (2020)","journal-title":"J. Manuf. Syst."},{"key":"1_CR33","doi-asserted-by":"crossref","unstructured":"Palan, M., Shevchuk, G., Charles\u00a0Landolfi, N., Sadigh, D.: Learning reward functions by integrating human demonstrations and preferences. In: Robotics: Science and Systems (2019)","DOI":"10.15607\/RSS.2019.XV.023"},{"key":"1_CR34","unstructured":"Park, J., Seo, Y., Shin, J., Lee, H., Abbeel, P., Lee, K.: Surf: semi-supervised reward learning with data augmentation for feedback-efficient preference-based reinforcement learning. In: International Conference on Learning Representations (2021)"},{"key":"1_CR35","first-page":"895","volume":"5","author":"TJ Pleskac","year":"2015","unstructured":"Pleskac, T.J.: Decision and choice: Luce\u2019s choice axiom. Int. Encycl. Soc. Behav. Sci. 5, 895\u2013900 (2015)","journal-title":"Int. Encycl. Soc. Behav. Sci."},{"issue":"5","key":"1_CR36","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0252145","volume":"16","author":"S Sankaran","year":"2021","unstructured":"Sankaran, S., Derechin, J., Christakis, N.A.: Curmelo: the theory and practice of a forced-choice approach to producing preference rankings. PLoS ONE 16(5), e0252145 (2021)","journal-title":"PLoS ONE"},{"key":"1_CR37","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"issue":"7587","key":"1_CR38","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"1_CR39","doi-asserted-by":"crossref","unstructured":"Song, F., et al.: Preference ranking optimization for human alignment. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 18990\u201318998 (2024)","DOI":"10.1609\/aaai.v38i17.29865"},{"key":"1_CR40","first-page":"3008","volume":"33","author":"N Stiennon","year":"2020","unstructured":"Stiennon, N., et al.: Learning to summarize with human feedback. Adv. Neural. Inf. Process. Syst. 33, 3008\u20133021 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR41","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press (2018)"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Tabrez, A., Hayes, B.: Improving human-robot interaction through explainable reinforcement learning. In: ACM\/IEEE International Conference on Human-Robot Interaction, pp. 751\u2013753. IEEE (2019)","DOI":"10.1109\/HRI.2019.8673198"},{"issue":"2","key":"1_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3439720","volume":"10","author":"L Tian","year":"2021","unstructured":"Tian, L., Oviatt, S.: A taxonomy of social errors in human-robot interaction. ACM Trans. Hum.-Robot Interact. 10(2), 1\u201332 (2021)","journal-title":"ACM Trans. Hum.-Robot Interact."},{"key":"1_CR44","unstructured":"Tien, J., Brown, D.: Causal confusion and reward misidentification in preference-based reward learning. In: International Conference on Learning Representations (2023)"},{"key":"1_CR45","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/j.biosystemseng.2018.12.005","volume":"179","author":"JP Vasconez","year":"2019","unstructured":"Vasconez, J.P., Kantor, G.A., Cheein, F.A.A.: Human-robot interaction in agriculture: a survey and current challenges. Biosys. Eng. 179, 35\u201348 (2019)","journal-title":"Biosys. Eng."},{"key":"1_CR46","unstructured":"Wilson, A., Fern, A., Tadepalli, P.: A Bayesian approach for policy learning from trajectory preference queries. In: Advances in Neural Information Processing Systems, vol. 25 (2012)"},{"issue":"136","key":"1_CR47","first-page":"1","volume":"18","author":"C Wirth","year":"2017","unstructured":"Wirth, C., Akrour, R., Neumann, G., F\u00fcrnkranz, J., et al.: A survey of preference-based reinforcement learning methods. J. Mach. Learn. Res. 18(136), 1\u201346 (2017)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR48","unstructured":"Woodworth, B., Ferrari, F., Zosa, T.E., Riek, L.D.: Preference learning in assistive robotics: observational repeated inverse reinforcement learning. In: Machine Learning for Healthcare Conference, pp. 420\u2013439. PMLR (2018)"},{"issue":"2","key":"1_CR49","doi-asserted-by":"publisher","first-page":"3545","DOI":"10.1109\/LRA.2021.3063927","volume":"6","author":"H Zhan","year":"2021","unstructured":"Zhan, H., Tao, F., Cao, Y.: Human-guided robot behavior learning: a GAN-assisted preference-based reinforcement learning approach. IEEE Robot. Autom. Lett. 6(2), 3545\u20133552 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"1_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, C., Chen, J., Li, J., Peng, Y., Mao, Z.: Large language models for human-robot interaction: a review. Biomimetic Intell. Robot. 100131 (2023)","DOI":"10.1016\/j.birob.2023.100131"},{"key":"1_CR51","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2021.102227","volume":"73","author":"R Zhang","year":"2022","unstructured":"Zhang, R., Lv, Q., Li, J., Bao, J., Liu, T., Liu, S.: A reinforcement learning method for human-robot collaboration in assembly tasks. Robot. Comput.-Integr. Manuf. 73, 102227 (2022)","journal-title":"Robot. Comput.-Integr. Manuf."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78110-0_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T23:31:52Z","timestamp":1733095912000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78110-0_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"ISBN":["9783031781094","9783031781100"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78110-0_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"2 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}