{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T17:08:41Z","timestamp":1765213721026,"version":"3.46.0"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"US National Science Foundation grant","award":["1948224","1948224"],"award-info":[{"award-number":["1948224","1948224"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Intell Syst"],"DOI":"10.1007\/s44196-025-01064-3","type":"journal-article","created":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T17:04:01Z","timestamp":1765213441000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Reinforcement Learning with Limited HRI Demonstrations: A Task-Oriented Weight Update Method with Analysis of Multi-head and Layer Feature Combinations"],"prefix":"10.1007","volume":"18","author":[{"given":"Qinghua","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jessica","family":"Korneder","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Osamah A.","family":"Rawashdeh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanfeng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wing-Yue Geoffrey","family":"Louie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,8]]},"reference":[{"key":"1064_CR1","doi-asserted-by":"publisher","unstructured":"Hijaz, A., Korneder, J., Louie, W.-Y.G.: In-the-wild learning from demonstration for therapies for autism spectrum disorder. In: 2021 30th IEEE International Conference on Robot & Human Interactive Communication (RO-MAN), pp. 1224\u20131229 (2021). https:\/\/doi.org\/10.1109\/RO-MAN50785.2021.9515439","DOI":"10.1109\/RO-MAN50785.2021.9515439"},{"key":"1064_CR2","doi-asserted-by":"publisher","unstructured":"Clark-Turner, M., Begum, M.: Deep reinforcement learning of abstract reasoning from demonstrations. In: 2018 13th ACM\/IEEE International Conference on Human-Robot Interaction (HRI), pp. 160\u2013168 (2018). https:\/\/doi.org\/10.1145\/3171221.3171289","DOI":"10.1145\/3171221.3171289"},{"key":"1064_CR3","doi-asserted-by":"publisher","unstructured":"Tyshka, A., Louie, W.-Y.G.: Transparent learning from demonstration for robot-mediated therapy. In: 2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN), pp. 891\u2013897 (2022). https:\/\/doi.org\/10.1109\/RO-MAN53752.2022.9900854","DOI":"10.1109\/RO-MAN53752.2022.9900854"},{"key":"1064_CR4","doi-asserted-by":"publisher","unstructured":"Liu, X., Zheng, Y., Du, Z., Ding, M., Qian, Y., Yang, Z.: Gpt understands, too. AI Open 5, 208\u2013215 (2024) https:\/\/doi.org\/10.1016\/j.aiopen.2023.08.012","DOI":"10.1016\/j.aiopen.2023.08.012"},{"key":"1064_CR5","unstructured":"Li, K., Burdick, J.W.: Meta Inverse Reinforcement Learning via Maximum Reward Sharing for Human Motion Analysis (2017). arxiv:1710.03592"},{"key":"1064_CR6","unstructured":"Guo, Y., Gao, J., Wu, Z., Shi, C., Chen, J.: Reinforcement learning with demonstrations from mismatched task under sparse reward. In: Conference on Robot Learning, pp. 1146\u20131156 (2023). https:\/\/proceedings.mlr.press\/v205\/guo23a.html"},{"key":"1064_CR7","unstructured":"Seyed Ghasemipour, S.K., Gu, S.S., Zemel, R.: Smile: Scalable meta inverse reinforcement learning through context-conditional policies. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1064_CR8","unstructured":"Rakelly, K., Zhou, A., Finn, C., Levine, S., Quillen, D.: Efficient off-policy meta-reinforcement learning via probabilistic context variables. In: International Conference on Machine Learning, pp. 5331\u20135340 (2019). https:\/\/proceedings.mlr.press\/v97\/rakelly19a.html"},{"key":"1064_CR9","doi-asserted-by":"publisher","unstructured":"Wang, P., Li, H., Chan, C.-Y.: Meta-adversarial inverse reinforcement learning for decision-making tasks. In: 2021 IEEE International Conference on Robotics and Automation (ICRA), pp. 12632\u201312638 (2021). https:\/\/doi.org\/10.1109\/ICRA48506.2021.9561330","DOI":"10.1109\/ICRA48506.2021.9561330"},{"key":"1064_CR10","unstructured":"Xiong, Z., Zintgraf, L., Beck, J., Vuorio, R., Whiteson, S.: On the Practical Consistency of Meta-Reinforcement Learning Algorithms (2021). arxiv:2112.00478"},{"key":"1064_CR11","unstructured":"Wang, S., Wei, K., Zhang, H., Li, Y., Wu, W.: Let Me Check the Examples: Enhancing Demonstration Learning Via Explicit Imitation (2022). arxiv:2209.00455"},{"key":"1064_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111736","volume":"294","author":"Y Tang","year":"2024","unstructured":"Tang, Y., Guo, S., Liu, J., Wan, B., An, L., Liu, J.K.: Hierarchical reinforcement learning from imperfect demonstrations through reachable coverage-based subgoal filtering. Knowl. -Based Syst. 294, 111736 (2024). https:\/\/doi.org\/10.1016\/j.knosys.2024.111736","journal-title":"Knowl. -Based Syst."},{"issue":"7","key":"1064_CR13","doi-asserted-by":"publisher","first-page":"2263","DOI":"10.1007\/s10994-022-06273-x","volume":"112","author":"A Bighashdel","year":"2022","unstructured":"Bighashdel, A., Jancura, P., Dubbelman, G.: Model-free inverse reinforcement learning with multi-intention, unlabeled, and overlapping demonstrations. Mach. Learn. 112(7), 2263\u20132296 (2022). https:\/\/doi.org\/10.1007\/s10994-022-06273-x","journal-title":"Mach. Learn."},{"key":"1064_CR14","doi-asserted-by":"crossref","unstructured":"Rajeswaran, A., Kumar, V., Gupta, A., Vezzani, G., Schulman, J., Todorov, E., Levine, S.: Learning Complex Dexterous Manipulation with Deep Reinforcement Learning and Demonstrations (2018). arxiv:1709.10087","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"1064_CR15","doi-asserted-by":"crossref","unstructured":"Li, X., Michel, P., Anastasopoulos, A., Belinkov, Y., Durrani, N., Firat, O., Koehn, P., Neubig, G., Pino, J., Sajjad, H.: Findings of the First Shared Task on Machine Translation Robustness (2019). arxiv:1906.11943","DOI":"10.18653\/v1\/W19-5303"},{"issue":"9","key":"1064_CR16","doi-asserted-by":"publisher","first-page":"1699","DOI":"10.1007\/s10994-019-05849-4","volume":"109","author":"S-A Chen","year":"2020","unstructured":"Chen, S.-A., Tangkaratt, V., Lin, H.-T., Sugiyama, M.: Active deep q-learning with demonstration. Mach. Learn. 109(9), 1699\u20131725 (2020). https:\/\/doi.org\/10.1007\/s10994-019-05849-4","journal-title":"Mach. Learn."},{"key":"1064_CR17","doi-asserted-by":"publisher","unstructured":"Chen, Q., Dallas, E., Shahverdi, P., Korneder, J., Rawashdeh, O.A., Geoffrey Louie, W.-Y.: A sample efficiency improved method via hierarchical reinforcement learning networks. In: 2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN), pp. 1498\u20131505 (2022). https:\/\/doi.org\/10.1109\/RO-MAN53752.2022.9900738","DOI":"10.1109\/RO-MAN53752.2022.9900738"},{"key":"1064_CR18","unstructured":"Gui, Y., Doshi, P.: Inversely Learning Transferable Rewards via Abstracted States (2025). arxiv:2501.01669"},{"key":"1064_CR19","doi-asserted-by":"crossref","unstructured":"Zhou, L., Xu, C., Corso, J.J.: Towards Automatic Learning of Procedures from Web Instructional Videos (2017). arxiv:1703.09788","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"1064_CR20","unstructured":"Yu, T., Abbeel, P., Levine, S., Finn, C.: One-Shot Hierarchical Imitation Learning of Compound Visuomotor Tasks (2018). arxiv:1810.11043"},{"key":"1064_CR21","doi-asserted-by":"publisher","unstructured":"Karnan, H., Torabi, F., Warnell, G., Stone, P.: Adversarial imitation learning from video using a state observer. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 2452\u20132458 (2022). https:\/\/doi.org\/10.1109\/ICRA46639.2022.9811570","DOI":"10.1109\/ICRA46639.2022.9811570"},{"key":"1064_CR22","doi-asserted-by":"publisher","unstructured":"Jing, M., Ma, X., Huang, W., Sun, F., Yang, C., Fang, B., Liu, H.: Reinforcement learning from imperfect demonstrations under soft expert guidance. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 5109\u20135116 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i04.5953","DOI":"10.1609\/aaai.v34i04.5953"},{"key":"1064_CR23","doi-asserted-by":"crossref","unstructured":"Pathak, D., Mahmoudieh, P., Luo, G., Agrawal, P., Chen, D., Shentu, Y., Shelhamer, E., Malik, J., Efros, A.A., Darrell, T.: Zero-shot visual imitation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 2050\u20132053 (2018). arxiv:1804.08606","DOI":"10.1109\/CVPRW.2018.00278"},{"key":"1064_CR24","doi-asserted-by":"publisher","unstructured":"Chen, Q.: Improving optimal prompt learning through multilayer fusion and latent Dirichlet allocation. Front. Robot. AI 12 (2025) https:\/\/doi.org\/10.3389\/frobt.2025.1579990","DOI":"10.3389\/frobt.2025.1579990"},{"key":"1064_CR25","first-page":"15084","volume":"34","author":"L Chen","year":"2021","unstructured":"Chen, L., Lu, K., Rajeswaran, A., Lee, K., Grover, A., Laskin, M., Abbeel, P., Srinivas, A., Mordatch, I.: Decision transformer: reinforcement learning via sequence modeling. Adv. Neural. Inf. Process. Syst. 34, 15084\u201315097 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1064_CR26","unstructured":"Melo, L.C.: Transformers are meta-reinforcement learners. In: International Conference on Machine Learning, pp. 15340\u201315359 (2022). https:\/\/proceedings.mlr.press\/v162\/melo22a.html"},{"key":"1064_CR27","unstructured":"Michel, P., Levy, O., Neubig, G.: Are sixteen heads really better than one?. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1064_CR28","first-page":"2668","volume":"34","author":"H Gong","year":"2021","unstructured":"Gong, H., Tang, Y., Pino, J., Li, X.: Pay better attention to attention: head selection in multilingual and multi-domain sequence modeling. Adv. Neural. Inf. Process. Syst. 34, 2668\u20132681 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1064_CR29","doi-asserted-by":"publisher","first-page":"1442","DOI":"10.1162\/tacl_a_00436","volume":"9","author":"J Li","year":"2021","unstructured":"Li, J., Cotterell, R., Sachan, M.: Differentiable subset pruning of transformer heads. Trans. Assoc. Comput. Linguist. 9, 1442\u20131459 (2021). https:\/\/doi.org\/10.1162\/tacl_a_00436","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"1064_CR30","doi-asserted-by":"crossref","unstructured":"Ji, T., Jain, S., Ferdman, M., Milder, P., Schwartz, H.A., Balasubramanian, N.: On the Distribution, Sparsity, and Inference-time Quantization of Attention Values in Transformers (2021). arxiv:2106.01335","DOI":"10.18653\/v1\/2021.findings-acl.363"},{"key":"1064_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2022.101429","volume":"77","author":"H Sajjad","year":"2023","unstructured":"Sajjad, H., Dalvi, F., Durrani, N., Nakov, P.: On the effect of dropping layers of pre-trained transformer models. Comput. Speech Lang. 77, 101429 (2023). https:\/\/doi.org\/10.1016\/j.csl.2022.101429","journal-title":"Comput. Speech Lang."},{"key":"1064_CR32","doi-asserted-by":"crossref","unstructured":"Kovaleva, O., Romanov, A., Rogers, A., Rumshisky, A.: Revealing the Dark Secrets of BERT (2019). arxiv:1908.08593","DOI":"10.18653\/v1\/D19-1445"},{"key":"1064_CR33","doi-asserted-by":"publisher","unstructured":"Ma, W., Zhang, K., Lou, R., Wang, L., Vosoughi, S.: Contributions of transformer attention heads in multi- and cross-lingual tasks. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 1956\u20131966 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.152","DOI":"10.18653\/v1\/2021.acl-long.152"},{"key":"1064_CR34","doi-asserted-by":"crossref","unstructured":"Clark, K., Khandelwal, U., Levy, O., Manning, C.D.: What Does BERT Look At? An Analysis of BERT\u2019s Attention (2019). arxiv:1906.04341","DOI":"10.18653\/v1\/W19-4828"},{"key":"1064_CR35","doi-asserted-by":"crossref","unstructured":"Voita, E., Talbot, D., Moiseev, F., Sennrich, R., Titov, I.: Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned (2019). arxiv:1905.09418","DOI":"10.18653\/v1\/P19-1580"},{"key":"1064_CR36","doi-asserted-by":"publisher","first-page":"842","DOI":"10.1162\/tacl_a_00349","volume":"8","author":"A Rogers","year":"2021","unstructured":"Rogers, A., Kovaleva, O., Rumshisky, A.: A primer in bertology: What we know about how Bert works. Trans. Assoc. Comput. Linguist. 8, 842\u2013866 (2021). https:\/\/doi.org\/10.1162\/tacl_a_00349","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"1064_CR37","unstructured":"Brunner, G., Liu, Y., Pascual, D., Richter, O., Ciaramita, M., Wattenhofer, R.: On Identifiability in Transformers (2020). arxiv:1908.04211"},{"key":"1064_CR38","unstructured":"Liu, L., Liu, J., Han, J.: Multi-head or single-head? An empirical comparison for transformer training (2021). arxiv:2106.09650"},{"key":"1064_CR39","unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., Wierstra, D.: Continuous control with deep reinforcement learning (2019). arxiv:1509.02971"},{"key":"1064_CR40","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neucom.2020.01.016","volume":"388","author":"G Zuo","year":"2020","unstructured":"Zuo, G., Chen, K., Lu, J., Huang, X.: Deterministic generative adversarial imitation learning. Neurocomputing 388, 60\u201369 (2020). https:\/\/doi.org\/10.1016\/j.neucom.2020.01.016","journal-title":"Neurocomputing"},{"key":"1064_CR41","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L.u., Polosukhin, I.: Attention is all you need. In: Guyon, I., Luxburg, U.V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S., Garnett, R. (eds.) Advances in Neural Information Processing Systems (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017 \/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"1064_CR42","doi-asserted-by":"publisher","unstructured":"Wo\u0142czyk, M., Krutsylo, A.: Remember more by recalling less: Investigating the role of batch size in continual learning with experience replay (student abstract). In: Proceedings of the AAAI Conference on Artificial Intelligence., vol. 35, pp. 15923\u201315924 (2021). https:\/\/doi.org\/10.1609\/aaai.v35i18.17958","DOI":"10.1609\/aaai.v35i18.17958"},{"key":"1064_CR43","unstructured":"Nichol, A., Achiam, J., Schulman, J.: On First-Order Meta-Learning Algorithms (2018). arxiv:1803.02999"},{"key":"1064_CR44","unstructured":"Yu, L., Yu, T., Finn, C., Ermon, S.: Meta-inverse reinforcement learning with probabilistic context variables. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1064_CR45","unstructured":"Murphy, K.P.: Machine learning: a probabilistic perspective (adaptive computation and machine learning series). In: The MIT Press: London, p. 57 (2018)"},{"issue":"4","key":"1064_CR46","first-page":"35","volume":"24","author":"A Singhal","year":"2001","unstructured":"Singhal, A.: Modern information retrieval: a brief overview. IEEE Data Eng. Bull. 24(4), 35\u201343 (2001)","journal-title":"IEEE Data Eng. Bull."},{"issue":"4","key":"1064_CR47","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3230734","volume":"14","author":"O Gold","year":"2018","unstructured":"Gold, O., Sharir, M.: Dynamic time warping and geometric edit distance: breaking the quadratic barrier. ACM Trans. Algoritm (TALG) 14(4), 1\u201317 (2018). https:\/\/doi.org\/10.1145\/3230734","journal-title":"ACM Trans. Algoritm (TALG)"},{"key":"1064_CR48","doi-asserted-by":"crossref","unstructured":"Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., Davison, J., Shleifer, S., Platen, P., Ma, C., Jernite, Y., Plu, J., Xu, C., Scao, T.L., Gugger, S., Drame, M., Lhoest, Q., Rush, A.M.: Transformers: State-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345. Association for Computational Linguistics, Online (2020). https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"issue":"1","key":"1064_CR49","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1515\/pjbr-2021-0005","volume":"12","author":"W-YG Louie","year":"2021","unstructured":"Louie, W.-Y.G., Korneder, J., Abbas, I., Pawluk, C.: A study on an applied behavior analysis-based robot-mediated listening comprehension intervention for ASD. Paladyn, J. Behav. Robot. 12(1), 31\u201346 (2021). https:\/\/doi.org\/10.1515\/pjbr-2021-0005","journal-title":"Paladyn, J. Behav. Robot."}],"container-title":["International Journal of Computational Intelligence Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44196-025-01064-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44196-025-01064-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44196-025-01064-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T17:04:06Z","timestamp":1765213446000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44196-025-01064-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,8]]},"references-count":49,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1064"],"URL":"https:\/\/doi.org\/10.1007\/s44196-025-01064-3","relation":{},"ISSN":["1875-6883"],"issn-type":[{"value":"1875-6883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,8]]},"assertion":[{"value":"18 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 December 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared no potential conflict of interest with respect to the research, authorship and\/or publication of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval and Consent to Participate"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}}],"article-number":"325"}}