{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T20:59:59Z","timestamp":1742936399221,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":21,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819607822"},{"type":"electronic","value":"9789819607839"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0783-9_1","type":"book-chapter","created":{"date-parts":[[2025,1,21]],"date-time":"2025-01-21T19:31:44Z","timestamp":1737487904000},"page":"3-17","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Utilizing Large Language Models for\u00a0Robot Skill Reward Shaping in\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Qi","family":"Guo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xing","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianjiang","family":"Hui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengxiong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Panfeng","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,1,22]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Li, B., Liu, X., Liu, Z., et al.: Episode-Fuzzy-COACH method for fast robot skill learning. IEEE Trans. Ind. Electron. 71(6), 5931\u20135940 (2024)","key":"1_CR1","DOI":"10.1109\/TIE.2023.3294600"},{"doi-asserted-by":"crossref","unstructured":"B\u0131y\u0131k, E., et al.: Learning reward functions from diverse sources of human feedback: optimally integrating demonstrations and preferences. Int. J. Robot. Res. 41(1), 45\u201367 (2022)","key":"1_CR2","DOI":"10.1177\/02783649211041652"},{"doi-asserted-by":"crossref","unstructured":"Celemin, C., et al.: An interactive framework for learning continuous actions policies based on corrective feedback. J. Intell. Robot. Syst. 95, 77\u201397 (2019)","key":"1_CR3","DOI":"10.1007\/s10846-018-0839-z"},{"unstructured":"Nguyen, K.: Imitation learning with recurrent neural networks. arXiv preprint arXiv:1607.05241 (2016)","key":"1_CR4"},{"unstructured":"Rahmatizadeh, R., et al.: Learning real manipulation tasks from virtual demonstrations using LSTM. arXiv preprint arXiv:1603.03833 (2016)","key":"1_CR5"},{"unstructured":"Ng, A.Y.: Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the ICML, vol. 99, pp. 278\u2013287 (1999)","key":"1_CR6"},{"doi-asserted-by":"crossref","unstructured":"Wiewiora, E.: Potential-based shaping and Q-value initialization are equivalent. J. Artif. Intell. Res. 19, 205\u2013208 (2003)","key":"1_CR7","DOI":"10.1613\/jair.1190"},{"doi-asserted-by":"crossref","unstructured":"Goyal, P., Niekum, S., Mooney, R.J.: Policy invariance under reward transformations: using natural language for reward shaping in reinforcement learning. arXiv preprint arXiv:1903.02020 (2019)","key":"1_CR8","DOI":"10.24963\/ijcai.2019\/331"},{"unstructured":"Yu, W., Gileadi, N., Fu, C., et al.: Language to rewards for robotic skill synthesis. arXiv preprint arXiv:2306.08647 (2023)","key":"1_CR9"},{"unstructured":"Ma, Yecheng Jason, et al.: Eureka: Human-level reward design via coding large language models. arXiv preprint arXiv:2310.12931 (2023)","key":"1_CR10"},{"unstructured":"Xie, T., Zhao, S., Wu, C.H., et al.: Text2Reward: automated dense reward function generation for reinforcement learning. arXiv preprint arXiv:2309.11489 (2023)","key":"1_CR11"},{"unstructured":"Zeng, Y., Yao M., Lin, S.: Learning Reward for robot skills using large language models via self-alignment. arXiv preprint arXiv:2405.07162 (2024)","key":"1_CR12"},{"unstructured":"Wang, B., et al.: Secrets of RLHF in large language models part II: reward modeling. arXiv preprint arXiv:2401.06080 (2024)","key":"1_CR13"},{"doi-asserted-by":"crossref","unstructured":"Liang, J., et al.: Code as policies: language model programs for embodied control. IEEE International Conference on Robotics and Automation, pp. 9493\u20139500 (2023)","key":"1_CR14","DOI":"10.1109\/ICRA48891.2023.10160591"},{"unstructured":"Wu, Y., et al.: Read and reap the rewards: Learning to play Atari with the help of instruction manuals. In: Advances in Neural Information Processing Systems, vol. 36 (2024)","key":"1_CR15"},{"doi-asserted-by":"crossref","unstructured":"Vemprala, S.H., Bonatti, R., Bucker, A., Kapoor, A.: ChatGPT for robotics: design principles and model abilities. IEEE Access 12, 55682\u201355696 (2024)","key":"1_CR16","DOI":"10.1109\/ACCESS.2024.3387941"},{"unstructured":"Christiano, P.F., Leike, J., Brown, T., Martic, M., Legg, S., Amodei, D.: Deep reinforcement learning from human preferences. In: Advances in Neural Information Processing Systems, vol. 30 (2017)","key":"1_CR17"},{"unstructured":"Daniel, M.Z., et al.: Fine-tuning language models from human preferences. arXiv preprint arXiv:1909.08593 (2019)","key":"1_CR18"},{"unstructured":"Yuntao, B., et al.: Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022a)","key":"1_CR19"},{"unstructured":"Brown, D., Wonjoon, G., Prabhat, N., Scott, N.: Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations. In: International Conference on Machine Learning, pp. 783\u2013792 (2019)","key":"1_CR20"},{"key":"1_CR21","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., Wang, X., Schuurmans, D., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Intelligent Robotics and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0783-9_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,21]],"date-time":"2025-01-21T19:31:50Z","timestamp":1737487910000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0783-9_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819607822","9789819607839"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0783-9_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"22 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIRA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Robotics and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xi'an","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icira2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.icira2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}