{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T05:24:53Z","timestamp":1749792293902,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":18,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819964949"},{"type":"electronic","value":"9789819964956"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-6495-6_20","type":"book-chapter","created":{"date-parts":[[2023,10,15]],"date-time":"2023-10-15T18:01:56Z","timestamp":1697392916000},"page":"232-244","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Demonstration Shaped Reward Machine for\u00a0Robot Assembly Reinforcement Learning Tasks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3855-0588","authenticated-orcid":false,"given":"Ruihong","family":"Xiao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2658-7762","authenticated-orcid":false,"given":"Hong","family":"Zhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5963-2932","authenticated-orcid":false,"given":"Yiming","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5255-5559","authenticated-orcid":false,"given":"Chenguang","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,16]]},"reference":[{"key":"20_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. arXiv preprint arXiv:1805.01954 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"20_CR3","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Hester, T., et al.: Deep Q-learning from demonstrations. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"20_CR5","unstructured":"Vecerik, M., et al.: Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. arXiv preprint arXiv:1707.08817 (2017)"},{"key":"20_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103500","volume":"297","author":"S Arora","year":"2021","unstructured":"Arora, S., Doshi, P.: A survey of inverse reinforcement learning: challenges, methods and progress. Artif. Intell. 297, 103500 (2021)","journal-title":"Artif. Intell."},{"key":"20_CR7","unstructured":"Icarte, R.T., Klassen, T., Valenzano, R., McIlraith, S.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: International Conference on Machine Learning, pp. 2107\u20132116. PMLR (2018)"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Luo, J., et al.: Robust multi-modal policies for industrial assembly via reinforcement learning and demonstrations: a large-scale study. arXiv preprint arXiv:2103.11512 (2021)","DOI":"10.15607\/RSS.2021.XVII.088"},{"issue":"2","key":"20_CR9","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1109\/LRA.2020.2965869","volume":"5","author":"K Kimble","year":"2020","unstructured":"Kimble, K., et al.: Benchmarking protocols for evaluating small parts robotic assembly systems. IEEE Robot. Autom. Lett. 5(2), 883\u2013889 (2020)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"20_CR10","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR (2018)"},{"key":"20_CR11","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"20_CR12","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"20_CR13","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Rajeswaran, A., et al.: Learning complex dexterous manipulation with deep reinforcement learning and demonstrations. arXiv preprint arXiv:1709.10087 (2017)","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"20_CR15","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR16","unstructured":"Coumans, E., Bai, Y.: Pybullet, a python module for physics simulation for games, robotics and machine learning (2016)"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"20_CR18","doi-asserted-by":"publisher","first-page":"51416","DOI":"10.1109\/ACCESS.2021.3068769","volume":"9","author":"J Collins","year":"2021","unstructured":"Collins, J., Chand, S., Vanderkop, A., Howard, D.: A review of physics simulators for robotic applications. IEEE Access 9, 51416\u201351431 (2021)","journal-title":"IEEE Access"}],"container-title":["Lecture Notes in Computer Science","Intelligent Robotics and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-6495-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,15]],"date-time":"2023-10-15T18:03:33Z","timestamp":1697393013000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-6495-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819964949","9789819964956"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-6495-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"16 October 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIRA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Robotics and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 July 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 July 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icira2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icira2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"630","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"431","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"68% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}