{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T04:30:44Z","timestamp":1746246644978,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030553920"},{"type":"electronic","value":"9783030553937"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-55393-7_1","type":"book-chapter","created":{"date-parts":[[2020,8,19]],"date-time":"2020-08-19T19:12:32Z","timestamp":1597864352000},"page":"3-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["MA-TREX: Mutli-agent Trajectory-Ranked Reward Extrapolation via Inverse Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Sili","family":"Huang","sequence":"first","affiliation":[]},{"given":"Bo","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Hechang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Haiyin","family":"Piao","sequence":"additional","affiliation":[]},{"given":"Zhixiao","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Chang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,8,20]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the Twenty-first International Conference, Machine Learning, Canada, vol. 69 (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"1_CR2","unstructured":"Amodei, D., Olah, C., Steinhardt, J., Christiano, P.F., Schulman, J., Man\u00e9, D.: Concrete problems in AI safety. CoRR abs\/1606.06565 (2016)"},{"issue":"5","key":"1_CR3","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall, B.D., Chernova, S., Veloso, M.M., Browning, B.: A survey of robot learning from demonstration. Robot. Auton. Syst. 57(5), 469\u2013483 (2009)","journal-title":"Robot. Auton. Syst."},{"issue":"3\u20134","key":"1_CR4","first-page":"324","volume":"39","author":"RA Bradley","year":"1952","unstructured":"Bradley, R.A., Terry, M.E.: Rank analysis of incomplete block designs: I. The method of paired comparisons. Biometrika 39(3\u20134), 324\u2013345 (1952)","journal-title":"Biometrika"},{"key":"1_CR5","unstructured":"Brown, D.S., Goo, W., Nagarajan, P., Niekum, S.: Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations. In: Proceedings of the 36th International Conference on Machine Learning, USA, vol. 97, pp. 783\u2013792 (2019)"},{"key":"1_CR6","unstructured":"Christiano, P.F., Leike, J., Brown, T.B., Martic, M., Legg, S., Amodei, D.: Deep reinforcement learning from human preferences. In: Advances in Neural Information Processing Systems 2017, USA, pp. 4299\u20134307 (2017)"},{"key":"1_CR7","unstructured":"Finn, C., Christiano, P.F., Abbeel, P., Levine, S.: A connection between generative adversarial networks, inverse reinforcement learning, and energy-based models. CoRR abs\/1611.03852 (2016)"},{"key":"1_CR8","unstructured":"Finn, C., Levine, S., Abbeel, P.: Guided cost learning: deep inverse optimal control via policy optimization. In: Proceedings of the 33nd International Conference on Machine Learning, USA, vol. 48, pp. 49\u201358 (2016)"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Henderson, P., Chang, W., Bacon, P., Meger, D., Pineau, J., Precup, D.: OptionGAN: learning joint reward-policy options using generative adversarial inverse reinforcement learning. In: Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, USA, pp. 3199\u20133206 (2018)","DOI":"10.1609\/aaai.v32i1.11775"},{"key":"1_CR10","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems 2016, Spain, pp. 4565\u20134573 (2016)"},{"key":"1_CR11","unstructured":"Hu, J., Wellman, M.P.: Multiagent reinforcement learning: theoretical framework and an algorithm. In: Proceedings of the Fifteenth International Conference on Machine Learning USA, pp. 242\u2013250 (1998)"},{"key":"1_CR12","unstructured":"Ibarz, B., Leike, J., Pohlen, T., Irving, G., Legg, S., Amodei, D.: Reward learning from human preferences and demonstrations in Atari. In: Advances in Neural Information Processing Systems 2018, Canada, pp. 8022\u20138034 (2018)"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"1_CR14","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in Neural Information Processing Systems 2017, USA, pp. 6379\u20136390 (2017)"},{"issue":"293","key":"1_CR15","first-page":"1","volume":"115","author":"DR Luce","year":"2005","unstructured":"Luce, D.R.: Individual choice behavior: a theoretical analysis. J. Am. Stat. Assoc. 115(293), 1\u201315 (2005)","journal-title":"J. Am. Stat. Assoc."},{"key":"1_CR16","unstructured":"Ng, A.Y., Harada, D., Russell, S.J.: Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning, Slovenia, pp. 278\u2013287 (1999)"},{"key":"1_CR17","unstructured":"Ramachandran, D., Amir, E.: Bayesian inverse reinforcement learning. In: Proceedings of the 20th International Joint Conference on Artificial Intelligence, India, pp. 2586\u20132591 (2007)"},{"key":"1_CR18","unstructured":"Song, J., Ren, H., Sadigh, D., Ermon, S.: Multi-agent generative adversarial imitation learning. In: Advances in Neural Information Processing Systems 2018, Canada, pp. 7472\u20137483 (2018)"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. In: Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence, Sweden, pp. 4950\u20134957 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Wang, S., Hu, X., Yu, P.S., Li, Z.: MMRate: inferring multi-aspect diffusion networks with multi-pattern cascades. In: The 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, USA, pp. 1246\u20131255 (2014)","DOI":"10.1145\/2623330.2623728"},{"key":"1_CR21","unstructured":"Yu, L., Song, J., Ermon, S.: Multi-agent adversarial inverse reinforcement learning. In: Proceedings of the 36th International Conference on Machine Learning, California, vol. 97, pp. 7194\u20137201 (2019)"},{"key":"1_CR22","unstructured":"Ziebart, B.D., Maas, A.L., Bagnell, J.A., Dey, A.K.: Maximum entropy inverse reinforcement learning. In: Proceedings of the Twenty-Third AAAI Conference on Artificial Intelligence, USA, pp. 1433\u20131438 (2008)"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-55393-7_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T08:56:27Z","timestamp":1710233787000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-55393-7_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030553920","9783030553937"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-55393-7_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"20 August 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ksem2020.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"291","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"58","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}