{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T18:28:50Z","timestamp":1767637730360,"version":"3.48.0"},"reference-count":10,"publisher":"Maximum Academic Press","license":[{"start":{"date-parts":[[2021,4,28]],"date-time":"2021-04-28T00:00:00Z","timestamp":1619568000000},"content-version":"unspecified","delay-in-days":117,"URL":"https:\/\/www.cambridge.org\/core\/terms"}],"content-domain":{"domain":["cambridge.org"],"crossmark-restriction":true},"short-container-title":["The Knowledge Engineering Review"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1017\/s0269888921000047","type":"journal-article","created":{"date-parts":[[2021,4,28]],"date-time":"2021-04-28T05:53:13Z","timestamp":1619589193000},"update-policy":"https:\/\/doi.org\/10.1017\/policypage","source":"Crossref","is-referenced-by-count":0,"title":["Special issue on adaptive and learning agents 2018"],"prefix":"10.48130","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7951-878X","authenticated-orcid":false,"given":"Patrick","family":"Mannion","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anna","family":"Harutyunyan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bei","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaushik","family":"Subramanian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"27968","published-online":{"date-parts":[[2021,4,28]]},"reference":[{"key":"S0269888921000047_ref6","doi-asserted-by":"crossref","first-page":"e29","DOI":"10.1017\/S0269888920000077","article-title":"Improving trust and reputation assessment with dynamic behaviour","volume":"35","author":"Player","year":"2020","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref4","doi-asserted-by":"crossref","first-page":"e8","DOI":"10.1017\/S0269888919000031","article-title":"Introspective q-learning and learning from demonstration","volume":"34","author":"Li","year":"2019","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref5","doi-asserted-by":"crossref","first-page":"e14","DOI":"10.1017\/S0269888919000092","article-title":"Two-level q-learning: learning from conflict demonstrations","volume":"34","author":"Li","year":"2019","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref10","doi-asserted-by":"crossref","unstructured":"Valcarcel Macua, S. , Davies, I. , Tukiainen, A. & Munoz de Cote, E. in press. Diff-dac: Fully distributed actor-critic for average multitask deep reinforcement learning. The Knowledge Engineering Review 36.","DOI":"10.1017\/S0269888921000023"},{"key":"S0269888921000047_ref1","doi-asserted-by":"crossref","first-page":"e12","DOI":"10.1017\/S0269888919000043","article-title":"Team learning from human demonstration with coordination confidence","volume":"34","author":"Banerjee","year":"2019","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref2","doi-asserted-by":"crossref","DOI":"10.1017\/S0269888919000055","article-title":"Pre-training with non-expert human demonstration for deep reinforcement learning","volume":"34","author":"de la Cruz","year":"2019","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref3","unstructured":"Jain, A. , Khetarpal, K. & Precup, D. 2021. Safe option-critic: learning safety in the option-critic architecture. The Knowledge Engineering Review 36, e4."},{"key":"S0269888921000047_ref9","doi-asserted-by":"crossref","first-page":"e31","DOI":"10.1017\/S0269888920000120","article-title":"Effects of parity, sympathy and reciprocity in increasing social welfare","volume":"35","author":"Sen","year":"2020","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref7","doi-asserted-by":"crossref","DOI":"10.1017\/S0269888920000119","article-title":"Toll-based reinforcement learning for efficient equilibria in route choice","volume":"35","author":"Ramos","year":"2020","journal-title":"The Knowledge Engineering Review"},{"key":"S0269888921000047_ref8","doi-asserted-by":"crossref","first-page":"e13","DOI":"10.1017\/S0269888919000079","article-title":"Action learning and grounding in simulated human\u2013robot interactions","volume":"34","author":"Roesler","year":"2019","journal-title":"The Knowledge Engineering Review"}],"container-title":["The Knowledge Engineering Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.cambridge.org\/core\/services\/aop-cambridge-core\/content\/view\/S0269888921000047","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T14:42:21Z","timestamp":1767624141000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.cambridge.org\/core\/product\/identifier\/S0269888921000047\/type\/journal_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":10,"alternative-id":["S0269888921000047"],"URL":"https:\/\/doi.org\/10.1017\/s0269888921000047","relation":{},"ISSN":["0269-8889","1469-8005"],"issn-type":[{"type":"print","value":"0269-8889"},{"type":"electronic","value":"1469-8005"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"\u00a9 The Author(s), 2021. Published by Cambridge University Press","name":"copyright","label":"Copyright","group":{"name":"copyright_and_licensing","label":"Copyright and Licensing"}}],"article-number":"e7"}}