{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T12:11:51Z","timestamp":1742991111194,"version":"3.40.3"},"publisher-location":"Cham","reference-count":15,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030473570"},{"type":"electronic","value":"9783030473587"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-47358-7_6","type":"book-chapter","created":{"date-parts":[[2020,5,5]],"date-time":"2020-05-05T20:03:35Z","timestamp":1588709015000},"page":"55-66","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning in a Physics-Inspired Semi-Markov Environment"],"prefix":"10.1007","author":[{"given":"Colin","family":"Bellinger","sequence":"first","affiliation":[]},{"given":"Rory","family":"Coles","sequence":"additional","affiliation":[]},{"given":"Mark","family":"Crowley","sequence":"additional","affiliation":[]},{"given":"Isaac","family":"Tamblyn","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,5,6]]},"reference":[{"key":"6_CR1","doi-asserted-by":"publisher","first-page":"183","DOI":"10.22331\/q-2019-09-02-183","volume":"3","author":"P Andreasson","year":"2019","unstructured":"Andreasson, P., Johansson, J., Liljestrand, S., Granath, M.: Quantum error correction for the toric code using deep reinforcement learning. Quantum 3, 183 (2019)","journal-title":"Quantum"},{"key":"6_CR2","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, pp. 5048\u20135058 (2017)"},{"key":"6_CR3","unstructured":"Brockman, G., et al.: OpenAI gym (2016)"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"6_CR5","unstructured":"Hausknecht, M., Stone, P.: Deep recurrent Q-learning for partially observable MDPs. In: 2015 AAAI Fall Symposium Series (2015)"},{"key":"6_CR6","unstructured":"MacLeod, B.P., et al.: Self-driving laboratory for accelerated discovery of thin-film materials. arXiv preprint arXiv:1906.05398 (2019)"},{"key":"6_CR7","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. In: NeurIPS: Deep Learning Workshop (2013)"},{"issue":"7540","key":"6_CR8","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Roch, L.M., et al.: ChemOS: orchestrating autonomous experimentation. Sci. Robot. 3(19), eaat5559 (2018)","DOI":"10.1126\/scirobotics.aat5559"},{"key":"6_CR10","unstructured":"Schaul, T., Horgan, D., Gregor, K., Silver, D.: Universal value function approximators. In: International Conference on Machine Learning, pp. 1312\u20131320 (2015)"},{"key":"6_CR11","unstructured":"National Academies of Sciences, Engineering, and Medicine: Frontiers of Materials Research: A Decadal Survey. The National Academies Press, Washington, DC (2019)"},{"issue":"7587","key":"6_CR12","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","journal-title":"Nature"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Machine Learning Proceedings 1990, pp. 216\u2013224. Elsevier (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"6_CR14","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, King\u2019s College, Cambridge (1989)"},{"issue":"1","key":"6_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41598-018-37186-2","volume":"9","author":"Z Zhou","year":"2019","unstructured":"Zhou, Z., Kearnes, S., Li, L., Zare, R.N., Riley, P.: Optimization of molecules via deep reinforcement learning. Sci. Rep. 9(1), 1\u201310 (2019)","journal-title":"Sci. Rep."}],"container-title":["Lecture Notes in Computer Science","Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-47358-7_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T15:05:13Z","timestamp":1710255913000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-47358-7_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030473570","9783030473587"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-47358-7_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"6 May 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Canadian AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canadian Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ottawa, ON","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 May 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 May 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"canadianai2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.caiac.ca\/en\/conferences\/canadianai-2020\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"175","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the Corona pandemic. In addition, the proceedings include 4 contributions from the Graduate Student Symposium.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}