{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T10:24:53Z","timestamp":1759400693306,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031408366"},{"type":"electronic","value":"9783031408373"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40837-3_8","type":"book-chapter","created":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T23:02:25Z","timestamp":1692658945000},"page":"123-140","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Reinforcement Learning with\u00a0Temporal-Logic-Based Causal Diagrams"],"prefix":"10.1007","author":[{"given":"Yash","family":"Paliwal","sequence":"first","affiliation":[]},{"given":"Rajarshi","family":"Roy","sequence":"additional","affiliation":[]},{"given":"Jean-Rapha\u00ebl","family":"Gaglione","sequence":"additional","affiliation":[]},{"given":"Nasim","family":"Baharisangari","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Neider","sequence":"additional","affiliation":[]},{"given":"Xiaoming","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Ufuk","family":"Topcu","sequence":"additional","affiliation":[]},{"given":"Zhe","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,22]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"Abate, A., Almulla, Y., Fox, J., Hyland, D., Wooldridge, M.J.: Learning task automata for reinforcement learning using hidden Markov models. CoRR abs\/2208.11838 (2022)","DOI":"10.3233\/FAIA230247"},{"key":"8_CR2","doi-asserted-by":"publisher","unstructured":"Aksaray, D., Jones, A., Kong, Z., Schwager, M., Belta, C.: Q-learning for robust satisfaction of signal temporal logic specifications. In: IEEE CDC 2016, pp. 6565\u20136570 (2016). https:\/\/doi.org\/10.1109\/CDC.2016.7799279","DOI":"10.1109\/CDC.2016.7799279"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: AAAI 2018 (2018)","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"8_CR4","unstructured":"Bareinboim, E., Forney, A., Pearl, J.: Bandits with unobserved confounders: a causal approach. In: Cortes, C., Lawrence, N., Lee, D., Sugiyama, M., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 28. Curran Associates, Inc. (2015). https:\/\/proceedings.neurips.cc\/paper\/2015\/file\/795c7a7a5ec6b460ec00c5841019b9e9-Paper.pdf"},{"key":"8_CR5","unstructured":"Forney, A., Pearl, J., Bareinboim, E.: Counterfactual data-fusion for online reinforcement learners. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 70, pp. 1156\u20131164. PMLR (2017). https:\/\/proceedings.mlr.press\/v70\/forney17a.html"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Fu, J., Topcu, U.: Probably approximately correct MDP learning and control with temporal logic constraints. Robotics: Science and Systems abs\/1404.7073 (2014)","DOI":"10.15607\/RSS.2014.X.039"},{"key":"8_CR7","unstructured":"Giacomo, G.D., Vardi, M.Y.: Linear temporal logic and linear dynamic logic on finite traces. In: IJCAI, pp. 854\u2013860. IJCAI\/AAAI (2013)"},{"key":"8_CR8","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1007\/978-3-642-04898-2_162","volume-title":"International Encyclopedia of Statistical Science","author":"S Greenland","year":"2011","unstructured":"Greenland, S., Pearl, J.: Causal diagrams. In: Lovric, M. (ed.) International Encyclopedia of Statistical Science, pp. 208\u2013216. Springer, Cham (2011). https:\/\/doi.org\/10.1007\/978-3-642-04898-2_162"},{"key":"8_CR9","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1287\/deca.1050.0020","volume":"2","author":"R Howard","year":"2005","unstructured":"Howard, R., Matheson, J.: Influence diagrams. Decis. Anal. 2, 127\u2013143 (2005). https:\/\/doi.org\/10.1287\/deca.1050.0020","journal-title":"Decis. Anal."},{"key":"8_CR10","unstructured":"Huang, B., Feng, F., Lu, C., Magliacane, S., Zhang, K.: AdaRL: What, where, and how to adapt in transfer reinforcement learning. ArXiv: abs\/2107.02729 (2021)"},{"key":"8_CR11","unstructured":"Icarte, R.T., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: ICML Proceedings of Machine Learning Research, vol. 80, pp. 2112\u20132121. PMLR (2018)"},{"key":"8_CR12","doi-asserted-by":"publisher","unstructured":"Koller, D., Milch, B.: Multi-agent influence diagrams for representing and solving games. Games Econ. Behav. 45(1), 181\u2013221 (2003). https:\/\/doi.org\/10.1016\/S0899-8256(02)00544-4, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0899825602005444. First World Congress of the Game Theory Society","DOI":"10.1016\/S0899-8256(02)00544-4"},{"key":"8_CR13","doi-asserted-by":"publisher","unstructured":"Lattimore, F., Lattimore, T., Reid, M.D.: Causal bandits: learning good interventions via causal inference (2016). https:\/\/doi.org\/10.48550\/ARXIV.1606.03203, https:\/\/arxiv.org\/abs\/1606.03203","DOI":"10.48550\/ARXIV.1606.03203"},{"key":"8_CR14","doi-asserted-by":"publisher","unstructured":"Lee, S., Bareinboim, E.: Structural causal bandits with non-manipulable variables. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, no. 01, pp. 4164\u20134172 (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.33014164, https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/4320","DOI":"10.1609\/aaai.v33i01.33014164"},{"key":"8_CR15","unstructured":"Lee, S., Bareinboim, E.: Characterizing optimal mixed policies: where to intervene and what to observe. In: Advances in Neural Information Processing Systems. vol. 33, pp. 8565\u20138576. Curran Associates, Inc. (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/61a10e6abb1149ad9d08f303267f9bc4-Paper.pdf"},{"key":"8_CR16","unstructured":"Lee, S., Correa, J.D., Bareinboim, E.: General identifiability with arbitrary surrogate experiments. In: Proceedings of The 35th Uncertainty in Artificial Intelligence Conference. Proceedings of Machine Learning Research, vol. 115, pp. 389\u2013398. PMLR (2020). https:\/\/proceedings.mlr.press\/v115\/lee20b.html"},{"key":"8_CR17","doi-asserted-by":"publisher","unstructured":"Li, X., Vasile, C.I., Belta, C.: Reinforcement learning with temporal logic rewards. In: Proceedings of the IEEE\/RSJ International Conference Intelligent Robots and Systems, pp. 3834\u20133839 (2017). https:\/\/doi.org\/10.1109\/IROS.2017.8206234","DOI":"10.1109\/IROS.2017.8206234"},{"key":"8_CR18","doi-asserted-by":"publisher","unstructured":"Lu, C., Huang, B., Wang, K., Hern\u00e1ndez-Lobato, J.M., Zhang, K., Sch\u00f6lkopf, B.: Sample-efficient reinforcement learning via counterfactual-based data augmentation (2020). https:\/\/doi.org\/10.48550\/ARXIV.2012.09092, https:\/\/arxiv.org\/abs\/2012.09092","DOI":"10.48550\/ARXIV.2012.09092"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Memarian, F., Xu, Z., Wu, B., Wen, M., Topcu, U.: Active task-inference-guided deep inverse reinforcement learning. In: CDC, pp. 1932\u20131938. IEEE (2020)","DOI":"10.1109\/CDC42340.2020.9304190"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Neider, D., Gaglione, J.R., Gavran, I., Topcu, U., Wu, B., Xu, Z.: Advice-guided reinforcement learning in a non-Markovian environment. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 9073\u20139080 (2021)","DOI":"10.1609\/aaai.v35i10.17096"},{"key":"8_CR21","unstructured":"Pitis, S., Creager, E., Garg, A.: Counterfactual data augmentation using locally factored dynamics. In: Proceedings of the 34th International Conference on Neural Information Processing Systems, NIPS 2020, Red Hook, NY, USA (2020)"},{"key":"8_CR22","unstructured":"Spirtes, P.: Introduction to causal inference. J. Mach. Learn. Res. 11(54), 1643\u20131662 (2010). http:\/\/jmlr.org\/papers\/v11\/spirtes10a.html"},{"key":"8_CR23","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192","volume-title":"Reinforcement Learning - An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning - An Introduction. MIT Press, Cambridge (1998)"},{"key":"8_CR24","unstructured":"Tennenholtz, G., Mannor, S., Shalit, U.: Off-policy evaluation in partially observable environments. ArXiv: abs\/1909.03739 (2019)"},{"key":"8_CR25","doi-asserted-by":"publisher","unstructured":"Wen, M., Papusha, I., Topcu, U.: Learning from demonstrations with high-level side information. In: Proceedings of the IJCAI 2017, pp. 3055\u20133061 (2017). https:\/\/doi.org\/10.24963\/ijcai.2017\/426, https:\/\/doi.org\/10.24963\/ijcai.2017\/426","DOI":"10.24963\/ijcai.2017\/426"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Xu, Z., et al.: Joint inference of reward machines and policies for reinforcement learning. In: Proceedings of the International Conference on Automated Planning and Scheduling (ICAPS), Special Track on Planning and Learning (2020)","DOI":"10.1609\/icaps.v30i1.6756"},{"key":"8_CR27","doi-asserted-by":"publisher","unstructured":"Xu, Z., Topcu, U.: Transfer of temporal logic formulas in reinforcement learning. In: Proceedings of the IJCAI 2019, pp. 4010\u20134018 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/557","DOI":"10.24963\/ijcai.2019\/557"},{"key":"8_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/978-3-030-84060-0_8","volume-title":"Machine Learning and Knowledge Extraction","author":"Z Xu","year":"2021","unstructured":"Xu, Z., Wu, B., Ojha, A., Neider, D., Topcu, U.: Active finite reward automaton inference and reinforcement learning using queries and counterexamples. In: Holzinger, A., Kieseberg, P., Tjoa, A.M., Weippl, E. (eds.) CD-MAKE 2021. LNCS, vol. 12844, pp. 115\u2013135. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-84060-0_8"},{"key":"8_CR29","unstructured":"Zhang, A., et al.: Learning causal state representations of partially observable environments. ArXiv: abs\/1906.10437 (2019)"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Zhang, J., Bareinboim, E.: Transfer learning in multi-armed bandits: a causal approach. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence, IJCAI 2017, pp. 1340\u20131346. AAAI Press (2017)","DOI":"10.24963\/ijcai.2017\/186"},{"key":"8_CR31","doi-asserted-by":"crossref","unstructured":"Zhu, S., Tabajara, L.M., Li, J., Pu, G., Vardi, M.Y.: Symbolic LTLF synthesis. In: IJCAI, pp. 1362\u20131369. https:\/\/www.ijcai.org\/ (2017)","DOI":"10.24963\/ijcai.2017\/189"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Extraction"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40837-3_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T12:33:41Z","timestamp":1710333221000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40837-3_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031408366","9783031408373"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40837-3_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CD-MAKE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Cross-Domain Conference for Machine Learning and Knowledge Extraction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Benevento","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cd-make2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cd-make.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"60% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}