{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:49:30Z","timestamp":1776116970844,"version":"3.50.1"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031684159","type":"print"},{"value":"9783031684166","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-68416-6_18","type":"book-chapter","created":{"date-parts":[[2024,8,28]],"date-time":"2024-08-28T07:02:40Z","timestamp":1724828560000},"page":"302-321","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Multi-agent Path Finding for\u00a0Timed Tasks Using Evolutionary Games"],"prefix":"10.1007","author":[{"given":"Sheryl","family":"Paul","sequence":"first","affiliation":[]},{"given":"Anand","family":"Balakrishnan","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Jyotirmoy V.","family":"Deshmukh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,29]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Agmon, N., Urieli, D., Stone, P.: Multiagent patrol generalized to complex environmental conditions. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a025, pp. 1090\u20131095 (2011)","DOI":"10.1609\/aaai.v25i1.7988"},{"key":"18_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1007\/978-3-319-21668-3_21","volume-title":"Computer Aided Verification","author":"T Akazaki","year":"2015","unstructured":"Akazaki, T., Hasuo, I.: Time robustness in MTL and expressivity in hybrid system falsification. In: Kroening, D., P\u0103s\u0103reanu, C.S. (eds.) CAV 2015. LNCS, vol. 9207, pp. 356\u2013374. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-21668-3_21"},{"key":"18_CR3","doi-asserted-by":"publisher","unstructured":"Aksaray, D., Jones, A., Kong, Z., Schwager, M., Belta, C.: Q-learning for robust satisfaction of signal temporal logic specifications. In: 2016 IEEE 55th Conference on Decision and Control (CDC), pp. 6565\u20136570 (2016). https:\/\/doi.org\/10.1109\/cdc.2016.7799279","DOI":"10.1109\/cdc.2016.7799279"},{"key":"18_CR4","unstructured":"Alexander, J.M.: Evolutionary game theory. In: Zalta, E.N. (ed.) The Stanford Encyclopedia of Philosophy. Summer 2021 edn., Metaphysics Research Lab, Stanford University (2021)"},{"key":"18_CR5","unstructured":"Amodei, D., Olah, C., Steinhardt, J., Christiano, P., Schulman, J., Man\u00e9, D.: Concrete problems in AI safety (2016)"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Badings, T., Romao, L., Abate, A., Jansen, N.: A stability-based abstraction framework for reach-avoid control of stochastic dynamical systems with unknown noise distributions (2024)","DOI":"10.23919\/ECC64448.2024.10590865"},{"key":"18_CR7","doi-asserted-by":"publisher","unstructured":"Balakrishnan, A., Deshmukh, J.V.: Structured reward shaping using signal temporal logic specifications. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 3481\u20133486 (2019). https:\/\/doi.org\/10.1109\/IROS40897.2019.8968254","DOI":"10.1109\/IROS40897.2019.8968254"},{"key":"18_CR8","doi-asserted-by":"publisher","unstructured":"Balakrishnan, A., Jak\u0161i\u0107, S., Aguilar, E.A., Ni\u010dkovi\u0107, D., Deshmukh, J.V.: Model-free reinforcement learning for spatiotemporal tasks using symbolic automata. In: 2023 62nd IEEE Conference on Decision and Control (CDC), pp. 6834\u20136840 (2023). https:\/\/doi.org\/10.1109\/CDC49753.2023.10383559","DOI":"10.1109\/CDC49753.2023.10383559"},{"key":"18_CR9","unstructured":"Bellusci, M., Basilico, N., Amigoni, F.: Multi-agent path finding in configurable environments. In: Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems, pp. 159\u2013167 (2020)"},{"key":"18_CR10","volume-title":"Dynamic Programming and Optimal Control","author":"DP Bertsekas","year":"2005","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, vol. I, 3rd edn. Athena Scientific, Belmont (2005)","edition":"3"},{"key":"18_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-89716-1_1","volume-title":"Reachability Problems","author":"U Boker","year":"2021","unstructured":"Boker, U.: Quantitative vs. weighted automata. In: Bell, P.C., Totzke, P., Potapov, I. (eds.) RP 2021. LNCS, vol. 13035, pp. 3\u201318. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-89716-1_1"},{"key":"18_CR12","unstructured":"Brockman, G., et al.: OpenAI gym. arXiv preprint arXiv:1606.01540 (2016)"},{"key":"18_CR13","unstructured":"Camacho, A., Chen, O., Sanner, S., McIlraith, S.A.: Non-Markovian rewards expressed in LTL: guiding search via reward shaping (extended version). In: GoalsRL, a Workshop Collocated with ICML\/IJCAI\/AAMAS (2018)"},{"key":"18_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/978-3-540-87531-4_28","volume-title":"Computer Science Logic","author":"K Chatterjee","year":"2008","unstructured":"Chatterjee, K., Doyen, L., Henzinger, T.A.: Quantitative languages. In: Kaminski, M., Martini, S. (eds.) CSL 2008. LNCS, vol. 5213, pp. 385\u2013400. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-87531-4_28"},{"key":"18_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/978-3-662-53413-7_2","volume-title":"Static Analysis","author":"K Chatterjee","year":"2016","unstructured":"Chatterjee, K., Henzinger, T.A., Otop, J.: Quantitative monitor automata. In: Rival, X. (ed.) SAS 2016. LNCS, vol. 9837, pp. 23\u201338. Springer, Heidelberg (2016). https:\/\/doi.org\/10.1007\/978-3-662-53413-7_2"},{"key":"18_CR16","series-title":"Synthesis Lectures on Computer Science","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/978-3-031-29310-8_9","volume-title":"Adaptive and Learning-Based Control of Safety-Critical Systems","author":"M Cohen","year":"2023","unstructured":"Cohen, M., Belta, C.: Temporal logic guided safe model-based reinforcement learning. In: Cohen, M., Belta, C. (eds.) Adaptive and Learning-Based Control of Safety-Critical Systems. SLCS, pp. 165\u2013192. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-29310-8_9"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Cohen, M.H., Belta, C.: Model-based reinforcement learning for approximate optimal control with temporal logic specifications. In: Proceedings of the 24th International Conference on Hybrid Systems: Computation and Control, pp. 1\u201311 (2021)","DOI":"10.1145\/3447928.3456639"},{"key":"18_CR18","doi-asserted-by":"publisher","unstructured":"Droste, M., Gastin, P.: Weighted automata and weighted logics. In: Droste, M., Kuich, W., Vogler, H. (eds.) Handbook of Weighted Automata. Monographs in Theoretical Computer Science. An EATCS Series, pp. 175\u2013211. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-01492-5_5","DOI":"10.1007\/978-3-642-01492-5_5"},{"key":"18_CR19","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/s10472-010-9193-y","volume":"57","author":"Y Elmaliach","year":"2009","unstructured":"Elmaliach, Y., Agmon, N., Kaminka, G.A.: Multi-robot area patrol under frequency constraints. Ann. Math. Artif. Intell. 57, 293\u2013320 (2009)","journal-title":"Ann. Math. Artif. Intell."},{"issue":"1","key":"18_CR20","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1109\/LCSYS.2020.3001875","volume":"5","author":"Y Gilpin","year":"2020","unstructured":"Gilpin, Y., Kurtz, V., Lin, H.: A smooth robustness measure of signal temporal logic for symbolic control. IEEE Control Syst. Lett. 5(1), 241\u2013246 (2020)","journal-title":"IEEE Control Syst. Lett."},{"issue":"12","key":"18_CR21","doi-asserted-by":"publisher","first-page":"4051","DOI":"10.1109\/TAC.2018.2799561","volume":"63","author":"M Guo","year":"2018","unstructured":"Guo, M., Zavlanos, M.M.: Probabilistic motion planning under temporal tasks and soft constraints. IEEE Trans. Autom. Control 63(12), 4051\u20134066 (2018)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"16","key":"18_CR22","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.ifacol.2018.08.013","volume":"51","author":"S Haesaert","year":"2018","unstructured":"Haesaert, S., Soudjani, S., Abate, A.: Temporal logic control of general Markov decision processes by approximate policy refinement. IFAC-PapersOnLine 51(16), 73\u201378 (2018)","journal-title":"IFAC-PapersOnLine"},{"issue":"2","key":"18_CR23","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1109\/TSSC.1968.300136","volume":"4","author":"PE Hart","year":"1968","unstructured":"Hart, P.E., Nilsson, N.J., Raphael, B.: A formal basis for the heuristic determination of minimum cost paths. IEEE Trans. Syst. Sci. Cybern. 4(2), 100\u2013107 (1968). https:\/\/doi.org\/10.1109\/TSSC.1968.300136","journal-title":"IEEE Trans. Syst. Sci. Cybern."},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Kantaros, Y., Abate, A., Kroening, D., Pappas, G.J., Lee, I.: Reinforcement learning for temporal logic control synthesis with probabilistic satisfaction guarantees. In: 2019 IEEE 58th Conference on Decision and Control (CDC), pp. 5338\u20135343. IEEE (2019)","DOI":"10.1109\/CDC40024.2019.9028919"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Hashemi, N., Hoxha, B., Prokhorov, D., Fainekos, G., Deshmukh, J.: Scaling learning based policy optimization for temporal tasks via dropout (2024)","DOI":"10.1145\/3696112"},{"key":"18_CR26","unstructured":"Icarte, R.T., Klassen, T., Valenzano, R., McIlraith, S.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: Proceedings of the 35th International Conference on Machine Learning, pp. 2107\u20132116. PMLR (2018)"},{"key":"18_CR27","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1613\/jair.1.12440","volume":"73","author":"RT Icarte","year":"2022","unstructured":"Icarte, R.T., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: Reward machines: exploiting reward function structure in reinforcement learning. J. Artif. Intell. Res. 73, 173\u2013208 (2022). https:\/\/doi.org\/10.1613\/jair.1.12440","journal-title":"J. Artif. Intell. Res."},{"issue":"1","key":"18_CR28","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/s10703-018-0319-x","volume":"53","author":"S Jak\u0161i\u0107","year":"2018","unstructured":"Jak\u0161i\u0107, S., Bartocci, E., Grosu, R., Nguyen, T., Ni\u010dkovi\u0107, D.: Quantitative monitoring of STL with edit distance. Formal Methods Syst. Design 53(1), 83\u2013112 (2018). https:\/\/doi.org\/10.1007\/s10703-018-0319-x","journal-title":"Formal Methods Syst. Design"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Jaksic, S., Bartocci, E., Grosu, R., Nickovic, D.: An algebraic framework for runtime verification (2018)","DOI":"10.1109\/TCAD.2018.2858460"},{"key":"18_CR30","unstructured":"Jothimurugan, K., Bansal, S., Bastani, O., Alur, R.: Compositional reinforcement learning from logical specifications. In: Advances in Neural Information Processing Systems, vol. 34 (2021)"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Kalagarla, K.C., Jain, R., Nuzzo, P.: Synthesis of discounted-reward optimal policies for Markov decision processes under linear temporal logic specifications. arXiv preprint arXiv:2011.00632 (2020)","DOI":"10.23919\/ACC50511.2021.9482749"},{"key":"18_CR32","unstructured":"Kempa, B., Cramer, N.B., Frank, J.D.: Swarm mentality: toward automatic swarm state awareness with runtime verification. In: AAAI 2022 Spring Symposium Series (2022)"},{"issue":"3","key":"18_CR33","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1109\/TRO.2004.838026","volume":"21","author":"S Koenig","year":"2005","unstructured":"Koenig, S., Likhachev, M.: Fast replanning for navigation in unknown terrain. IEEE Trans. Rob. 21(3), 354\u2013363 (2005)","journal-title":"IEEE Trans. Rob."},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"Li, X., Ma, Y., Belta, C.: A policy search method for temporal logic specified reinforcement learning tasks. In: 2018 Annual American Control Conference (ACC), pp. 240\u2013245. IEEE (2018)","DOI":"10.23919\/ACC.2018.8431181"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Li, X., Vasile, C.I., Belta, C.: Reinforcement learning with temporal logic rewards. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 3834\u20133839. IEEE (2017)","DOI":"10.1109\/IROS.2017.8206234"},{"key":"18_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1007\/11603009_2","volume-title":"Formal Modeling and Analysis of Timed Systems","author":"O Maler","year":"2005","unstructured":"Maler, O., Nickovic, D., Pnueli, A.: Real time temporal logic: past, present, future. In: Pettersson, P., Yi, W. (eds.) FORMATS 2005. LNCS, vol. 3829, pp. 2\u201316. Springer, Heidelberg (2005). https:\/\/doi.org\/10.1007\/11603009_2"},{"key":"18_CR37","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning (2016)"},{"key":"18_CR38","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning (2013)"},{"key":"18_CR39","doi-asserted-by":"publisher","unstructured":"Mohri, M.: Weighted automata algorithms. In: Droste, M., Kuich, W., Vogler, H. (eds.) Handbook of Weighted Automata. Monographs in Theoretical Computer Science. An EATCS Series, pp. 213\u2013254. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-01492-5_6","DOI":"10.1007\/978-3-642-01492-5_6"},{"key":"18_CR40","unstructured":"Morris, R., et al.: Planning, scheduling and monitoring for airport surface operations. In: Workshops at the Thirtieth AAAI Conference on Artificial Intelligence (2016)"},{"key":"18_CR41","doi-asserted-by":"publisher","unstructured":"Mukhopadhyay, A., Chakraborty, S.: Replicator equations induced by microscopic processes in nonoverlapping population playing bimatrix games. Chaos: Int. J. Nonlinear Sci. 31(2) (2021). https:\/\/doi.org\/10.1063\/5.0032311","DOI":"10.1063\/5.0032311"},{"key":"18_CR42","doi-asserted-by":"publisher","unstructured":"Pant, Y.V., Abbas, H., Mangharam, R.: Smooth operator: control using the smooth robustness of temporal logic. In: 2017 IEEE Conference on Control Technology and Applications (CCTA), pp. 1235\u20131240 (2017). https:\/\/doi.org\/10.1109\/CCTA.2017.8062628","DOI":"10.1109\/CCTA.2017.8062628"},{"key":"18_CR43","doi-asserted-by":"publisher","unstructured":"Pant, Y.V., Abbas, H., Quaye, R.A., Mangharam, R.: Fly-by-logic: control of multi-drone fleets with temporal logic objectives. In: 2018 ACM\/IEEE 9th International Conference on Cyber-Physical Systems (ICCPS), pp. 186\u2013197 (2018). https:\/\/doi.org\/10.1109\/ICCPS.2018.00026","DOI":"10.1109\/ICCPS.2018.00026"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Pant, Y.V., Abbas, H., Quaye, R.A., Mangharam, R.: Fly-by-logic: control of multi-drone fleets with temporal logic objectives. In: Proceedings of the International Conference on Cyber-Physical Systems (ICCPS), pp. 186\u2013197 (2018)","DOI":"10.1109\/ICCPS.2018.00026"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Pnueli, A.: The temporal logic of programs. In: 18th Annual Symposium on Foundations of Computer Science (SFCS 1977), pp. 46\u201357. IEEE (1977)","DOI":"10.1109\/SFCS.1977.32"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Qin, X., Ar\u00e9chiga, N., Deshmukh, J., Best, A.: Robust testing for cyber-physical systems using reinforcement learning. In: 2023 21st ACM-IEEE International Symposium on Formal Methods and Models for System Design (MEMOCODE), pp. 36\u201346 (2023)","DOI":"10.1145\/3610579.3611087"},{"key":"18_CR47","unstructured":"Raffin, A., Hill, A., Gleave, A., Kanervisto, A., Ernestus, M., Dormann, N.: Stable-baselines3: reliable reinforcement learning implementations. J. Mach. Learn. Res. (2021)"},{"key":"18_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1007\/978-3-642-54862-8_24","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"T Reinbacher","year":"2014","unstructured":"Reinbacher, T., Rozier, K.Y., Schumann, J.: Temporal-logic based runtime observer pairs for system health management of real-time systems. In: \u00c1brah\u00e1m, E., Havelund, K. (eds.) TACAS 2014. LNCS, vol. 8413, pp. 357\u2013372. Springer, Heidelberg (2014). https:\/\/doi.org\/10.1007\/978-3-642-54862-8_24"},{"key":"18_CR49","unstructured":"Sadigh, D., Kapoor, A.: Safe control under uncertainty with probabilistic signal temporal logic. In: Proceedings of Robotics: Science and Systems XII (2016)"},{"key":"18_CR50","unstructured":"Salzman, O., Stern, R.: Research challenges and opportunities in multi-agent path finding and multi-agent pickup and delivery problems. In: Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems, pp. 1711\u20131715 (2020)"},{"key":"18_CR51","doi-asserted-by":"publisher","first-page":"3176","DOI":"10.1007\/978-0-387-30440-3_188","volume-title":"Encyclopedia of Complexity and Systems Science","author":"WH Sandholm","year":"2009","unstructured":"Sandholm, W.H.: Evolutionary game theory. In: Meyers, R. (ed.) Encyclopedia of Complexity and Systems Science, pp. 3176\u20133205. Springer, New York (2009). https:\/\/doi.org\/10.1007\/978-0-387-30440-3_188"},{"key":"18_CR52","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"18_CR53","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511806292","volume-title":"Evolution and the Theory of Games","author":"J Smith","year":"1982","unstructured":"Smith, J.: Evolution and the Theory of Games. Cambridge University Press, Cambridge (1982)"},{"key":"18_CR54","series-title":"The Springer International Series in Engineering and Computer Science","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1007\/978-1-4615-6325-9_11","volume-title":"Intelligent Unmanned Ground Vehicles","author":"A Stentz","year":"1997","unstructured":"Stentz, A.: Optimal and efficient path planning for partially known environments. In: Hebert, M.H., Thorpe, C., Stentz, A. (eds.) Intelligent Unmanned Ground Vehicles. The Springer International Series in Engineering and Computer Science, vol. 388, pp. 203\u2013220. Springer, Boston (1997). https:\/\/doi.org\/10.1007\/978-1-4615-6325-9_11"},{"key":"18_CR55","unstructured":"Stern, R., et\u00a0al.: Multi-agent pathfinding: definitions, variants, and benchmarks. In: Twelfth Annual Symposium on Combinatorial Search (2019)"},{"key":"18_CR56","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press (2018)"},{"key":"18_CR57","doi-asserted-by":"crossref","unstructured":"Varambally, S., Li, J., Koenig, S.: Which MAPF model works best for automated warehousing? In: Proceedings of the International Symposium on Combinatorial Search, vol.\u00a015, pp. 190\u2013198 (2022)","DOI":"10.1609\/socs.v15i1.21767"},{"issue":"3","key":"18_CR58","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"18_CR59","doi-asserted-by":"crossref","unstructured":"Williams, S., Deshmukh, J.: Potential games on cubic splines for multi-agent motion planning of autonomous agents. In: 2024 International Conference on Autonomous Agents and Multiagent Systems. University of Southern California, Los Angeles (2024)","DOI":"10.1109\/LCSYS.2024.3491052"},{"key":"18_CR60","doi-asserted-by":"publisher","unstructured":"Zhou, W., Li, W.: A hierarchical Bayesian approach to inverse reinforcement learning with symbolic reward machines (2022). https:\/\/doi.org\/10.48550\/arXiv.2204.09772","DOI":"10.48550\/arXiv.2204.09772"}],"container-title":["Lecture Notes in Computer Science","Quantitative Evaluation of Systems and Formal Modeling and Analysis of Timed Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-68416-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T10:29:55Z","timestamp":1732703395000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-68416-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031684159","9783031684166"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-68416-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"29 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"QEST+FORMATS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Quantitative Evaluation of Systems and Formal Modeling and Analysis of Timed Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Calgary, AB","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"qest2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.qest-formats.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}