{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T06:11:49Z","timestamp":1725862309730},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319434247"},{"type":"electronic","value":"9783319434254"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-43425-4_17","type":"book-chapter","created":{"date-parts":[[2016,8,2]],"date-time":"2016-08-02T11:24:52Z","timestamp":1470137092000},"page":"244-259","source":"Crossref","is-referenced-by-count":2,"title":["Policy Learning for Time-Bounded Reachability in Continuous-Time Markov Decision Processes via Doubly-Stochastic Gradient Ascent"],"prefix":"10.1007","author":[{"given":"Ezio","family":"Bartocci","sequence":"first","affiliation":[]},{"given":"Luca","family":"Bortolussi","sequence":"additional","affiliation":[]},{"given":"Tom\u01ce\u0161","family":"Br\u00e1zdil","sequence":"additional","affiliation":[]},{"given":"Dimitrios","family":"Milios","sequence":"additional","affiliation":[]},{"given":"Guido","family":"Sanguinetti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,8,3]]},"reference":[{"issue":"6","key":"17_CR1","doi-asserted-by":"crossref","first-page":"524","DOI":"10.1109\/TSE.2003.1205180","volume":"29","author":"C Baier","year":"2003","unstructured":"Baier, C., Haverkort, B., Hermanns, H., Katoen, J.-P.: Model-checking algorithms for continuous-time Markov chains. IEEE Trans. Softw. Eng. 29(6), 524\u2013541 (2003)","journal-title":"IEEE Trans. Softw. Eng."},{"issue":"1","key":"17_CR2","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1016\/j.tcs.2005.07.022","volume":"345","author":"C Baier","year":"2005","unstructured":"Baier, C., Hermanns, H., Katoen, J.-P., Haverkort, B.R.: Efficient computation of time-bounded reachability probabilities in uniform continuous-time Markov decision processes. Theor. Comput. Sci. 345(1), 2\u201326 (2005)","journal-title":"Theor. Comput. Sci."},{"key":"17_CR3","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1007\/s004460050046","volume":"11","author":"C Baier","year":"1998","unstructured":"Baier, C., Kwiatkowska, M.Z.: Model checking for a probabilistic branching time logic with fairness. Distrib. Comput. 11, 125\u2013155 (1998)","journal-title":"Distrib. Comput."},{"key":"17_CR4","unstructured":"Bartocci, E., Bortolussi, L., Br\u00e1zdil, T., Milios, D., Sanguinetti, G.: Policy learning for time-bounded reachability in continuous-time Markov decision processes via doubly-stochastic gradient ascent (2016). CoRR ArXiv, abs\/1605.09703"},{"key":"17_CR5","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1016\/j.tcs.2015.02.046","volume":"587","author":"E Bartocci","year":"2015","unstructured":"Bartocci, E., Bortolussi, L., Nenzi, L., Sanguinetti, G.: System design of stochastic models using robustness of temporal properties. Theor. Comput. Sci. 587, 3\u201325 (2015)","journal-title":"Theor. Comput. Sci."},{"issue":"1","key":"17_CR6","first-page":"351","volume":"15","author":"J Baxter","year":"2011","unstructured":"Baxter, J., Bartlett, P.L., Weaver, L.: Experiments with infinite-horizon, policy-gradient estimation. J. Artif. Int. Res. 15(1), 351\u2013381 (2011)","journal-title":"J. Artif. Int. Res."},{"key":"17_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"499","DOI":"10.1007\/3-540-60692-0_70","volume-title":"Foundations of Software Technology and Theoretical Computer Science","author":"A Bianco","year":"1995","unstructured":"Bianco, A., de Alfaro, L.: Model checking of probabilistic and nondeterministic systems. In: Thiagarajan, P.S. (ed.) Foundations of Software Technology and Theoretical Computer Science. LNCS, vol. 1026, pp. 499\u2013513. Springer, Heidelberg (1995)"},{"issue":"5","key":"17_CR8","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1016\/j.peva.2013.01.001","volume":"70","author":"L Bortolussi","year":"2013","unstructured":"Bortolussi, L., Hillston, J., Latella, D., Massink, M.: Continuous aproximation of collective systems behaviour: a tutorial. Perform. Eval. 70(5), 317\u2013349 (2013)","journal-title":"Perform. Eval."},{"key":"17_CR9","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1016\/j.ic.2016.01.004","volume":"247","author":"L Bortolussi","year":"2016","unstructured":"Bortolussi, L., Milios, D., Sanguinetti, G.: Smoothed model checking for uncertain continuous time Markov chains. Inf. Comput. 247, 235\u2013253 (2016)","journal-title":"Inf. Comput."},{"key":"17_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/978-3-642-40196-1_7","volume-title":"Quantitative Evaluation of Systems","author":"L Bortolussi","year":"2013","unstructured":"Bortolussi, L., Sanguinetti, G.: Learning and designing stochastic processes from logical constraints. In: Joshi, K., Siegle, M., Stoelinga, M., D\u2019Argenio, P.R. (eds.) QEST 2013. LNCS, vol. 8054, pp. 89\u2013105. Springer, Heidelberg (2013)"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Proceedings of COMPSTAT, pp. 177\u2013186. Physica-Verlag HD (2010)","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"17_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1007\/978-3-642-35289-8_25","volume-title":"Neural Networks: Tricks of the Trade","author":"L Bottou","year":"2012","unstructured":"Bottou, L.: Stochastic gradient descent tricks. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade, 2nd edn. LNCS, vol. 7700, 2nd edn, pp. 421\u2013436. Springer, Heidelberg (2012)","edition":"2"},{"key":"17_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"166","DOI":"10.1007\/978-3-319-24953-7_12","volume-title":"Automated Technology for Verification and Analysis","author":"Y Butkova","year":"2015","unstructured":"Butkova, Y., Hatefi, H., Hermanns, H., Krc\u00e1l, J.: Optimal continuous time Markov decisions. In: Finkbeiner, B., Pu, G., Zhang, L. (eds.) ATVA 2015. LNCS, vol. 9364, pp. 166\u2013182. Springer, Heidelberg (2015)"},{"issue":"25","key":"17_CR14","doi-asserted-by":"crossref","first-page":"2340","DOI":"10.1021\/j100540a008","volume":"81","author":"DT Gillespie","year":"1977","unstructured":"Gillespie, D.T.: Exact stochastic simulation of coupled chemical reactions. J. Phys. Chem. 81(25), 2340\u20132361 (1977)","journal-title":"J. Phys. Chem."},{"issue":"2","key":"17_CR15","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1007\/BF02837562","volume":"14","author":"X Guo","year":"2006","unstructured":"Guo, X., Hern\u00e1ndez-Lerma, O., Prieto-Rumeau, T., Cao, X.-R., Zhang, J., Hu, Q., Lewis, M.E., V\u00e9lez, R.: A survey of recent results on continuous-time Markov decision processes. TOP 14(2), 177\u2013261 (2006)","journal-title":"TOP"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Henriques, D., Martins, J., Zuliani, P., Platzer, A., Clarke, E.M.: Statistical model checking for Markov decision processes. In: Proceedings of QEST, pp. 84\u201393. IEEE Computer Society (2012)","DOI":"10.1109\/QEST.2012.19"},{"issue":"04","key":"17_CR17","doi-asserted-by":"crossref","first-page":"823","DOI":"10.1142\/S0129054111008441","volume":"22","author":"T Henzinger","year":"2011","unstructured":"Henzinger, T., Jobstmann, B., Wolf, V.: Formalisms for specifying Markovian population models. Int. J. Found. Comput. Sci. 22(04), 823\u2013841 (2011)","journal-title":"Int. J. Found. Comput. Sci."},{"key":"17_CR18","series-title":"Lecture Notes in Computer Science","first-page":"218","volume-title":"Computational Methods in Systems Biology","author":"SK Jha","year":"2009","unstructured":"Jha, S.K., Clarke, E.M., Langmead, C.J., Legay, A., Platzer, A., Zuliani, P.: A Bayesian approach to model checking biological systems. In: Degano, P., Gorrieri, R. (eds.) CMSB 2009. LNCS, vol. 5688, pp. 218\u2013234. Springer, Heidelberg (2009)"},{"key":"17_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"585","DOI":"10.1007\/978-3-642-22110-1_47","volume-title":"Computer Aided Verification","author":"M Kwiatkowska","year":"2011","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: verification of probabilistic real-time systems. In: Gopalakrishnan, G., Qadeer, S. (eds.) CAV 2011. LNCS, vol. 6806, pp. 585\u2013591. Springer, Heidelberg (2011)"},{"issue":"5","key":"17_CR20","doi-asserted-by":"crossref","first-page":"971","DOI":"10.1287\/opre.29.5.971","volume":"29","author":"C Lefevre","year":"1981","unstructured":"Lefevre, C.: Optimal control of a birth and death epidemic process. Oper. Res. 29(5), 971\u2013982 (1981)","journal-title":"Oper. Res."},{"key":"17_CR21","unstructured":"Mannor, S., Rubinstein, R.Y., Gat, Y.: The cross entropy method for fast policy search. In: ICML, pp. 512\u2013519 (2003)"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Medina Ayala, A.I., Andersson, S.B., Belta, C.: Probabilistic control from time-bounded temporal logic specifications in dynamic environments. In: Proceedings of ICRA 2012, pp. 4705\u20134710. IEEE (2012)","DOI":"10.1109\/ICRA.2012.6224963"},{"issue":"3","key":"17_CR23","doi-asserted-by":"crossref","first-page":"552","DOI":"10.1016\/0022-247X(68)90194-7","volume":"22","author":"B Miller","year":"1968","unstructured":"Miller, B.: Finite state continuous time Markov decision processes with an infinite planning horizon. J. Math. Anal. Appl. 22(3), 552\u2013569 (1968)","journal-title":"J. Math. Anal. Appl."},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"Murata, N.: A statistical study of on-line learning. In: On-Line Learning in Neural Networks, pp. 63\u201392. Cambridge University Press, Cambridge (1998)","DOI":"10.1017\/CBO9780511569920.005"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Neuhaeusser, M.R., Zhang, L.: Time-bounded reachability probabilities in continuous-time Markov decision processes. In: Proceedings of QEST, pp. 209\u2013218. IEEE (2010)","DOI":"10.1109\/QEST.2010.47"},{"key":"17_CR26","unstructured":"Neuh\u00e4u\u00dfer, M.R.: Model checking nondeterministic and randomly timed systems. Ph.D. thesis, RWTH Aachen University (2010)"},{"issue":"10","key":"17_CR27","doi-asserted-by":"crossref","first-page":"1200","DOI":"10.1109\/43.952737","volume":"20","author":"Q Qiu","year":"2001","unstructured":"Qiu, Q., Wu, Q., Pedram, M.: Stochastic modeling of a power-managed system-construction and optimization. IEEE Trans. Comput. Aided Des. Integr. Circ. Syst. 20(10), 1200\u20131217 (2001)","journal-title":"IEEE Trans. Comput. Aided Des. Integr. Circ. Syst."},{"key":"17_CR28","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1007\/s00236-011-0140-0","volume":"48","author":"MN Rabe","year":"2011","unstructured":"Rabe, M.N., Schewe, S.: Finite optimal control for time-bounded reachability in CTMDPs and continuous-time Markov games. Acta Inform. 48, 291\u2013315 (2011)","journal-title":"Acta Inform."},{"key":"17_CR29","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1016\/j.tcs.2012.10.001","volume":"467","author":"MN Rabe","year":"2013","unstructured":"Rabe, M.N., Schewe, S.: Optimal time-abstract schedulers for CTMDPs and continuous-time Markov games. Theor. Comput. Sci. 467, 53\u201367 (2013)","journal-title":"Theor. Comput. Sci."},{"key":"17_CR30","volume-title":"Gaussian Processes for Machine Learning","author":"CE Rasmussen","year":"2006","unstructured":"Rasmussen, C.E., Williams, C.K.I.: Gaussian Processes for Machine Learning. MIT Press, Cambridge (2006)"},{"key":"17_CR31","unstructured":"Rosenstein, M., Barto, A.G.: Robot weightlifting by direct policy search. In: Proceedings of IJCAI, vol. 17, pp. 839\u2013846 (2001)"},{"key":"17_CR32","doi-asserted-by":"crossref","DOI":"10.1002\/9780470317037","volume-title":"Stochastic Dynamic Programming and the Control of Queueing Systems","author":"LI Sennott","year":"1998","unstructured":"Sennott, L.I.: Stochastic Dynamic Programming and the Control of Queueing Systems. Wiley, New York (1998)"},{"key":"17_CR33","unstructured":"Stulp, F., Sigaud, O.: Path integral policy improvement with covariance matrix adaptation (2012). CoRR ArXiv, arXiv:1206.4621"},{"key":"17_CR34","unstructured":"Stulp, F., Sigaud, O.: Policy improvement methods: between black-box optimization and episodic reinforcement learning (2012)"},{"issue":"9","key":"17_CR35","doi-asserted-by":"crossref","first-page":"1368","DOI":"10.1016\/j.ic.2006.05.002","volume":"204","author":"HLS Younes","year":"2006","unstructured":"Younes, H.L.S., Simmons, R.G.: Statistical probabilistic model checking with a focus on time-bounded properties. Inf. Comput. 204(9), 1368\u20131409 (2006)","journal-title":"Inf. Comput."}],"container-title":["Lecture Notes in Computer Science","Quantitative Evaluation of Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-43425-4_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,11]],"date-time":"2019-09-11T22:15:49Z","timestamp":1568240149000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-43425-4_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319434247","9783319434254"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-43425-4_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}