{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,6]],"date-time":"2022-04-06T02:19:26Z","timestamp":1649211566225},"reference-count":17,"publisher":"Computers, Materials and Continua (Tech Science Press)","issue":"3","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Intelligent Automation &amp; Soft Computing"],"published-print":{"date-parts":[[2004,1]]},"DOI":"10.1080\/10798587.2004.10642878","type":"journal-article","created":{"date-parts":[[2013,3,2]],"date-time":"2013-03-02T07:50:17Z","timestamp":1362210617000},"page":"209-220","source":"Crossref","is-referenced-by-count":4,"title":["Improved Q<sub>MDP<\/sub> Policy for Partially Observable Markov Decision Processes in Large Domains: Embedding Exploration Dynamics"],"prefix":"10.32604","volume":"10","author":[{"given":"Giorgos","family":"Apostolikas","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Spyros","family":"Tzafestas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"17807","reference":[{"key":"CIT0001","unstructured":"T. Dean, L. P. Kaelbling, J. Kirman, and A. Nicholson, 1993. \u201cPlanning with deadlines in stochastic domains\u201d,In Proc. of the 11th National Conference on Artificial Intelligence,Washington, DC, pp 574\u2013579"},{"key":"CIT0003","doi-asserted-by":"crossref","unstructured":"R. S. Sutton, 1990. \u201cIntegrated architectures for learning, planning, and reacting based on approximating dynamic programming\u201d,In Proc. of the 7th International Conference on Machine Learning,Austin, Texas, pp. 216\u2013224","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"CIT0004","unstructured":"R. A. Howard, 1960. \u201cDynamic Programming and Markov Processes\u201d,The MIT Press,Cambridge, Massachusetts"},{"key":"CIT0008","unstructured":"A. R. Cassandra, M. L. Littman, and N. L. Zhang, 1997. 
\u201cIncremental pruning: a simple, fast, exact algorithm for partially observable Markov decision processes\u201d,In Proceedings of the Thirteenth Conference on Uncertainty in Artificial Intelligence,pp. 54\u201361"},{"key":"CIT0010","unstructured":"N. L. Zhang and S. S. Lee, 1998. \u201cPlanning with partially observable Markov decision processes: Advances in exact solution method\u201d,In Proceeding of the Fourteenth Conference on Uncertainty in Artificial Intelligence,pp. 523\u2013530"},{"key":"CIT0011","unstructured":"A. R. Cassandra, 1998. \u201cExact and approximate algorithms for partially observable Markov decision processes\u201d,Ph.D. thesis,Brown University"},{"key":"CIT0012","unstructured":"R. Bellman, 1957. \u201cDynamic Programming\u201d,Princeton University Press,Princeton, New Jersey"},{"key":"CIT0013","doi-asserted-by":"crossref","unstructured":"M. L. Puterman, 1994. \u201cMarkov decision processes: discrete stochastic dynamic programming\u201d,John Wiley,New York","DOI":"10.1002\/9780470316887"},{"key":"CIT0014","unstructured":"E. J. Sondik, 1971. \u201cThe optimal control of partially observable Markov decision processes\u201d,Ph.D. Thesis,Stanford University"},{"key":"CIT0015","doi-asserted-by":"crossref","unstructured":"N. L. Zhang and W. Liu, 1997. \u201cA model approximation scheme for planning in partially observable stochastic domains\u201d,Journal of Artificial Intelligence Research,pp. 199\u2013230","DOI":"10.1613\/jair.419"},{"key":"CIT0016","unstructured":"H.T. Cheng, 1988. \u201cAlgorithms for partially observable Markov decision processes\u201d,Ph.D. thesis,University of British Columbia"},{"key":"CIT0018","unstructured":"M. L. Littman, \u201cAlgorithms for sequential decision making\u201d,Ph.D. thesis,Brown University, 1996"},{"key":"CIT0020","unstructured":"O. Madani, S. Hanks and A. Condon, 1999. 
\u201cOn the undecidability of probabilistic planning and infinite-horizon partially observable Markov decision processes\u201d,In Proceeding of the Sixteenth National Conference on Artificial Intelligence,pp. 541\u2013548"},{"key":"CIT0021","doi-asserted-by":"crossref","unstructured":"M. L. Littman, A. R. Cassandra, and L. P. Kaelbling, 1995. \u201cLearning policies for partially observable environments: Scaling up\u201d,In Proc. 12th International Conference on Machine Learning,pp. 362\u2013370, San Francisco, CA","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"CIT0022","unstructured":"L. Chrisman, 1992. \u201cReinforcement learning with perceptual aliasing: The perceptual distinctions approach\u201d,In Proc. Tenth National Conference on AI (AAAI),pp. 183\u2013188"},{"key":"CIT0023","unstructured":"R. A. McCallum, 1993. \u201cFirst results with utile distinction memory for reinforcement learning\u201d,Technical Report 446,Dept. Comp. Sci. Univ. Rochester, 1992. Also In Proc. Machine Learning Conference"},{"key":"CIT0024","unstructured":"C. J. Watkins, 1989. \u201cLearning with delayed rewards\u201d,Ph.D. 
thesis,Cambridge University"}],"container-title":["Intelligent Automation &amp; Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/autosoftjournal.net\/viewPaper.php?paper=10642878","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,6,3]],"date-time":"2020-06-03T13:08:22Z","timestamp":1591189702000},"score":1,"resource":{"primary":{"URL":"http:\/\/autosoftjournal.net\/paperShow.php?paper=10642878"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2004,1]]},"references-count":17,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2004,1]]}},"alternative-id":["10.1080\/10798587.2004.10642878"],"URL":"https:\/\/doi.org\/10.1080\/10798587.2004.10642878","relation":{},"ISSN":["1079-8587","2326-005X"],"issn-type":[{"value":"1079-8587","type":"print"},{"value":"2326-005X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2004,1]]}}}