{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T06:57:10Z","timestamp":1725519430679},"publisher-location":"Berlin, Heidelberg","reference-count":30,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642335020"},{"type":"electronic","value":"9783642335037"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-33503-7_55","type":"book-chapter","created":{"date-parts":[[2012,9,30]],"date-time":"2012-09-30T12:52:24Z","timestamp":1349009544000},"page":"562-576","source":"Crossref","is-referenced-by-count":1,"title":["Q-Tree: Automatic Construction of Hierarchical State Representation for Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Tao","family":"Mao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhao","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Laura E.","family":"Ray","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"issue":"4","key":"55_CR1","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"D. Bernstein","year":"2002","unstructured":"Bernstein, D., Givan, R., Immerman, N., Zilberstein, S.: The complexity of decentralized control of Markov decision processes. Mathematics of Operations Research\u00a027(4), 819\u2013840 (2002)","journal-title":"Mathematics of Operations Research"},{"key":"55_CR2","doi-asserted-by":"crossref","unstructured":"Rabinovich, Z., Goldman, C., Rosenschein, J.: The complexity of multiagent systems: the price of silence. In: 2nd Joint Conf. of Autonomous Agents and Multi-Agent Systems, Melbourne, Australia, pp. 1102\u20131103 (2003)","DOI":"10.1145\/860575.860816"},{"issue":"2","key":"55_CR3","first-page":"15","volume":"27","author":"R. Granger","year":"2006","unstructured":"Granger, R.: Engines of the brain: the computational instruction set of human recognition. AI Magzine\u00a027(2), 15\u201332 (2006)","journal-title":"AI Magzine"},{"issue":"5","key":"55_CR4","doi-asserted-by":"publisher","first-page":"856","DOI":"10.1162\/089892904970690","volume":"16","author":"A. Rodriguez","year":"2004","unstructured":"Rodriguez, A., Whitson, J., Granger, R.: Derivation and analysis of basic computational operations of thalamocortical circuits. J. Cognitive Neuroscience\u00a016(5), 856\u2013877 (2004)","journal-title":"J. Cognitive Neuroscience"},{"key":"55_CR5","unstructured":"Hearn, R., Granger, R.: Learning hierarchical representations and behaviors. In: AAAI Symposium on Naturally-Inspired Artificial Intelligence (2009)"},{"issue":"5","key":"55_CR6","doi-asserted-by":"publisher","first-page":"1624","DOI":"10.1109\/TAC.2002.803530","volume":"16","author":"D. Ormoneit","year":"2002","unstructured":"Ormoneit, D., Sen, S.: Kernel-based reinforcement learning in average-cost problems. IEEE Trans. Automatic Control\u00a016(5), 1624\u20131636 (2002)","journal-title":"IEEE Trans. Automatic Control"},{"key":"55_CR7","doi-asserted-by":"crossref","unstructured":"Yamaguchi, A., Takamatsu, J., Ogasawara, T.: Constructing action set from basic functions for reinforcement learning of robot control, Kobe, Japan (2009)","DOI":"10.1109\/ROBOT.2009.5152840"},{"key":"55_CR8","doi-asserted-by":"crossref","unstructured":"Kimura, H.: Reinforcement learning in multi-dimensional state-action sapce using random rectangular coarse coding and Gibbs sampling. In: Proc. 2009 IEEE Intl\u2019 Conf. on Robotics and Automation (ICRA), Kobe, Japan, pp. 4173\u20134180 (2007)","DOI":"10.1109\/IROS.2007.4399401"},{"key":"55_CR9","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","volume":"13","author":"T. Cover","year":"1967","unstructured":"Cover, T., Hart, P.: Nearest neighbor pattern classification. IEEE Trans. Information Theory\u00a013, 21\u201327 (1967)","journal-title":"IEEE Trans. Information Theory"},{"key":"55_CR10","unstructured":"Boyan, J., Moore, A.: Generalization in reinforcement learning: safely approximating the value function. In: Advances in Neural Information Processing Systems (NIPS), vol.\u00a07, pp. 369\u2013376 (1995)"},{"key":"55_CR11","unstructured":"Asada, M., Huber, M.: State space reduction for hierarchical reinforcement learning, Miami Beach, FL, USA (2004)"},{"key":"55_CR12","unstructured":"Dean, T., Givan, R., Leach, S.: Model reduction techniques for computing approximately optimal solutions for Markov decision processes. In: Proceedings of the 13th Conference on Uncertainty in Artificial Intelligence (UAI 1997), San Francisco, USA, pp. 124\u2013131 (1997)"},{"key":"55_CR13","doi-asserted-by":"crossref","unstructured":"McCallum, R.: Instance-based utile distinctions for reinforcement learning with hidden state. In: Proceedings of the 20th Intl\u2019 Conf. Machine Learning, ICML 1995 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50055-4"},{"key":"55_CR14","unstructured":"Jonsson, A., Barto, A.: Automated state abstraction for options using the U-Tree algorithm. In: Advances in Neural Information Processing Systems, vol.\u00a013, pp. 1054\u20131060 (2001)"},{"key":"55_CR15","unstructured":"Uther, W., Veloso, M.: Tree-based discretization for continuous state space reinforcement learning. In: Proc. of 16th National Conf. on Artificial Intelligence (AAAI), Madision, WI, USA (1998)"},{"key":"55_CR16","unstructured":"Pyeatt, L., Howe, A.: Decision tree function approximation in reinforcement learning. In: Proc. 3rd Int\u2019l Symposium on Adaptive Systems: Evolutionary Computation and Probablistic Graphical Models (2001)"},{"key":"55_CR17","doi-asserted-by":"crossref","unstructured":"Mao, T., Ray, L.: Hierarchical state representation and Q-learning for Agent-Based Herding. In: Proc. of Int\u2019l Conf. on Computer and Automation Engineering (ICCAE), Chongqing, China (2011)","DOI":"10.7763\/IJIEE.2012.V2.156"},{"key":"55_CR18","doi-asserted-by":"crossref","unstructured":"Uther, W., Veloso, M.: TTree: Tree-Based State Generalization with Temporally Abstract Actions. In: Proc. of the Symp. on Abstraction, Reformulation and Approximations, Edmonton, Canada (2002)","DOI":"10.1007\/3-540-45622-8_24"},{"key":"55_CR19","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-Based Batch Mode Reinforcement Learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"55_CR20","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/S0921-8890(99)00084-6","volume":"31","author":"R. Vaughan","year":"2000","unstructured":"Vaughan, R., Sumpter, N., Frost, A.: Experiments in automatic flock control. Robotics and Autonomous Systems\u00a031, 109\u2013116 (2000)","journal-title":"Robotics and Autonomous Systems"},{"issue":"3-4","key":"55_CR21","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Technical Note: Q-Learning. Machine Learning\u00a08(3-4), 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"55_CR22","volume-title":"Reinforcement Learning: An Introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"55_CR23","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1109\/TSMC.1979.4310076","volume":"9","author":"N. Otsu","year":"1979","unstructured":"Otsu, N.: A threshold selection method from gray-level histograms. IEEE Trans. Syst. Man Cybern\u00a09, 62\u201366 (1979)","journal-title":"IEEE Trans. Syst. Man Cybern"},{"key":"55_CR24","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316641","volume-title":"Multivariate Observation","author":"G. Seber","year":"1984","unstructured":"Seber, G.: Multivariate Observation. John Wiley & Sons, Inc., New York (1984)"},{"key":"55_CR25","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1037\/h0042519","volume":"65","author":"F. Rosenblatt","year":"1958","unstructured":"Rosenblatt, F.: The perceptron: A probabilistic model for information storage and organization in the brain. Pyschological Rev.\u00a065, 386\u2013407 (1958)","journal-title":"Pyschological Rev."},{"key":"55_CR26","doi-asserted-by":"crossref","DOI":"10.4135\/9781412983433","volume-title":"Applied Logistic Regression Analysis","author":"S. Menard","year":"2002","unstructured":"Menard, S.: Applied Logistic Regression Analysis. Sage Publications, Thousand Oaks (2002)"},{"key":"55_CR27","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1023\/A:1009715923555","volume":"2","author":"C. Burges","year":"1998","unstructured":"Burges, C.: A tutorial on support vector machines for pattern recognition. Data Mining and Knowledge Discovery\u00a02, 121\u2013167 (1998)","journal-title":"Data Mining and Knowledge Discovery"},{"issue":"253","key":"55_CR28","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1080\/01621459.1951.10500769","volume":"46","author":"F. Massey Jr.","year":"1951","unstructured":"Massey Jr., F.: The Kolmogorov-Smirnov test for goodness of fit. Journal of the American Statistics Association\u00a046(253), 68\u201378 (1951)","journal-title":"Journal of the American Statistics Association"},{"key":"55_CR29","unstructured":"Sutton, R.: Generalization in reinforcement learning: successful examples using sparse coarse coding. In: Advances in Neural Information Processing Systems, vol.\u00a08, pp. 1038\u20131044 (1996)"},{"key":"55_CR30","unstructured":"Mao, T., Ray, L.: Frequency-based patrolling with heterogeneous agents and limited communication. In: WorldComp Int\u2019l Conf. on Artificial Intelligence, Las Vegas, USA (2011)"}],"container-title":["Lecture Notes in Computer Science","Intelligent Robotics and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-33503-7_55","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,29]],"date-time":"2022-01-29T20:08:09Z","timestamp":1643486889000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-33503-7_55"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642335020","9783642335037"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-33503-7_55","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}