{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T09:53:35Z","timestamp":1772790815452,"version":"3.50.1"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s10994-024-06724-7","type":"journal-article","created":{"date-parts":[[2025,1,13]],"date-time":"2025-01-13T18:52:44Z","timestamp":1736794364000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Qualitative control learning can be much faster than reinforcement learning"],"prefix":"10.1007","volume":"114","author":[{"given":"Domen","family":"\u0160oberl","sequence":"first","affiliation":[]},{"given":"Ivan","family":"Bratko","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,14]]},"reference":[{"key":"6724_CR1","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1109\/37.24809","volume":"9","author":"CW Anderson","year":"1989","unstructured":"Anderson, C. W. (1989). Learning to control an inverted pendulum using neural networks. IEEE Control Systems Magazine, 9, 31\u201337.","journal-title":"IEEE Control Systems Magazine"},{"key":"6724_CR2","doi-asserted-by":"crossref","unstructured":"Barto, A. G., Sutton, R. S. & Anderson, C. W. (1983). Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Transactions on Systems, Man, and CyberneticsSMC-13, pp. 834\u2013846.","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"6724_CR3","unstructured":"Bratko, I. (2012). Prolog Programming for Artificial Intelligence, 4th Edition. Addison-Wesley."},{"key":"6724_CR4","first-page":"107","volume":"24","author":"I Bratko","year":"2003","unstructured":"Bratko, I., & \u0160uc, D. (2003). Learning qualitative models. AI Magazine, 24, 107\u2013119.","journal-title":"AI Magazine"},{"key":"6724_CR5","doi-asserted-by":"crossref","unstructured":"Forbus, K. D. (2019). Qualitative Representations: How People Reason and Learn about the Continuous World. MIT Press.","DOI":"10.7551\/mitpress\/11578.001.0001"},{"key":"6724_CR6","unstructured":"Hafner, D., Davidson, J. & Vanhoucke, V. (2018). Tensorflow agents: Efficient batched reinforcement learning in tensorflow. arxiv:1709.02878"},{"key":"6724_CR7","doi-asserted-by":"publisher","first-page":"8082","DOI":"10.1016\/j.ifacol.2020.12.2277","volume":"53","author":"D Hein","year":"2020","unstructured":"Hein, D., Limmer, S., & Runkler, T. A. (2020). Interpretable control by reinforcement learning. IFAC-PapersOnLine, 53, 8082\u20138089.","journal-title":"IFAC-PapersOnLine"},{"key":"6724_CR8","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1016\/0004-3702(86)90073-1","volume":"29","author":"B Kuipers","year":"1986","unstructured":"Kuipers, B. (1986). Qualitative simulation. 
Artificial Intelligence, 29, 289\u2013338.","journal-title":"Artificial Intelligence"},{"key":"6724_CR9","volume-title":"Qualitative Reasoning: Modeling and Simulation with Incomplete Knowledge","author":"B Kuipers","year":"1994","unstructured":"Kuipers, B. (1994). Qualitative Reasoning: Modeling and Simulation with Incomplete Knowledge. Cambridge, MA, USA: MIT Press."},{"key":"6724_CR10","doi-asserted-by":"crossref","unstructured":"Linglin, W., Yongxin, L. & Xiaoke, Z. (2015). Design of reinforce learning control algorithm and verified in inverted pendulum. 2015 34th Chinese Control Conference (CCC), pp. 3164\u20133168.","DOI":"10.1109\/ChiCC.2015.7260128"},{"key":"6724_CR11","first-page":"125","volume":"2","author":"D Michie","year":"1968","unstructured":"Michie, D., & Chambers, R. A. (1968). BOXES: An experiment in adaptive control. Machine Intelligence, 2, 125\u2013133.","journal-title":"Machine Intelligence"},{"key":"6724_CR12","doi-asserted-by":"crossref","unstructured":"Nagendra, S., Podila, N., Ugarakhod, R. & George, K. (2017). Comparison of reinforcement learning algorithms applied to the cart-pole problem. 2017 International Conference on Advances in Computing, Communications and Informatics (ICACCI), pp. 26\u201332.","DOI":"10.1109\/ICACCI.2017.8125811"},{"key":"6724_CR13","doi-asserted-by":"crossref","unstructured":"Puriel-Gil, G., Yu, W. & Sossa, H. (2018). Reinforcement learning compensation based pd control for inverted pendulum. 15th International Conference on Electrical Engineering, Computing Science and Automatic Control (CCE), pp. 1\u20136.","DOI":"10.1109\/ICEEE.2018.8533946"},{"key":"6724_CR14","doi-asserted-by":"crossref","unstructured":"Ramamoorthy, S. & Kuipers, B. (2003). Qualitative heterogeneous control of higher order systems. Hybrid Systems: Computation and Control, pp. 417\u2013434.","DOI":"10.1007\/3-540-36580-X_31"},{"key":"6724_CR15","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Peters, J. & Schaal, S. (2007). Evaluation of policy gradient methods and variants on the cart-pole benchmark. 2007 IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, pp. 254\u2013261.","DOI":"10.1109\/ADPRL.2007.368196"},{"key":"6724_CR16","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A. & Klimov, O. (2017). Proximal policy optimization algorithms. ArXivabs\/1707.06347. https:\/\/api.semanticscholar.org\/CorpusID:28695052"},{"key":"6724_CR17","doi-asserted-by":"crossref","unstructured":"\u0160oberl, D. & Bratko, I. (2017). Reactive motion planning with qualitative constraints. Advances in Artificial Intelligence: From Theory to Practice, pp. 41\u201350.","DOI":"10.1007\/978-3-319-60042-0_5"},{"key":"6724_CR18","doi-asserted-by":"crossref","unstructured":"\u0160oberl, D. & Bratko, I. (2019). Learning explainable control strategies demonstrated on the pole-and-cart system. Advances and Trends in Artificial Intelligence. From Theory to Practice, pp. 483\u2013494.","DOI":"10.1007\/978-3-030-22999-3_42"},{"key":"6724_CR19","doi-asserted-by":"crossref","unstructured":"\u0160oberl, D. & Bratko, I. (2020). Learning to control a quadcopter qualitatively. Journal of Intelligent & Robotic Systems.","DOI":"10.1007\/s10846-020-01228-7"},{"key":"6724_CR20","unstructured":"\u0160oberl, D. (2019). Automated planning with induced qualitative models in dynamic robotic domains. Ph.D. thesis, University of Ljubljana. 
https:\/\/repozitorij.uni-lj.si\/IzpisGradiva.php?id=126285"},{"key":"6724_CR21","doi-asserted-by":"crossref","unstructured":"\u0160oberl, D., \u017dabkar, J. & Bratko, I. (2015). Qualitative planning of object pushing by a robot. Foundations of Intelligent Systems, pp. 410\u2013419.","DOI":"10.1007\/978-3-319-25252-0_44"},{"key":"6724_CR22","unstructured":"\u0160uc, D. (2003). Machine Reconstruction of Human Control Strategies Frontiers in Artificial Intelligence and Applications. IOS Press, Inc."},{"key":"6724_CR23","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement Learning: An Introduction Second edn. The MIT Press."},{"key":"6724_CR24","unstructured":"Svete, A. (2020). Generalization of the cart pole problem to more difficult domains (Posplo\u0161itev problema vozicka s palico na zahtevnej\u0161e domene). Ph.D. thesis, University of Ljubljana. https:\/\/repozitorij.uni-lj.si\/IzpisGradiva.php?lang=slv&id=118155"},{"key":"6724_CR25","unstructured":"Tsividis, P., Pouncy, T., Xu, J.\u00a0L., Tenenbaum, J.\u00a0B. & Gershman, S.\u00a0J. (2017). Human learning in atari. AAAI Spring Symposia. https:\/\/api.semanticscholar.org\/CorpusID:5841991"},{"key":"6724_CR26","unstructured":"Wellman, M.\u00a0P. Allen, J.\u00a0F., Fikes, R. & Sandewall, E. (eds) (1991). Qualitative simulation with multivariate constraints. (eds Allen, J.\u00a0F., Fikes, R. & Sandewall, E.) Second International Conference on Principles of Knowledge Representation and Reasoning, 547\u2013557 (Morgan Kaufmann, 1991)."},{"key":"6724_CR27","doi-asserted-by":"publisher","unstructured":"Wiley, T. (2017). A Planning and Learning Hierarchy for the Online Acquisition of Robot Behaviours. Ph.D. thesis, UNSW Sydney. https:\/\/doi.org\/10.26190\/unsworks\/19999","DOI":"10.26190\/unsworks\/19999"},{"key":"6724_CR28","doi-asserted-by":"crossref","unstructured":"Wiley, T., Bratko, I. & Sammut, C. (2018). A machine learning system for controlling a rescue robot. RoboCup 2017: Robot World Cup XXI, pp. 108\u2013119.","DOI":"10.1007\/978-3-030-00308-1_9"},{"key":"6724_CR29","first-page":"19","volume":"4","author":"T Wiley","year":"2016","unstructured":"Wiley, T., Sammut, C., Hengst, B., & Bratko, I. (2016). A planning and learning hierarchy using qualitative reasoning for the on-line acquisition of robotic behaviors. 
Advances in Cognitive Systems, 4, 19.","journal-title":"Advances in Cognitive Systems"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06724-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06724-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06724-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T01:04:55Z","timestamp":1768352695000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06724-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":29,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["6724"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06724-7","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1]]},"assertion":[{"value":"5 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 September 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}],"article-number":"4"}}