{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T13:02:50Z","timestamp":1771506170317,"version":"3.50.1"},"reference-count":104,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T00:00:00Z","timestamp":1771459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T00:00:00Z","timestamp":1771459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Latvian Quantum Initiative under EU Recovery and Resilience Facility","award":["2.3.1.1.i.0\/1\/22\/I\/CFLA\/001"],"award-info":[{"award-number":["2.3.1.1.i.0\/1\/22\/I\/CFLA\/001"]}]},{"name":"Latvian Quantum Initiative under EU Recovery and Resilience Facility","award":["2.3.1.1.i.0\/1\/22\/I\/CFLA\/001"],"award-info":[{"award-number":["2.3.1.1.i.0\/1\/22\/I\/CFLA\/001"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Quantum Mach. 
Intell."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s42484-026-00368-7","type":"journal-article","created":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T11:47:35Z","timestamp":1771501655000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Quantum algorithm for apprenticeship learning"],"prefix":"10.1007","volume":"8","author":[{"given":"Andris","family":"Ambainis","sequence":"first","affiliation":[]},{"given":"Debbie","family":"Lim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,19]]},"reference":[{"key":"368_CR1","unstructured":"Aaronson S, Chia N-H, Lin H-H, Wang C, Zhang R (2019) On the quantum complexity of closest pair and related problems. Proceedings of the 35th Computational Complexity Conference"},{"key":"368_CR2","unstructured":"Abbeel P (2008) Apprenticeship learning and reinforcement learning with application to robotic control. PhD thesis, Stanford University"},{"key":"368_CR3","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of The 21st International Conference on Machine Learning, p 1","DOI":"10.1145\/1015330.1015430"},{"key":"368_CR4","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2005) Exploration and apprenticeship learning in reinforcement learning. In: Proceedings of the 22nd international conference on machine learning, pp 1\u20138","DOI":"10.1145\/1102351.1102352"},{"issue":"6","key":"368_CR5","doi-asserted-by":"publisher","first-page":"4307","DOI":"10.1007\/s10462-021-10108-x","volume":"55","author":"S Adams","year":"2022","unstructured":"Adams S, Cody T, Beling PA (2022) A survey of inverse reinforcement learning. 
Artif Intell Rev 55(6):4307\u20134346","journal-title":"Artif Intell Rev"},{"key":"368_CR6","unstructured":"Allcock J, Bao J, Doriguello JF, Luongo A, Santha M (2024) Constant-depth circuits for Uniformly Controlled Gates and Boolean functions with application to quantum memory circuits. Conference on the theory of quantum computation, communication and cryptography (TQC)"},{"key":"368_CR7","doi-asserted-by":"crossref","unstructured":"Altman E (2021) Constrained markov decision processes","DOI":"10.1201\/9781315140223"},{"issue":"1","key":"368_CR8","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1137\/S0097539705447311","volume":"37","author":"A Ambainis","year":"2007","unstructured":"Ambainis A (2007) Quantum walk algorithm for element distinctness. SIAM J Comput 37(1):210\u2013239","journal-title":"SIAM J Comput"},{"key":"368_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103500","volume":"297","author":"S Arora","year":"2021","unstructured":"Arora S, Doshi P (2021) A survey of inverse reinforcement learning: Challenges, methods and progress. Artif Intell 297:103500","journal-title":"Artif Intell"},{"key":"368_CR10","unstructured":"Auer P, Jaksch T, Ortner R (2008) Near-optimal regret bounds for reinforcement learning. Adv Neural Inf Process Syst 21"},{"key":"368_CR11","doi-asserted-by":"crossref","unstructured":"Azizzadenesheli K, Brunskill E, Anandkumar A (2018) Efficient exploration through bayesian deep q-networks. In: 2018 Information theory and applications workshop (ITA). IEEE, pp 1\u20139","DOI":"10.1109\/ITA.2018.8503252"},{"issue":"2","key":"368_CR12","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltru\u0161aitis","year":"2018","unstructured":"Baltru\u0161aitis T, Ahuja C, Morency L-P (2018) Multimodal machine learning: A survey and taxonomy. 
IEEE Trans Pattern Anal Mach Intell 41(2):423\u2013443","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2153","key":"368_CR13","first-page":"20120686","volume":"469","author":"R Beals","year":"2013","unstructured":"Beals R, Brierley S, Gray O, Harrow AW, Kutin S, Linden N, Shepherd D, Stather M (2013) Efficient distributed quantum computing. Proc R Soc A Math Phys Eng Sci 469(2153):20120686","journal-title":"Proc R Soc A Math Phys Eng Sci"},{"issue":"3","key":"368_CR14","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1007\/s11768-011-1005-3","volume":"9","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas DP (2011) Approximate policy iteration: A survey and some new methods. J Control Theory Appl 9(3):310\u2013335","journal-title":"J Control Theory Appl"},{"issue":"7671","key":"368_CR15","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1038\/nature23474","volume":"549","author":"J Biamonte","year":"2017","unstructured":"Biamonte J, Wittek P, Pancotti N, Rebentrost P, Wiebe N, Lloyd S (2017) Quantum machine learning. Nature 549(7671):195\u2013202","journal-title":"Nature"},{"key":"368_CR16","unstructured":"Boularias A, Kober J, Peters J (2011) Relative entropy inverse reinforcement learning. In: Proceedings of The 14th international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, pp 182\u2013189"},{"issue":"1","key":"368_CR17","first-page":"1","volume":"9","author":"R Boutaba","year":"2018","unstructured":"Boutaba R, Salahuddin MA, Limam N, Ayoubi S, Shahriar N, Estrada-Solano F, Caicedo OM (2018) A comprehensive survey on machine learning for networking: evolution, applications and research opportunities. 
J Int Serv Appl 9(1):1\u201399","journal-title":"J Int Serv Appl"},{"issue":"1","key":"368_CR18","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1023\/A:1018056104778","volume":"22","author":"SJ Bradtke","year":"1996","unstructured":"Bradtke SJ, Barto AG (1996) Linear least-squares algorithms for temporal difference learning. Mach Learn 22(1):33\u201357","journal-title":"Mach Learn"},{"key":"368_CR19","doi-asserted-by":"crossref","unstructured":"Brandao FG, Svore K (2016) Quantum speed-ups for semidefinite programming. arXiv:1609.05537","DOI":"10.1109\/FOCS.2017.45"},{"key":"368_CR20","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1090\/conm\/305\/05215","volume":"305","author":"G Brassard","year":"2002","unstructured":"Brassard G, Hoyer P, Mosca M, Tapp A (2002) Quantum amplitude amplification and estimation. Contemp Math 305:53\u201374","journal-title":"Contemp Math"},{"key":"368_CR21","unstructured":"Buhrman H, Loff B, Patro S, Speelman F (2022) Limits of quantum speed-ups for computational geometry and other problems: Fine-grained complexity via quantum walks. In: 13th Innovations in theoretical computer science conference"},{"key":"368_CR22","unstructured":"Buhrman H, Loff B, Patro S, Speelman F (2022) Memory compression with quantum random-access gates. arXiv:2203.05599"},{"key":"368_CR23","unstructured":"Chakraborty S, Gily\u00e9n A, Jeffery S (2019) The power of block-encoded matrix powers: improved regression techniques via faster hamiltonian simulation. Proceedings of the 46th International Colloquium on Automata, Languages, and Programming (ICALP)"},{"issue":"2","key":"368_CR24","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1007\/s42484-023-00116-1","volume":"5","author":"EA Cherrat","year":"2023","unstructured":"Cherrat EA, Kerenidis I, Prakash A (2023) Quantum reinforcement learning via policy iteration. 
Quantum Mach Intell 5(2):30","journal-title":"Quantum Mach Intell"},{"key":"368_CR25","first-page":"691","volume":"12","author":"J-D Choi","year":"2011","unstructured":"Choi J-D, Kim K-E (2011) Inverse reinforcement learning in partially observable environments. J Mach Learn Res 12:691\u2013730","journal-title":"J Mach Learn Res"},{"key":"368_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TQE.2022.3231194","volume":"3","author":"BD Clader","year":"2023","unstructured":"Clader BD, Dalzell AM, Stamatopoulos N, Salton G, Berta M, Zeng WJ (2023) Quantum resources required to block-encode a matrix of classical data. IEEE Trans Quantum Eng 3:1\u201323","journal-title":"IEEE Trans Quantum Eng"},{"issue":"5","key":"368_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2371656.2371658","volume":"59","author":"KL Clarkson","year":"2012","unstructured":"Clarkson KL, Hazan E, Woodruff DP (2012) Sublinear optimization for machine learning. J ACM (JACM) 59(5):1\u201349","journal-title":"J ACM (JACM)"},{"issue":"7","key":"368_CR28","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1145\/1538788.1538812","volume":"52","author":"A Coates","year":"2009","unstructured":"Coates A, Abbeel P, Ng AY (2009) Apprenticeship learning for helicopter control. Commun ACM 52(7):97\u2013105","journal-title":"Commun ACM"},{"key":"368_CR29","doi-asserted-by":"crossref","unstructured":"Cornelissen A, Hamoudi Y, Jerbi S (2022) Near-optimal quantum algorithms for multivariate mean estimation. In: Proceedings of the 54th Annual ACM SIGACT symposium on theory of computing, pp 33\u201343","DOI":"10.1145\/3519935.3520045"},{"key":"368_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TQE.2020.2965803","volume":"1","author":"O Di Matteo","year":"2020","unstructured":"Di Matteo O, Gheorghiu V, Mosca M (2020) Fault-tolerant resource estimation of quantum random-access memories. 
IEEE Trans Quantum Eng 1:1\u201313","journal-title":"IEEE Trans Quantum Eng"},{"key":"368_CR31","doi-asserted-by":"crossref","unstructured":"Dong D, Chen C, Li H, Tarn T-J (2008) Quantum reinforcement learning. IEEE Trans Syst Man Cybern Part B (Cybernetics) 38(5):1207\u20131220","DOI":"10.1109\/TSMCB.2008.925743"},{"key":"368_CR32","doi-asserted-by":"crossref","unstructured":"Dunjko V, Taylor JM, Briegel HJ (2017) Advances in quantum reinforcement learning. In: 2017 IEEE international conference on systems, man, and cybernetics (SMC). IEEE, pp 282\u2013287","DOI":"10.1109\/SMC.2017.8122616"},{"key":"368_CR33","unstructured":"Durr C, Hoyer P (1996) A quantum algorithm for finding the minimum. arXiv:quant-ph\/9607014"},{"issue":"01","key":"368_CR34","first-page":"1","volume":"9","author":"M Fatima","year":"2017","unstructured":"Fatima M, Pasha M (2017) Survey of machine learning algorithms for disease diagnostic. J Intell Learn Syst Appl 9(01):1","journal-title":"J Intell Learn Syst Appl"},{"key":"368_CR35","unstructured":"Fu J, Luo K, Levine S (2017) Learning robust rewards with adversarial inverse reinforcement learning. arXiv:1710.11248"},{"key":"368_CR36","unstructured":"Gao M, Xie T, Du SS, Yang LF (2021) A provably efficient algorithm for linear markov decision process with low switching cost. arXiv:2101.00494"},{"key":"368_CR37","doi-asserted-by":"publisher","first-page":"1225","DOI":"10.22331\/q-2024-01-11-1225","volume":"8","author":"G Gentinetta","year":"2024","unstructured":"Gentinetta G, Thomsen A, Sutter D, Woerner S (2024) The complexity of quantum support vector machines. Quantum 8:1225","journal-title":"Quantum"},{"issue":"16","key":"368_CR38","doi-asserted-by":"publisher","DOI":"10.1103\/physrevlett.100.160501","volume":"100","author":"V Giovannetti","year":"2008","unstructured":"Giovannetti V, Lloyd S, Maccone L (2008) Quantum random access memory. Phys Rev Lett 100(16):160501. 
https:\/\/doi.org\/10.1103\/physrevlett.100.160501","journal-title":"Phys Rev Lett"},{"key":"368_CR39","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevA.78.052310","volume":"78","author":"V Giovannetti","year":"2008","unstructured":"Giovannetti V, Lloyd S, Maccone L (2008) Architectures for a quantum random access memory. Phys Rev A 78:052310. https:\/\/doi.org\/10.1103\/PhysRevA.78.052310","journal-title":"Phys Rev A"},{"issue":"16","key":"368_CR40","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.100.160501","volume":"100","author":"V Giovannetti","year":"2008","unstructured":"Giovannetti V, Lloyd S, Maccone L (2008) Quantum random access memory. Phys Rev Lett 100(16):160501","journal-title":"Phys Rev Lett"},{"key":"368_CR41","doi-asserted-by":"crossref","unstructured":"Grover LK (1996) A fast quantum mechanical algorithm for database search. In: Proceedings of the 28th Annual ACM symposium on theory of computing, pp 212\u2013219","DOI":"10.1145\/237814.237866"},{"key":"368_CR42","unstructured":"Grover L, Rudolph T (2002) Creating superpositions that correspond to efficiently integrable probability distributions. ArXiv eprints. quant-ph\/0208112"},{"key":"368_CR43","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1613\/jair.1000","volume":"19","author":"C Guestrin","year":"2003","unstructured":"Guestrin C, Koller D, Parr R, Venkataraman S (2003) Efficient solution algorithms for factored mdps. J Artif Intell Res 19:399\u2013468","journal-title":"J Artif Intell Res"},{"issue":"4","key":"368_CR44","doi-asserted-by":"publisher","first-page":"042016","DOI":"10.1103\/PhysRevResearch.4.L042016","volume":"4","author":"AY Guo","year":"2022","unstructured":"Guo AY, Deshpande A, Chu S-K, Eldredge Z, Bienias P, Devulapalli D, Su Y, Childs AM, Gorshkov AV (2022) Implementing a fast unbounded quantum fanout gate using power-law interactions. 
Phys Rev Res 4(4):042016","journal-title":"Phys Rev Res"},{"key":"368_CR45","unstructured":"Hadfield-Menell D, Russell SJ, Abbeel P, Dragan A (2016) Cooperative inverse reinforcement learning. Adv Neural Inf Process Syst 29"},{"key":"368_CR46","unstructured":"Hann CT (2021) Practicality of quantum random access memory. PhD thesis, Yale University"},{"issue":"15","key":"368_CR47","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.103.150502","volume":"103","author":"AW Harrow","year":"2009","unstructured":"Harrow AW, Hassidim A, Lloyd S (2009) Quantum algorithm for linear systems of equations. Phys Rev Lett 103(15):150502","journal-title":"Phys Rev Lett"},{"key":"368_CR48","unstructured":"He J, Zhou D, Gu Q (2021) Logarithmic regret for reinforcement learning with linear function approximation. In: International conference on machine learning. PMLR, pp 4171\u20134180"},{"key":"368_CR49","unstructured":"Jaques S, Rattew AG (2023) Qram: A survey and critique. arXiv:2305.10310"},{"key":"368_CR50","unstructured":"Jerbi S, Cornelissen A, Ozols M, Dunjko V (2022) Quantum policy gradient algorithms. Conference on the theory of quantum computation, communication and cryptography (TQC)"},{"issue":"3","key":"368_CR51","doi-asserted-by":"publisher","first-page":"1496","DOI":"10.1287\/moor.2022.1309","volume":"48","author":"C Jin","year":"2023","unstructured":"Jin C, Yang Z, Wang Z, Jordan MI (2023) Provably efficient reinforcement learning with linear function approximation. Math Oper Res 48(3):1496\u20131521","journal-title":"Math Oper Res"},{"issue":"6245","key":"368_CR52","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1126\/science.aaa8415","volume":"349","author":"MI Jordan","year":"2015","unstructured":"Jordan MI, Mitchell TM (2015) Machine learning: Trends, perspectives, and prospects. Science 349(6245):255\u2013260","journal-title":"Science"},{"key":"368_CR53","unstructured":"Kakade SM (2003) On the sample complexity of reinforcement learning. 
PhD thesis, University of London, University College London (United Kingdom)"},{"key":"368_CR54","unstructured":"Kara AD, Yuksel S (2023) Q-learning for continuous state and action mdps under average cost criteria. arXiv:2308.07591"},{"key":"368_CR55","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1023\/A:1017932429737","volume":"49","author":"M Kearns","year":"2002","unstructured":"Kearns M, Mansour Y, Ng AY (2002) A sparse sampling algorithm for near-optimal planning in large markov decision processes. Mach Learn 49:193\u2013208","journal-title":"Mach Learn"},{"key":"368_CR56","unstructured":"Kearns M, Singh S (1998) Finite-sample convergence rates for q-learning and indirect algorithms. Adv Neural Inf Process Syst 11"},{"key":"368_CR57","unstructured":"Kerenidis I, Landman J, Luongo A, Prakash A (2019) q-means: A quantum algorithm for unsupervised machine learning. Adv Neural Inf Process Syst 32"},{"key":"368_CR58","unstructured":"Kerenidis I, Prakash A (2016) Quantum recommendation systems. arXiv:1603.08675"},{"issue":"11","key":"368_CR59","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: A survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"key":"368_CR60","unstructured":"Kolter J, Abbeel P, Ng A (2007) Hierarchical apprenticeship learning with application to quadruped locomotion. Adv Neural Inf Process Syst 20"},{"issue":"3","key":"368_CR61","doi-asserted-by":"publisher","first-page":"122","DOI":"10.3390\/robotics2030122","volume":"2","author":"P Kormushev","year":"2013","unstructured":"Kormushev P, Calinon S, Caldwell DG (2013) Reinforcement learning in robotics: Applications and real-world challenges. 
Robotics 2(3):122\u2013148","journal-title":"Robotics"},{"key":"368_CR62","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis MG, Parr R (2003) Least-squares policy iteration. J Mach Learn Res 4:1107\u20131149","journal-title":"J Mach Learn Res"},{"issue":"4","key":"368_CR63","doi-asserted-by":"publisher","first-page":"2733","DOI":"10.1007\/s10462-021-10061-9","volume":"55","author":"N Le","year":"2022","unstructured":"Le N, Rathour VS, Yamazaki K, Luu K, Savvides M (2022) Deep reinforcement learning in computer vision: a comprehensive survey. Artif Intell Rev 55(4):2733\u20132819","journal-title":"Artif Intell Rev"},{"key":"368_CR64","unstructured":"Levine S, Popovic Z, Koltun V (2011) Nonlinear inverse reinforcement learning with gaussian processes. Adv Neural Inf Process Syst 24"},{"key":"368_CR65","first-page":"12861","volume":"33","author":"G Li","year":"2020","unstructured":"Li G, Wei Y, Chi Y, Gu Y, Chen Y (2020) Breaking the sample size barrier in model-based reinforcement learning with a generative model. Adv Neural Inf Process Syst 33:12861\u201312872","journal-title":"Adv Neural Inf Process Syst"},{"key":"368_CR66","unstructured":"Li T, Chakrabarti S, Wu X (2019) Sublinear quantum algorithms for training linear and kernel-based classifiers. In: International conference on machine learning. PMLR, pp 3815\u20133824"},{"issue":"9","key":"368_CR67","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1038\/nphys3029","volume":"10","author":"S Lloyd","year":"2014","unstructured":"Lloyd S, Mohseni M, Rebentrost P (2014) Quantum principal component analysis. Nat Phys 10(9):631\u2013633","journal-title":"Nat Phys"},{"key":"368_CR68","doi-asserted-by":"publisher","first-page":"163","DOI":"10.22331\/q-2019-07-12-163","volume":"3","author":"GH Low","year":"2019","unstructured":"Low GH, Chuang IL (2019) Hamiltonian simulation by qubitization. 
Quantum 3:163","journal-title":"Quantum"},{"issue":"4","key":"368_CR69","doi-asserted-by":"publisher","first-page":"3133","DOI":"10.1109\/COMST.2019.2916583","volume":"21","author":"NC Luong","year":"2019","unstructured":"Luong NC, Hoang DT, Gong S, Niyato D, Wang P, Liang Y-C, Kim DI (2019) Applications of deep reinforcement learning in communications and networking: A survey. IEEE Commun Surv Tutor 21(4):3133\u20133174","journal-title":"IEEE Commun Surv Tutor"},{"key":"368_CR70","unstructured":"Luongo A (2022) Quantum algorithms for data analysis"},{"key":"368_CR71","doi-asserted-by":"crossref","unstructured":"Melo FS, Ribeiro MI (2007) Q-learning with linear function approximation. In: International conference on computational learning theory. Springer, pp 308\u2013322","DOI":"10.1007\/978-3-540-72927-3_23"},{"key":"368_CR72","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1016\/j.inffus.2019.12.001","volume":"57","author":"T Meng","year":"2020","unstructured":"Meng T, Jing X, Yan Z, Pedrycz W (2020) A survey on machine learning for data fusion. Inf Fusion 57:115\u2013129","journal-title":"Inf Fusion"},{"key":"368_CR73","unstructured":"Meyer N, Ufrecht C, Periyasamy M, Scherer DD, Plinge A, Mutschler C (2022) A survey on quantum reinforcement learning. arXiv:2211.03464"},{"issue":"2181","key":"368_CR74","first-page":"20150301","volume":"471","author":"A Montanaro","year":"2015","unstructured":"Montanaro A (2015) Quantum speedup of monte carlo methods. Proc R Soc A Math Phys Eng Sci 471(2181):20150301","journal-title":"Proc R Soc A Math Phys Eng Sci"},{"key":"368_CR75","unstructured":"Muhammad I, Yan Z (2015) Supervised machine learning approaches: A survey. ICTACT J Soft Comput 5(3)"},{"key":"368_CR76","first-page":"10407","volume":"34","author":"G Neu","year":"2021","unstructured":"Neu G, Olkhovskaya J (2021) Online learning in mdps with linear function approximation and bandit feedback. 
Adv Neural Inf Process Syst 34:10407\u201310417","journal-title":"Adv Neural Inf Process Syst"},{"key":"368_CR77","unstructured":"Ng AY, Russell S (2000) Algorithms for inverse reinforcement learning. In: Proceedings of The 17th international conference on machine learning, pp 663\u2013670"},{"key":"368_CR78","unstructured":"Osband I, Van Roy B, Wen Z (2016) Generalization and exploration via randomized value functions. In: International conference on machine learning. PMLR, pp 2377\u20132386"},{"issue":"17","key":"368_CR79","doi-asserted-by":"publisher","first-page":"7462","DOI":"10.3390\/s23177462","volume":"23","author":"K Phalak","year":"2023","unstructured":"Phalak K, Chatterjee A, Ghosh S (2023) Quantum random access memory for dummies. Sensors 23(17):7462","journal-title":"Sensors"},{"key":"368_CR80","unstructured":"Pineau J, Gordon G, Thrun S (2003) Point-based value iteration: An anytime algorithm for pomdps. In: Proceedings of the 18th international joint conference on artificial intelligence, vol 3, pp 1025\u20131032"},{"key":"368_CR81","unstructured":"Prakash A (2014) Quantum algorithms for linear algebra and machine learning. PhD thesis, University of California, Berkeley"},{"key":"368_CR82","unstructured":"Puterman ML (2014) Markov decision processes: discrete stochastic dynamic programming"},{"issue":"11","key":"368_CR83","doi-asserted-by":"publisher","first-page":"1127","DOI":"10.1287\/mnsc.24.11.1127","volume":"24","author":"ML Puterman","year":"1978","unstructured":"Puterman ML, Shin MC (1978) Modified policy iteration algorithms for discounted markov decision problems. Manage Sci 24(11):1127\u20131137","journal-title":"Manage Sci"},{"key":"368_CR84","first-page":"1","volume":"2016","author":"J Qiu","year":"2016","unstructured":"Qiu J, Wu Q, Ding G, Xu Y, Feng S (2016) A survey of machine learning for big data processing. 
EURASIP J Adv Signal Process 2016:1\u201316","journal-title":"EURASIP J Adv Signal Process"},{"key":"368_CR85","unstructured":"Ramachandran D, Amir E (2007) Bayesian inverse reinforcement learning. In: Proceedings of the 20th international joint conference on artificial intelligence, vol 7, pp 2586\u20132591"},{"issue":"13","key":"368_CR86","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.113.130503","volume":"113","author":"P Rebentrost","year":"2014","unstructured":"Rebentrost P, Mohseni M, Lloyd S (2014) Quantum support vector machine for big data classification. Phys Rev Lett 113(13):130503","journal-title":"Phys Rev Lett"},{"issue":"4","key":"368_CR87","doi-asserted-by":"publisher","first-page":"945","DOI":"10.1287\/moor.2016.0832","volume":"42","author":"N Saldi","year":"2017","unstructured":"Saldi N, Y\u00fcksel S, Linder T (2017) On the asymptotic optimality of finite approximations to markov decision processes with borel spaces. Math Oper Res 42(4):945\u2013978","journal-title":"Math Oper Res"},{"key":"368_CR88","doi-asserted-by":"crossref","unstructured":"Sen PC, Hajra M, Ghosh M (2020) Supervised classification algorithms in machine learning: A survey and review. In: Emerging Technology in Modelling and Graphics: Proceedings of IEM Graph 2018. Springer, pp 99\u2013111","DOI":"10.1007\/978-981-13-7403-6_11"},{"key":"368_CR89","doi-asserted-by":"crossref","unstructured":"Shor PW (1994) Algorithms for quantum computation: discrete logarithms and factoring. In: Proceedings 35th annual symposium on foundations of computer science. IEEE, pp 124\u2013134","DOI":"10.1109\/SFCS.1994.365700"},{"key":"368_CR90","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms. In: International conference on machine learning. PMLR, pp 387\u2013395"},{"key":"368_CR91","unstructured":"Valko M, Ghavamzadeh M, Lazaric A (2013) Semi-supervised apprenticeship learning. 
In: European workshop on reinforcement learning. PMLR, pp 131\u2013142"},{"issue":"6","key":"368_CR92","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3214306","volume":"51","author":"P Wang","year":"2019","unstructured":"Wang P, Li Y, Reddy CK (2019) Machine learning for survival analysis: A survey. ACM Comput Surv 51(6):1\u201336. https:\/\/doi.org\/10.1145\/3214306","journal-title":"ACM Comput Surv"},{"key":"368_CR93","first-page":"17816","volume":"33","author":"R Wang","year":"2020","unstructured":"Wang R, Du SS, Yang L, Salakhutdinov RR (2020) On reward-free reinforcement learning with linear function approximation. Adv Neural Inf Process Syst 33:17816\u201317826","journal-title":"Adv Neural Inf Process Syst"},{"key":"368_CR94","unstructured":"Wang D, Sundaram A, Kothari R, Kapoor A, Roetteler M (2021) Quantum algorithms for reinforcement learning with a generative model. In: International conference on machine learning. PMLR, pp 10916\u201310926"},{"key":"368_CR95","unstructured":"Wei C-Y, Jahromi MJ, Luo H, Jain R (2021) Learning infinite-horizon average-reward mdps with linear function approximation. In: International conference on artificial intelligence and statistics. PMLR, pp 3007\u20133015"},{"key":"368_CR96","unstructured":"Wiedemann S, Hein D, Udluft S, Mendl C (2022) Quantum policy iteration via amplitude estimation and grover search\u2013towards quantum advantage for reinforcement learning. arXiv:2206.04741"},{"issue":"1","key":"368_CR97","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.ejor.2014.08.003","volume":"241","author":"S Woerner","year":"2015","unstructured":"Woerner S, Laumanns M, Zenklusen R, Fertis A (2015) Approximate dynamic programming for stochastic linear control problems on compact state spaces. Eur J Oper Res 241(1):85\u201398","journal-title":"Eur J Oper Res"},{"key":"368_CR98","unstructured":"Yang L, Wang M (2019) Sample-optimal parametric q-learning using linearly additive features. 
In: International conference on machine learning. PMLR, pp 6995\u20137004"},{"issue":"1","key":"368_CR99","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3477600","volume":"55","author":"C Yu","year":"2021","unstructured":"Yu C, Liu J, Nemati S, Yin G (2021) Reinforcement learning in healthcare: A survey. ACM Comput Surv (CSUR) 55(1):1\u201336","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"1","key":"368_CR100","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1002\/que2.34","volume":"2","author":"Y Zhang","year":"2020","unstructured":"Zhang Y, Ni Q (2020) Recent advances in quantum machine learning. Quantum Eng 2(1):34","journal-title":"Quantum Eng"},{"issue":"3","key":"368_CR101","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1108\/17563781211255862","volume":"5","author":"S Zhifei","year":"2012","unstructured":"Zhifei S, Joo EM (2012) A survey of inverse reinforcement learning techniques. Int J Intell Comput Cybern 5(3):293\u2013311","journal-title":"Int J Intell Comput Cybern"},{"key":"368_CR102","doi-asserted-by":"crossref","unstructured":"Zhifei S, Joo EM (2012) A review of inverse reinforcement learning theory and recent advances. In: 2012 IEEE congress on evolutionary computation. IEEE, pp 1\u20138","DOI":"10.1109\/CEC.2012.6256507"},{"key":"368_CR103","unstructured":"Zhong H, Hu J, Xue Y, Li T, Wang L (2024) Provably efficient exploration in quantum reinforcement learning with logarithmic worst-case regret. In: Proceedings of the 41st international conference on machine learning. PMLR, vol 235, pp 61681\u201361707"},{"key":"368_CR104","unstructured":"Ziebart BD, Maas AL, Bagnell JA, Dey AK (2008) Maximum entropy inverse reinforcement learning. In: Proceedings of the 23rd AAAI conference on artificial intelligence. 
Chicago, IL, USA, vol 8, pp 1433\u20131438"}],"container-title":["Quantum Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42484-026-00368-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42484-026-00368-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42484-026-00368-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T12:04:43Z","timestamp":1771502683000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42484-026-00368-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,19]]},"references-count":104,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["368"],"URL":"https:\/\/doi.org\/10.1007\/s42484-026-00368-7","relation":{},"ISSN":["2524-4906","2524-4914"],"issn-type":[{"value":"2524-4906","type":"print"},{"value":"2524-4914","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,19]]},"assertion":[{"value":"29 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"16"}}