{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T23:02:31Z","timestamp":1778626951096,"version":"3.51.4"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100012818","name":"Comunidad de Madrid","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100012818","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100031478","name":"NextGenerationEU","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100031478","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Ministry for Digital Transformation and of Civil Service of the Spanish Government"},{"DOI":"10.13039\/100000183","name":"U.S. Army Research Office","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Quantum Mach. Intell."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s42484-026-00359-8","type":"journal-article","created":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T13:56:55Z","timestamp":1770127015000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Hybrid reward-driven reinforcement learning for efficient quantum circuit synthesis"],"prefix":"10.1007","volume":"8","author":[{"given":"Sara","family":"Giordano","sequence":"first","affiliation":[]},{"given":"Kornikar","family":"Sen","sequence":"additional","affiliation":[]},{"given":"Miguel A.","family":"Martin-Delgado","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,3]]},"reference":[{"issue":"4","key":"359_CR1","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/JHEP04(2019)087","volume":"2019","author":"T Ali","year":"2019","unstructured":"Ali T, Bhattacharyya A, Haque SS, Kim EH, Moynihan N (2019) Time evolution of complexity: a critique of three methods. J High Energy Phys 2019(4):87. https:\/\/doi.org\/10.1007\/JHEP04(2019)087","journal-title":"J High Energy Phys"},{"issue":"6","key":"359_CR2","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1109\/TCAD.2013.2244643","volume":"32","author":"M Amy","year":"2013","unstructured":"Amy M, Maslov D, Mosca M, Roetteler M (2013) A meet-in-the-middle algorithm for fast synthesis of depth-optimal quantum circuits. IEEE Trans Comput Aided Des Integr Circuits Syst 32(6):818\u2013830. https:\/\/doi.org\/10.1109\/TCAD.2013.2244643","journal-title":"IEEE Trans Comput Aided Des Integr Circuits Syst"},{"key":"359_CR3","doi-asserted-by":"publisher","first-page":"183","DOI":"10.22331\/q-2019-09-02-183","volume":"3","author":"P Andreasson","year":"2019","unstructured":"Andreasson P, Johansson J, Liljestrand S, Granath M (2019) Quantum error correction for the toric code using deep reinforcement learning. Quantum 3:183. https:\/\/doi.org\/10.22331\/q-2019-09-02-183","journal-title":"Quantum"},{"key":"359_CR4","unstructured":"Blundell C, Uria B, Pritzel A, Li Y, Ruderman A, Leibo J, Rae J, Wierstra D, Hassabis D (2016) Model-free episodic control. arXiv. [stat.ML]. Preprint at arXiv:1606.04460"},{"key":"359_CR5","doi-asserted-by":"publisher","first-page":"030316","DOI":"10.1103\/PRXQuantum.2.030316","volume":"2","author":"FGSL Brand\u00e3o","year":"2021","unstructured":"Brand\u00e3o FGSL, Chemissany W, Hunter-Jones N, Kueng R, Preskill J (2021) Models of quantum complexity growth. PRX Quantum 2:030316. https:\/\/doi.org\/10.1103\/PRXQuantum.2.030316","journal-title":"PRX Quantum"},{"key":"359_CR6","doi-asserted-by":"publisher","first-page":"031086","DOI":"10.1103\/PhysRevX.8.031086","volume":"8","author":"M Bukov","year":"2018","unstructured":"Bukov M, Day AGR, Sels D, Weinberg P, Polkovnikov A, Mehta P (2018) Reinforcement learning in different phases of quantum control. Phys Rev X 8:031086. https:\/\/doi.org\/10.1103\/PhysRevX.8.031086","journal-title":"Phys Rev X"},{"key":"359_CR7","doi-asserted-by":"publisher","first-page":"042314","DOI":"10.1103\/PhysRevA.83.042314","volume":"83","author":"A Cabello","year":"2011","unstructured":"Cabello A, Danielsen LE, L\u00f3pez-Tarrida AJ, Portillo JR (2011) Optimal preparation of graph states. Phys Rev A 83:042314. https:\/\/doi.org\/10.1103\/PhysRevA.83.042314","journal-title":"Phys Rev A"},{"key":"359_CR8","doi-asserted-by":"publisher","first-page":"195125","DOI":"10.1103\/PhysRevB.106.195125","volume":"106","author":"P Caputa","year":"2022","unstructured":"Caputa P, Liu S (2022) Quantum complexity and topological phases of matter. Phys Rev B 106:195125. https:\/\/doi.org\/10.1103\/PhysRevB.106.195125","journal-title":"Phys Rev B"},{"issue":"4","key":"359_CR9","doi-asserted-by":"publisher","first-page":"045002","DOI":"10.1088\/2058-9565\/ad420a","volume":"9","author":"Q Chen","year":"2024","unstructured":"Chen Q, Du Y, Jiao Y, Lu X, Wu X, Zhao Q (2024) Efficient and practical quantum compiler towards multi-qubit systems with deep reinforcement learning. Quantum Sci Technol 9(4):045002. https:\/\/doi.org\/10.1088\/2058-9565\/ad420a","journal-title":"Quantum Sci Technol"},{"key":"359_CR10","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1038\/ncomms1147","volume":"1","author":"M Cramer","year":"2010","unstructured":"Cramer M, Plenio MB, Flammia ST, Somma RD, Gross D, Bartlett SD, Landon-Cardinal O, Poulin D, Liu Y-K (2010) Efficient quantum state tomography. Nat Commun 1:149. https:\/\/doi.org\/10.1038\/ncomms1147","journal-title":"Nat Commun"},{"key":"359_CR11","doi-asserted-by":"publisher","first-page":"041","DOI":"10.21468\/SciPostPhys.16.2.041","volume":"16","author":"B Craps","year":"2024","unstructured":"Craps B, Clerck MD, Evnin O, Hacker P (2024) Integrability and complexity in quantum spin chains. SciPost Phys 16:041. https:\/\/doi.org\/10.21468\/SciPostPhys.16.2.041","journal-title":"SciPost Phys"},{"key":"359_CR12","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1038\/s41534-019-0241-0","volume":"6","author":"M Dalgaard","year":"2020","unstructured":"Dalgaard M, Motzoi F, S\u00f8rensen JJWH, Sherson J (2020) Global optimization of quantum dynamics with AlphaZero deep exploration. NPJ Quantum Inf 6:6. https:\/\/doi.org\/10.1038\/s41534-019-0241-0","journal-title":"NPJ Quantum Inf"},{"key":"359_CR13","doi-asserted-by":"crossref","unstructured":"Devlin S, Kudenko D (2011) Theoretical considerations of potential-based reward shaping for multi-agent systems. In: 10th international conference on autonomous agents and multiagent systems - vol 1\u20133. Taipei, Taiwan, pp 225\u2013232","DOI":"10.65109\/VDID3904"},{"key":"359_CR14","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","volume":"110","author":"G Dulac-Arnold","year":"2021","unstructured":"Dulac-Arnold G, Levine N, Mankowitz D, Li J, Paduraru C, Gowal S, Hester T (2021) Challenges of real-world reinforcement learning: definitions, benchmarks and analysis. Mach Learn 110:2419\u20132468. https:\/\/doi.org\/10.1007\/s10994-021-05961-4","journal-title":"Mach Learn"},{"issue":"7","key":"359_CR15","doi-asserted-by":"publisher","first-page":"074001","DOI":"10.1088\/1361-6633\/aab406","volume":"81","author":"V Dunjko","year":"2018","unstructured":"Dunjko V, Briegel HJ (2018) Machine learning and artificial intelligence in the quantum domain: a review of recent progress. Rep Prog Phys 81(7):074001. https:\/\/doi.org\/10.1088\/1361-6633\/aab406","journal-title":"Rep Prog Phys"},{"key":"359_CR16","doi-asserted-by":"publisher","first-page":"062314","DOI":"10.1103\/PhysRevA.62.062314","volume":"62","author":"W D\u00fcr","year":"2000","unstructured":"D\u00fcr W, Vidal G, Cirac J (2000) Three qubits can be entangled in two inequivalent ways. Phys Rev A 62:062314. https:\/\/doi.org\/10.1103\/PhysRevA.62.062314","journal-title":"Phys Rev A"},{"key":"359_CR17","doi-asserted-by":"publisher","unstructured":"Elfwing S, Seymour B (2017) Parallel reward and punishment control in humans and robots: safe reinforcement learning using the MaxPain algorithm. In: 2017 joint IEEE international conference on development and learning and epigenetic robotics (ICDL-EpiRob). pp 140\u2013147. https:\/\/doi.org\/10.1109\/DEVLRN.2017.8329799","DOI":"10.1109\/DEVLRN.2017.8329799"},{"key":"359_CR18","volume-title":"Fundamentals of database systems","author":"R Elmasri","year":"2010","unstructured":"Elmasri R, Navathe S (2010) Fundamentals of database systems, 6th edn. Addison-Wesley Publishing Company, USA","edition":"6"},{"key":"359_CR19","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1103\/RevModPhys.74.347","volume":"74","author":"A Galindo","year":"2002","unstructured":"Galindo A, Mart\u00edn-Delgado M (2002) Information and computation: classical and quantum aspects. Rev Mod Phys 74:347\u2013423. https:\/\/doi.org\/10.1103\/RevModPhys.74.347","journal-title":"Rev Mod Phys"},{"key":"359_CR20","doi-asserted-by":"publisher","first-page":"043056","DOI":"10.1103\/PhysRevResearch.4.043056","volume":"4","author":"S Giordano","year":"2022","unstructured":"Giordano S, Martin-Delgado M (2022) Reinforcement-learning generation of four-qubit entangled states. Phys Rev Res 4:043056. https:\/\/doi.org\/10.1103\/PhysRevResearch.4.043056","journal-title":"Phys Rev Res"},{"issue":"9","key":"359_CR21","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TC.2011.144","volume":"61","author":"O Golubitsky","year":"2012","unstructured":"Golubitsky O, Maslov D (2012) A study of optimal 4-bit reversible Toffoli circuits and their synthesis. IEEE Trans Comput 61(9):1341\u20131353. https:\/\/doi.org\/10.1109\/TC.2011.144","journal-title":"IEEE Trans Comput"},{"key":"359_CR22","unstructured":"Gottesman D (1999) The Heisenberg representation of quantum computers. In: Group22: Proceedings of the XXII international colloquium on group theoretical methods in physics. International Press, Cambridge, MA, pp 32\u201343"},{"issue":"7","key":"359_CR23","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/JHEP07(2018)139","volume":"2018","author":"L Hackl","year":"2018","unstructured":"Hackl L, Myers RC (2018) Circuit complexity for free fermions. J High Energy Phys 2018(7):139","journal-title":"J High Energy Phys"},{"issue":"1","key":"359_CR24","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1007\/JHEP01(2022)159","volume":"2022","author":"SS Haque","year":"2022","unstructured":"Haque SS, Jana C, Underwood B (2022) Saturation of thermal complexity of purification. J High Energy Phys 2022(1):159. https:\/\/doi.org\/10.1007\/JHEP01(2022)159","journal-title":"J High Energy Phys"},{"key":"359_CR25","doi-asserted-by":"publisher","first-page":"062311","DOI":"10.1103\/PhysRevA.69.062311","volume":"69","author":"M Hein","year":"2004","unstructured":"Hein M, Eisert J, Briegel H (2004) Multiparty entanglement in graph states. Phys Rev A 69:062311. https:\/\/doi.org\/10.1103\/PhysRevA.69.062311","journal-title":"Phys Rev A"},{"key":"359_CR26","unstructured":"Hein M, D\u00fcr W, Eisert J, Raussendorf R, Nest M, Briegel H (2006) Entanglement in graph states and its applications. In: Proceedings of the international school of physics \u201cEnrico Fermi\u201d: quantum computers, algorithms and chaos. Enrico Fermi School of Physics. IOS Press, Varenna, Italy"},{"key":"359_CR27","doi-asserted-by":"publisher","unstructured":"Jakab HS, Csat\u00f3 L (2015) Sparse approximations to value functions in reinforcement learning. In: Artificial neural networks. Springer, Cham, pp 295\u2013314. https:\/\/doi.org\/10.1007\/978-3-319-09903-3_14","DOI":"10.1007\/978-3-319-09903-3_14"},{"key":"359_CR28","doi-asserted-by":"publisher","unstructured":"Jefferson RA, Myers RC (2017) Circuit complexity in quantum field theory. J High Energy Phys (10):107. https:\/\/doi.org\/10.1007\/JHEP10(2017)107","DOI":"10.1007\/JHEP10(2017)107"},{"key":"359_CR29","doi-asserted-by":"publisher","unstructured":"Kafatos M (ed) (1989) Bell\u2019s theorem, quantum theory and conceptions of the universe. Fundamental theories of physics, vol 37. Springer, Dordrecht. https:\/\/doi.org\/10.1007\/978-94-017-0849-4","DOI":"10.1007\/978-94-017-0849-4"},{"key":"359_CR30","unstructured":"Konda V, Tsitsiklis J (1999) Actor-critic algorithms. In: Advances in neural information processing systems, vol 12. MIT Press, pp 1008\u20131014"},{"key":"359_CR31","unstructured":"Kumabe S, Mori R, Yoshimura Y (2024) Complexity of graph-state preparation by Clifford circuits. arXiv. [quant-ph]. Preprint at arXiv:2402.05874"},{"key":"359_CR32","doi-asserted-by":"publisher","unstructured":"Liu S, Benchasattabuse N, Morgan DQ, Hajdu\u0161ek M, Devitt SJ, Van Meter R (2023) A substrate scheduler for compiling arbitrary fault-tolerant graph states. In: 2023 IEEE international conference on quantum computing and engineering (QCE), vol 01. pp 870\u2013880. https:\/\/doi.org\/10.1109\/QCE57702.2023.00101","DOI":"10.1109\/QCE57702.2023.00101"},{"issue":"6","key":"359_CR33","doi-asserted-by":"publisher","first-page":"1221","DOI":"10.1073\/pnas.1714936115","volume":"115","author":"AA Melnikov","year":"2018","unstructured":"Melnikov AA, Nautrup HP, Krenn M, Dunjko V, Tiersch M, Zeilinger A, Briegel HJ (2018) Active learning machine learns to create new quantum experiments. Proc Natl Acad Sci USA 115(6):1221\u20131226. https:\/\/doi.org\/10.1073\/pnas.1714936115","journal-title":"Proc Natl Acad Sci USA"},{"key":"359_CR34","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1038\/s42256-023-00687-5","volume":"5","author":"F Metz","year":"2023","unstructured":"Metz F, Bukov M (2023) Self-correcting quantum many-body control using reinforcement learning with tensor networks. Nat Mach Intell 5:780\u2013791. https:\/\/doi.org\/10.1038\/s42256-023-00687-5","journal-title":"Nat Mach Intell"},{"issue":"3","key":"359_CR35","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/0020-0190(92)90041-S","volume":"41","author":"J Misra","year":"1992","unstructured":"Misra J, Gries D (1992) A constructive proof of Vizing\u2019s theorem. Inf Process Lett 41(3):131\u2013133. https:\/\/doi.org\/10.1016\/0020-0190(92)90041-S","journal-title":"Inf Process Lett"},{"issue":"7540","key":"359_CR36","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"359_CR37","doi-asserted-by":"publisher","unstructured":"Moflic I, Paler A (2024) Towards faster reinforcement learning of quantum circuit optimisation: exponential reward functions. In: Proceedings of the 18th ACM international symposium on nanoscale architectures. NANOARCH \u201923. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/3611315.3633259","DOI":"10.1145\/3611315.3633259"},{"key":"359_CR38","doi-asserted-by":"publisher","first-page":"215","DOI":"10.22331\/q-2019-12-16-215","volume":"3","author":"HP Nautrup","year":"2019","unstructured":"Nautrup HP, Delfosse N, Dunjko V, Briegel HJ, Friis N (2019) Optimizing quantum error correction codes with reinforcement learning. Quantum 3:215. https:\/\/doi.org\/10.22331\/q-2019-12-16-215","journal-title":"Quantum"},{"key":"359_CR39","unstructured":"Ng A, Harada D, Russell S (1999) Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the sixteenth international conference on machine learning. ICML \u201999. San Francisco, CA, USA, pp 278\u2013287"},{"issue":"3","key":"359_CR40","doi-asserted-by":"publisher","first-page":"213","DOI":"10.26421\/qic6.3-2","volume":"6","author":"MA Nielsen","year":"2006","unstructured":"Nielsen MA (2006) A geometric approach to quantum circuit lower bounds. Quantum Inf Comput 6(3):213\u2013262. https:\/\/doi.org\/10.26421\/qic6.3-2","journal-title":"Quantum Inf Comput"},{"key":"359_CR41","volume-title":"Quantum computation and quantum information: 10th anniversary","author":"M Nielsen","year":"2010","unstructured":"Nielsen M, Chuang I (2010) Quantum computation and quantum information: 10th anniversary. Cambridge University Press, Cambridge"},{"issue":"5764","key":"359_CR42","doi-asserted-by":"publisher","first-page":"1133","DOI":"10.1126\/science.1121541","volume":"311","author":"MA Nielsen","year":"2006","unstructured":"Nielsen MA, Dowling MR, Gu M, Doherty AC (2006) Quantum computation as geometry. Science 311(5764):1133\u20131135. https:\/\/doi.org\/10.1126\/science.1121541","journal-title":"Science"},{"key":"359_CR43","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1038\/s41534-019-0141-3","volume":"55","author":"MY Niu","year":"2019","unstructured":"Niu MY, Boixo S, Smelyanskiy VN, Neven H (2019) Universal quantum control through deep reinforcement learning. NPJ Quantum Inf 55:33. https:\/\/doi.org\/10.1038\/s41534-019-0141-3","journal-title":"NPJ Quantum Inf"},{"key":"359_CR44","doi-asserted-by":"publisher","first-page":"022312","DOI":"10.1103\/PhysRevA.68.022312","volume":"68","author":"R Raussendorf","year":"2003","unstructured":"Raussendorf R, Browne DE, Briegel HJ (2003) Measurement-based quantum computation on cluster states. Phys Rev A 68:022312. https:\/\/doi.org\/10.1103\/PhysRevA.68.022312","journal-title":"Phys Rev A"},{"key":"359_CR45","doi-asserted-by":"publisher","unstructured":"Rietsch S, Dubey AY, Ufrecht C, Periyasamy M, Plinge A, Mutschler C, Scherer DD (2024) Unitary synthesis of Clifford+T circuits with reinforcement learning. In: 2024 IEEE international conference on Quantum Computing and Engineering (QCE), vol 01. Montreal, QC, Canada, pp 824\u2013835. https:\/\/doi.org\/10.1109\/QCE60285.2024.00102","DOI":"10.1109\/QCE60285.2024.00102"},{"issue":"1","key":"359_CR46","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"D Roijers","year":"2013","unstructured":"Roijers D, Vamplew P, Whiteson S, Dazeley R (2013) A survey of multi-objective sequential decision-making. J Artif Intell Res 48(1):67\u2013113. https:\/\/doi.org\/10.1613\/jair.3987","journal-title":"J Artif Intell Res"},{"key":"359_CR47","unstructured":"Rummery G, Niranjan M (1994) On-line q-learning using connectionist systems. In: CTIT technical reports series. Cambridge University Engineering Department, UK, vol 37. https:\/\/ci.nii.ac.jp\/naid\/10013482118"},{"key":"359_CR48","volume-title":"Artificial intelligence: a modern approach","author":"S Russell","year":"2009","unstructured":"Russell S, Norvig P (2009) Artificial intelligence: a modern approach, 3rd edn. Prentice Hall Press, USA","edition":"3"},{"key":"359_CR49","doi-asserted-by":"publisher","first-page":"012308","DOI":"10.1103\/PhysRevA.65.012308","volume":"65","author":"D Schlingemann","year":"2001","unstructured":"Schlingemann D, Werner R (2001) Quantum error-correcting codes associated with graphs. Phys Rev A 65:012308. https:\/\/doi.org\/10.1103\/PhysRevA.65.012308","journal-title":"Phys Rev A"},{"key":"359_CR50","doi-asserted-by":"publisher","first-page":"76388","DOI":"10.1109\/ACCESS.2023.3296802","volume":"11","author":"P Selig","year":"2023","unstructured":"Selig P, Murphy N, Redmond D, Caton S (2023) DeepQPrep: neural network augmented search for quantum state preparation. IEEE Access 11:76388\u201376402. https:\/\/doi.org\/10.1109\/ACCESS.2023.3296802","journal-title":"IEEE Access"},{"key":"359_CR51","doi-asserted-by":"publisher","first-page":"107906","DOI":"10.1016\/j.future.2025.107906","volume":"173","author":"W Sun","year":"2025","unstructured":"Sun W, Li X, Yu L, Wang Z, Chen G, Yang G (2025) MLQM: machine learning approach for accelerating optimal qubit mapping. Futur Gener Comput Syst 173:107906. https:\/\/doi.org\/10.1016\/j.future.2025.107906","journal-title":"Futur Gener Comput Syst"},{"key":"359_CR52","doi-asserted-by":"publisher","unstructured":"Sutton R (1990) Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Machine learning proceedings 1990, San Francisco (CA). pp 216\u2013224. https:\/\/doi.org\/10.1016\/B978-1-55860-141-3.50030-4","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"issue":"4","key":"359_CR53","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"R Sutton","year":"1991","unstructured":"Sutton R (1991) Dyna, an integrated architecture for learning, planning, and reacting. SIGART Bull 2(4):160\u2013163. https:\/\/doi.org\/10.1145\/122344.122377","journal-title":"SIGART Bull"},{"key":"359_CR54","volume-title":"Reinforcement learning: an introduction","author":"R Sutton","year":"2018","unstructured":"Sutton R, Barto A (2018) Reinforcement learning: an introduction. A Bradford Book, Cambridge, MA, USA"},{"key":"359_CR55","unstructured":"Sutton R, McAllester D, Singh S, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. In: Advances in neural information processing systems. NIPS\u201999, vol 12. MIT Press, Cambridge, MA, USA. pp 1057\u20131063"},{"key":"359_CR56","unstructured":"Szepesvari C (2010) Algorithms for reinforcement learning. Morgan and Claypool Publishers, San Rafael, California, USA"},{"issue":"3","key":"359_CR57","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins C, Dayan P (1992) Q-learning. Mach Learn 8(3):279\u2013292. https:\/\/doi.org\/10.1007\/BF00992698","journal-title":"Mach Learn"},{"key":"359_CR58","doi-asserted-by":"publisher","unstructured":"Watrous J (2009) In: Meyers RA (ed) Quantum computational complexity. Springer, New York, NY, pp 7174\u20137201. https:\/\/doi.org\/10.1007\/978-0-387-30440-3_428","DOI":"10.1007\/978-0-387-30440-3_428"},{"key":"359_CR59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3","volume-title":"Reinforcement learning: state-of-the-art","author":"M Wiering","year":"2012","unstructured":"Wiering M, Otterlo M (2012) Reinforcement learning: state-of-the-art. Springer, Heidelberg, Germany"},{"key":"359_CR60","doi-asserted-by":"publisher","first-page":"122420","DOI":"10.1016\/j.ins.2025.122420","volume":"718","author":"L-H Yu","year":"2025","unstructured":"Yu L-H, Li X-Y, Cheng G, Zhu Q-S, Li H, Yang G-W (2025) Knowledge-driven quantum architecture search through filtering and focusing. Inf Sci 718:122420. https:\/\/doi.org\/10.1016\/j.ins.2025.122420","journal-title":"Inf Sci"}],"updated-by":[{"DOI":"10.1007\/s42484-026-00378-5","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T00:00:00Z","timestamp":1773619200000}}],"container-title":["Quantum Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42484-026-00359-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42484-026-00359-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42484-026-00359-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T13:59:10Z","timestamp":1773669550000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42484-026-00359-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,3]]},"references-count":60,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["359"],"URL":"https:\/\/doi.org\/10.1007\/s42484-026-00359-8","relation":{},"ISSN":["2524-4906","2524-4914"],"issn-type":[{"value":"2524-4906","type":"print"},{"value":"2524-4914","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,3]]},"assertion":[{"value":"27 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2026","order":5,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Update","order":6,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The original online version of this article was revised: In the published version of this article, Figure 7 was incomplete. The correct Figure 7, containing both the top and bottom panels, has now been included.","order":7,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2026","order":8,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":9,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":10,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"https:\/\/doi.org\/10.1007\/s42484-026-00378-5","URL":"https:\/\/doi.org\/10.1007\/s42484-026-00378-5","order":11,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"9"}}