{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T02:01:42Z","timestamp":1780020102043,"version":"3.53.1"},"reference-count":84,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100004410","name":"T\u00fcrkiye Bilimsel ve Teknolojik Ara\u015ft\u0131rma Kurumu","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004410","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.knosys.2026.116178","type":"journal-article","created":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:19:07Z","timestamp":1778757547000},"page":"116178","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Subgoal identification with multiple instance learning methods in landmark Partially Observable Markov Decision Process problems"],"prefix":"10.1016","volume":"346","author":[{"given":"Saim","family":"Sunel","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Faruk","family":"Polat","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"1","key":"10.1016\/j.knosys.2026.116178_b1","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning","volume":"112","author":"Sutton","year":"1999","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.knosys.2026.116178_b2","series-title":"Proceedings of the Eighteenth International Conference on Machine Learning","first-page":"361","article-title":"Automatic discovery of subgoals in reinforcement learning using diverse density","author":"McGovern","year":"2001"},{"key":"10.1016\/j.knosys.2026.116178_b3","series-title":"13th European Conference on Machine Learning Proceedings","first-page":"295","article-title":"Q-cut-dynamic discovery of sub-goals in reinforcement learning","volume":"vol. 14","author":"Menache","year":"2002"},{"issue":"2","key":"10.1016\/j.knosys.2026.116178_b4","doi-asserted-by":"crossref","DOI":"10.1145\/3643852","article-title":"Faster MIL-based subgoal identification for reinforcement learning by tuning fewer hyperparameters","volume":"19","author":"Sunel","year":"2024","journal-title":"ACM Trans. Auton. Adapt. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b5","unstructured":"O. Simsek, A. Barto, Skill characterization based on betweenness, in: Advances in Neural Information Processing Systems 21 - Proceedings of the 2008 Conference, 2009, pp. 1497\u20131504."},{"key":"10.1016\/j.knosys.2026.116178_b6","series-title":"Proceedings of the 22nd International Conference on Machine Learning","first-page":"816","article-title":"Identifying useful subgoals in reinforcement learning by local graph partitioning","author":"\u015eim\u015fek","year":"2005"},{"key":"10.1016\/j.knosys.2026.116178_b7","series-title":"Proceedings of the 13th European Conference on Machine Learning","first-page":"295","article-title":"Q-cut - Dynamic discovery of sub-goals in reinforcement learning","author":"Menache","year":"2002"},{"key":"10.1016\/j.knosys.2026.116178_b8","doi-asserted-by":"crossref","unstructured":"S. Kazemitabar, H. Beigy, Automatic discovery of subgoals in reinforcement learning using strongly connected components. 5506 (2008) 829\u2013834. http:\/\/dx.doi.org\/10.1007\/978-3-642-02490-0_101.","DOI":"10.1007\/978-3-642-02490-0_101"},{"key":"10.1016\/j.knosys.2026.116178_b9","series-title":"Using relative novelty to identify useful temporal abstractions in reinforcement learning","author":"Simsek","year":"2004"},{"key":"10.1016\/j.knosys.2026.116178_b10","article-title":"Improved automatic discovery of subgoals for options in hierarchical reinforcement learning","author":"Kretchmar","year":"2003","journal-title":"J. Comput. Sci. Technol. - JCST"},{"key":"10.1016\/j.knosys.2026.116178_b11","series-title":"2017 IEEE 29th International Conference on Tools with Artificial Intelligence","first-page":"1","article-title":"A concept filtering approach for diverse density to discover subgoals in reinforcement learning","author":"Demir","year":"2017"},{"issue":"1","key":"10.1016\/j.knosys.2026.116178_b12","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1023\/A:1022619109594","article-title":"Learning to perceive and act by trial and error","volume":"7","author":"Whitehead","year":"1991","journal-title":"Mach. Learn."},{"key":"10.1016\/j.knosys.2026.116178_b13","series-title":"SICE Annual Conference 2007","first-page":"2601","article-title":"Reinforcement learning in non-markovian environments using automatic discovery of subgoals","author":"Komeda","year":"2007"},{"key":"10.1016\/j.knosys.2026.116178_b14","series-title":"Advances in Neural Information Processing Systems","article-title":"A framework for multiple-instance learning","volume":"Vol. 10","author":"Maron","year":"1998"},{"key":"10.1016\/j.knosys.2026.116178_b15","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1016\/j.patcog.2017.08.026","article-title":"Revisiting multiple instance neural networks","volume":"74","author":"Wang","year":"2018","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2026.116178_b16","unstructured":"M. James, S. Singh, SarsaLandmark: An algorithm for learning in POMDPs with landmarks. 1 (2009) 585\u2013591. http:\/\/dx.doi.org\/10.1145\/1558013.1558094."},{"key":"10.1016\/j.knosys.2026.116178_b17","doi-asserted-by":"crossref","first-page":"49089","DOI":"10.1109\/ACCESS.2018.2854283","article-title":"A deep hierarchical reinforcement learning algorithm in partially observable Markov decision processes","volume":"6","author":"Le","year":"2018","journal-title":"IEEE Access"},{"key":"10.1016\/j.knosys.2026.116178_b18","series-title":"Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence","article-title":"Reinforcement learning in POMDPs with memoryless options and option-observation initiation sets","author":"Steckelmacher","year":"2018"},{"key":"10.1016\/j.knosys.2026.116178_b19","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"1998"},{"issue":"1","key":"10.1016\/j.knosys.2026.116178_b20","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/0022-247X(65)90154-X","article-title":"Optimal control of Markov processes with incomplete state information","volume":"10","author":"\u00c5str\u00f6m","year":"1965","journal-title":"J. Math. Anal. Appl."},{"issue":"1","key":"10.1016\/j.knosys.2026.116178_b21","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1016\/S0004-3702(96)00034-3","article-title":"Solving the multiple instance problem with axis-parallel rectangles","volume":"89","author":"Dietterich","year":"1997","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.knosys.2026.116178_b22","series-title":"Multiple instance learning. Foundations and algorithms","author":"Herrera","year":"2016"},{"key":"10.1016\/j.knosys.2026.116178_b23","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","article-title":"Hierarchical reinforcement learning with the MAXQ value function decomposition","volume":"13","author":"Dietterich","year":"2000","journal-title":"J. Artificial Intelligence Res."},{"issue":"5","key":"10.1016\/j.knosys.2026.116178_b24","doi-asserted-by":"crossref","DOI":"10.1145\/3453160","article-title":"Hierarchical reinforcement learning: A comprehensive survey","volume":"54","author":"Pateria","year":"2021","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.knosys.2026.116178_b25","doi-asserted-by":"crossref","unstructured":"P.-L. Bacon, J. Harb, D. Precup, The option-critic architecture, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 31, 2017.","DOI":"10.1609\/aaai.v31i1.10916"},{"issue":"2","key":"10.1016\/j.knosys.2026.116178_b26","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1177\/105971239700600202","article-title":"HQ-learning","volume":"6","author":"Wiering","year":"1997","journal-title":"Adapt. Behav."},{"key":"10.1016\/j.knosys.2026.116178_b27","unstructured":"N. Hernandez-Gardiol, S. Mahadevan, Hierarchical Memory-Based Reinforcement Learning, in: T. Leen, T. Dietterich, V. Tresp (Eds.), Advances in Neural Information Processing Systems, Vol. 13, MIT Press, URL https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2000\/file\/85f007f8c50dd25f5a45fca73cad64bd-Paper.pdf."},{"key":"10.1016\/j.knosys.2026.116178_b28","series-title":"Advances in Neural Information Processing Systems","article-title":"Approximate planning in POMDPs with macro-actions","volume":"Vol. 16","author":"Theocharous","year":"2003"},{"key":"10.1016\/j.knosys.2026.116178_b29","series-title":"2006 IEEE International Conference on Systems, Man and Cybernetics","first-page":"4813","article-title":"An acquiring method of macro-actions in reinforcement learning","volume":"Vol. 6","author":"Yoshikawa","year":"2006"},{"key":"10.1016\/j.knosys.2026.116178_b30","article-title":"Automated hierarchy discovery for planning in partially observable environments","volume":"19","author":"Charlin","year":"2006","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b31","series-title":"Tractable Planning Under Uncertainty: Exploiting Structure","author":"Pineau","year":"2004"},{"key":"10.1016\/j.knosys.2026.116178_b32","doi-asserted-by":"crossref","unstructured":"R. He, E. Brunskill, N. Roy, PUMA: Planning under uncertainty with macro-actions, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 24, 2010, pp. 1089\u20131095.","DOI":"10.1609\/aaai.v24i1.7749"},{"key":"10.1016\/j.knosys.2026.116178_b33","series-title":"2012 IEEE\/WIC\/ACM International Conferences on Web Intelligence and Intelligent Agent Technology","first-page":"348","article-title":"Abstraction in model based partially observable reinforcement learning using extended sequence trees","volume":"Vol. 2","author":"\u00c7ilden","year":"2012"},{"key":"10.1016\/j.knosys.2026.116178_b34","series-title":"2013 IEEE 25th International Conference on Tools with Artificial Intelligence","first-page":"719","article-title":"Generating memoryless policies faster using automatic temporal abstractions for reinforcement learning with hidden state","author":"\u00c7ilden","year":"2013"},{"key":"10.1016\/j.knosys.2026.116178_b35","article-title":"Learning reward machines for partially observable reinforcement learning","volume":"32","author":"Toro Icarte","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b36","doi-asserted-by":"crossref","unstructured":"X. Ye, Y. Yang, Hierarchical and partially observable goal-driven policy learning with goals relational graph, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 14101\u201314110.","DOI":"10.1109\/CVPR46437.2021.01388"},{"key":"10.1016\/j.knosys.2026.116178_b37","doi-asserted-by":"crossref","unstructured":"M. Abdulhai, D.-K. Kim, M. Riemer, M. Liu, G. Tesauro, J.P. How, Context-specific representation abstraction for deep option learning, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 36, 2022, pp. 5959\u20135967.","DOI":"10.1609\/aaai.v36i6.20541"},{"key":"10.1016\/j.knosys.2026.116178_b38","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1016\/j.future.2022.03.016","article-title":"Using chains of bottleneck transitions to decompose and solve reinforcement learning tasks with hidden states","volume":"133","author":"Ayd\u0131n","year":"2022","journal-title":"Future Gener. Comput. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b39","series-title":"On-line Q-Learning Using Connectionist Systems","author":"Rummery","year":"1994"},{"key":"10.1016\/j.knosys.2026.116178_b40","series-title":"EM-DD: An improved multiple-instance learning technique","first-page":"1073","author":"Zhang","year":"2001"},{"key":"10.1016\/j.knosys.2026.116178_b41","series-title":"Advances in Knowledge Discovery and Data Mining","first-page":"272","article-title":"Logistic regression and boosting for labeled bags of instances","author":"Xu","year":"2004"},{"key":"10.1016\/j.knosys.2026.116178_b42","series-title":"International Conference on Machine Learning","first-page":"2127","article-title":"Attention-based deep multiple instance learning","author":"Ilse","year":"2018"},{"key":"10.1016\/j.knosys.2026.116178_b43","doi-asserted-by":"crossref","unstructured":"X. Shi, F. Xing, Y. Xie, Z. Zhang, L. Cui, L. Yang, Loss-based attention for deep multiple instance learning, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 34, 2020, pp. 5742\u20135749.","DOI":"10.1609\/aaai.v34i04.6030"},{"key":"10.1016\/j.knosys.2026.116178_b44","doi-asserted-by":"crossref","unstructured":"S. Pal, A. Valkanas, F. Regol, M. Coates, Bag graph: Multiple instance learning using bayesian graph neural networks, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 36, 2022, pp. 7922\u20137930.","DOI":"10.1609\/aaai.v36i7.20762"},{"key":"10.1016\/j.knosys.2026.116178_b45","unstructured":"Z.-H. Zhou, M.-L. Zhang, Neural networks for multi-instance learning, in: Proceedings of the International Conference on Intelligent Information Technology, Beijing, China, 2002, pp. 455\u2013459."},{"key":"10.1016\/j.knosys.2026.116178_b46","unstructured":"D. Kingma, J. Ba, Adam: A Method for Stochastic Optimization, in: International Conference on Learning Representations, 2014."},{"issue":"1","key":"10.1016\/j.knosys.2026.116178_b47","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1007\/BF01589116","article-title":"On the limited memory BFGS method for large scale optimization","volume":"45","author":"Liu","year":"1989","journal-title":"Math. Program."},{"key":"10.1016\/j.knosys.2026.116178_b48","series-title":"Advances in Optimization and Numerical Analysis","first-page":"51","article-title":"A direct search optimization method that models the objective and constraint functions by linear interpolation","author":"Powell","year":"1994"},{"key":"10.1016\/j.knosys.2026.116178_b49","series-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2014"},{"key":"10.1016\/j.knosys.2026.116178_b50","series-title":"Advances in Neural Information Processing Systems","article-title":"Attention is all you need","volume":"Vol. 30","year":"2017"},{"key":"10.1016\/j.knosys.2026.116178_b51","first-page":"2136","article-title":"Transmil: Transformer based correlated multiple instance learning for whole slide image classification","volume":"34","author":"Shao","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b52","series-title":"2021 IEEE Winter Conference on Applications of Computer Vision","first-page":"1720","article-title":"Kernel self-attention for weakly-supervised image classification using deep multiple instance learning","author":"Rymarczyk","year":"2021"},{"key":"10.1016\/j.knosys.2026.116178_b53","series-title":"International Conference on Machine Learning","first-page":"3744","article-title":"Set transformer: A framework for attention-based permutation-invariant neural networks","author":"Lee","year":"2019"},{"key":"10.1016\/j.knosys.2026.116178_b54","series-title":"Semi-supervised classification with graph convolutional networks","author":"Kipf","year":"2016"},{"issue":"20","key":"10.1016\/j.knosys.2026.116178_b55","first-page":"10","article-title":"Graph attention networks","volume":"1050","author":"Velickovic","year":"2017","journal-title":"Stat"},{"key":"10.1016\/j.knosys.2026.116178_b56","series-title":"Multiple instance learning with graph neural networks","author":"Tu","year":"2019"},{"key":"10.1016\/j.knosys.2026.116178_b57","article-title":"Hierarchical graph representation learning with differentiable pooling","volume":"31","author":"Ying","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b58","series-title":"Multi-instance learning by utilizing structural relationship among instances","author":"Ma","year":"2021"},{"key":"10.1016\/j.knosys.2026.116178_b59","series-title":"Variational graph auto-encoders","author":"Kipf","year":"2016"},{"issue":"11","key":"10.1016\/j.knosys.2026.116178_b60","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pcbi.1011623","article-title":"MIGGRI: A multi-instance graph neural network model for inferring gene regulatory networks for drosophila from spatial expression images","volume":"19","author":"Huang","year":"2023","journal-title":"PLoS Comput. Biol."},{"key":"10.1016\/j.knosys.2026.116178_b61","series-title":"2022 IEEE International Conference on Bioinformatics and Biomedicine","first-page":"430","article-title":"MilGNet: A multi-instance learning-based heterogeneous graph network for drug repositioning","author":"Gu","year":"2022"},{"key":"10.1016\/j.knosys.2026.116178_b62","series-title":"2024 7th International Conference on Artificial Intelligence and Big Data","first-page":"505","article-title":"Graph neural network based multi-instance learning with graph structure learning","author":"Liu","year":"2024"},{"issue":"11","key":"10.1016\/j.knosys.2026.116178_b63","doi-asserted-by":"crossref","first-page":"2673","DOI":"10.1109\/78.650093","article-title":"Bidirectional recurrent neural networks","volume":"45","author":"Schuster","year":"1997","journal-title":"IEEE Trans. Signal Process."},{"key":"10.1016\/j.knosys.2026.116178_b64","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers)","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"10.1016\/j.knosys.2026.116178_b65","series-title":"Semi-supervised sequence tagging with bidirectional language models","author":"Peters","year":"2017"},{"key":"10.1016\/j.knosys.2026.116178_b66","doi-asserted-by":"crossref","first-page":"629","DOI":"10.1007\/s13042-019-01021-5","article-title":"Multiple instance learning for sequence data with across bag dependencies","volume":"11","author":"Zoghlami","year":"2020","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"10.1016\/j.knosys.2026.116178_b67","series-title":"Deep inside convolutional networks: Visualising image classification models and saliency maps","author":"Simonyan","year":"2013"},{"key":"10.1016\/j.knosys.2026.116178_b68","series-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","first-page":"4768","article-title":"A unified approach to interpreting model predictions","author":"Lundberg","year":"2017"},{"key":"10.1016\/j.knosys.2026.116178_b69","series-title":"Proceedings of the 34th International Conference on Machine Learning - Volume 70","first-page":"3319","article-title":"Axiomatic attribution for deep networks","author":"Sundararajan","year":"2017"},{"key":"10.1016\/j.knosys.2026.116178_b70","series-title":"Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"1135","article-title":"\u201cWhy should I trust you?\u201d: Explaining the predictions of any classifier","author":"Ribeiro","year":"2016"},{"key":"10.1016\/j.knosys.2026.116178_b71","series-title":"Advances in Neural Information Processing Systems","article-title":"Gnnexplainer: Generating explanations for graph neural networks","volume":"Vol. 32","author":"Ying","year":"2019"},{"key":"10.1016\/j.knosys.2026.116178_b72","series-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","article-title":"Towards multi-grained explainability for graph neural networks","author":"Wang","year":"2024"},{"key":"10.1016\/j.knosys.2026.116178_b73","series-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","article-title":"Parameterized explainer for graph neural network","author":"Luo","year":"2020"},{"key":"10.1016\/j.knosys.2026.116178_b74","series-title":"Proceedings of the 38th International Conference on Machine Learning","first-page":"12241","article-title":"On explainability of graph neural networks via subgraph explorations","volume":"vol. 139","author":"Yuan","year":"2021"},{"key":"10.1016\/j.knosys.2026.116178_b75","doi-asserted-by":"crossref","unstructured":"H. Chefer, S. Gur, L. Wolf, Generic attention-model explainability for interpreting bi-modal and encoder-decoder transformers, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 397\u2013406.","DOI":"10.1109\/ICCV48922.2021.00045"},{"key":"10.1016\/j.knosys.2026.116178_b76","series-title":"Neural Information Processing Systems","article-title":"Additive MIL: Intrinsically interpretable multiple instance learning for pathology","author":"Javed","year":"2022"},{"key":"10.1016\/j.knosys.2026.116178_b77","first-page":"1","article-title":"Reinforced GNNs for multiple instance learning","author":"Zhao","year":"2024","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2026.116178_b78","series-title":"Model agnostic interpretability for multiple instance learning","author":"Early","year":"2022"},{"key":"10.1016\/j.knosys.2026.116178_b79","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1007\/BF01386390","article-title":"A note on two problems in connexion with graphs","volume":"1","author":"Dijkstra","year":"1959","journal-title":"Numer. Math."},{"key":"10.1016\/j.knosys.2026.116178_b80","doi-asserted-by":"crossref","DOI":"10.1017\/S026988891900002X","article-title":"Automatic landmark discovery for learning agents under partial observability","volume":"34","author":"Demir","year":"2019","journal-title":"Knowl. Eng. Rev."},{"issue":"3","key":"10.1016\/j.knosys.2026.116178_b81","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/j.knosys.2026.116178_b82","series-title":"Machine Learning Proceedings 1994","first-page":"226","article-title":"Incremental multi-step Q-learning","author":"Peng","year":"1994"},{"issue":"7540","key":"10.1016\/j.knosys.2026.116178_b83","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.knosys.2026.116178_b84","series-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126009044?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126009044?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T01:04:10Z","timestamp":1780016650000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126009044"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":84,"alternative-id":["S0950705126009044"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116178","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Subgoal identification with multiple instance learning methods in landmark Partially Observable Markov Decision Process problems","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116178","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"116178"}}