{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:33:09Z","timestamp":1757619189477,"version":"3.44.0"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819698172"},{"type":"electronic","value":"9789819698189"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9818-9_35","type":"book-chapter","created":{"date-parts":[[2025,7,19]],"date-time":"2025-07-19T12:24:00Z","timestamp":1752927840000},"page":"415-426","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mamba in Mamba: Offline Reinforcement Learning via Sequence Modeling with Inner and Outer Selective State Spaces"],"prefix":"10.1007","author":[{"given":"Qiang","family":"Han","sequence":"first","affiliation":[]},{"given":"Xiwen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Lifang","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Kaixin","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Xiaoqin","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"issue":"1","key":"35_CR1","doi-asserted-by":"crossref","first-page":"126","DOI":"10.1162\/089892999563184","volume":"11","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning. J. Cogn. Neurosci. 11(1), 126\u2013134 (1999)","journal-title":"J. Cogn. Neurosci."},{"key":"35_CR2","first-page":"61573","volume":"36","author":"S Fujimoto","year":"2023","unstructured":"Fujimoto, S., Chang, W.D., Smith, E., Precup, D.: For sale: State-action representation learning for deep reinforcement learning. Adv. Neural. Inf. Process. Syst. 36, 61573\u201361624 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR3","first-page":"1273","volume":"34","author":"M Janner","year":"2021","unstructured":"Janner, M., Li, Q., Levine, S.: Offline reinforcement learning as one big sequence modeling problem. Adv. Neural. Inf. Process. Syst. 34, 1273\u20131286 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR4","unstructured":"Hu, S., Shen, L., Zhang, Y., Wu, Z., Su, H., Zhao, D.: Graph decision transformer. arXiv preprint arXiv:2303.03747 (2023)"},{"key":"35_CR5","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)"},{"key":"35_CR6","unstructured":"Li, G., Shan, Y., Zhu, Z., Zheng, S., Xu, W., Tan, M.: DiffStitch: boosting offline reinforcement learning with diffusion-based trajectory stitching. arXiv preprint arXiv:2402.02439 (2024)"},{"key":"35_CR7","unstructured":"Bhargava, P., Chitnis, R., Geramifard, A., Held, D., Levine, S.: Decision transformer is a robust contender for offline reinforcement learning. In: Twelfth International Conference on Learning Representations (ICLR 2024)"},{"key":"35_CR8","doi-asserted-by":"publisher","DOI":"10.1016\/j.adhoc.2023.103257","volume":"149","author":"S Zhang","year":"2023","unstructured":"Zhang, S., Lam, K.Y., Shen, B., He, C., Guo, W., Han, Z.: Dynamic spectrum access for Internet-of-Things with hierarchical federated deep reinforcement learning. Ad Hoc Netw. 149, 103257 (2023)","journal-title":"Ad Hoc Netw."},{"key":"35_CR9","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative Q-learning for offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR10","unstructured":"Ghanem, A., Ciblat, P., Ghogho, M.: Multi-objective decision transformers for offline reinforcement learning. arXiv preprint arXiv:2308.16379 (2023)"},{"key":"35_CR11","first-page":"15737","volume":"33","author":"T Xu","year":"2020","unstructured":"Xu, T., Li, Z., Yu, Y.: Error bounds of imitating policies and environments. Adv. Neural. Inf. Process. Syst. 33, 15737\u201315749 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. arXiv preprint arXiv:1805.01954 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"35_CR13","doi-asserted-by":"crossref","unstructured":"Bain, M., Sammut, C.: A framework for behavioural cloning. In: Machine Intelligence 15, pp. 103\u2013129. Oxford University Press, Oxford (1999)","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"issue":"5","key":"35_CR14","doi-asserted-by":"publisher","first-page":"6322","DOI":"10.1109\/TNNLS.2022.3213246","volume":"35","author":"B Zheng","year":"2022","unstructured":"Zheng, B., Verma, S., Zhou, J., Zhang, C., Pan, S.J.: Imitation learning: Progress, taxonomies and challenges. IEEE Trans. Neural Netw. Learn. Syst. 35(5), 6322\u20136337 (2022)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"35_CR15","unstructured":"Janner, M., Li, Q., Levine, S.: Reinforcement learning as one big sequence modeling problem. arXiv preprint arXiv:2106.02039 (2021)"},{"key":"35_CR16","unstructured":"Mao, H., Zhao, R., Chen, H., Wang, J., Liu, Q.: Transformer in transformer as backbone for deep reinforcement learning. arXiv preprint arXiv:2212.14538 (2022)"},{"key":"35_CR17","unstructured":"Mao, H., Zhao, R., Li, Z., Wang, J., Liu, Q.: PDiT: interleaving perception and decision-making transformers for deep reinforcement learning. arXiv preprint arXiv:2312.15863 (2023)"},{"key":"35_CR18","first-page":"15084","volume":"34","author":"L Chen","year":"2021","unstructured":"Chen, L., Lu, K., Rajeswaran, A., Lee, K., Grover, A., Abbeel, P.: Decision transformer: reinforcement learning via sequence modeling. Adv. Neural. Inf. Process. Syst. 34, 15084\u201315097 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR19","unstructured":"Kim, J., Lee, S., Kim, W., Ahn, S., Lee, J.: Decision convformer: local filtering in metaformer is sufficient for decision making. arXiv preprint arXiv:2310.03022 (2023)"},{"key":"35_CR20","unstructured":"Liu, Y., Tian, Y., Zhao, Y., Liu, C., Zhang, J.: VMamba: visual state space model. arXiv preprint arXiv:2401.10166 (2024)"},{"key":"35_CR21","unstructured":"Gu, A., Dao, T.: Mamba: linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)"},{"key":"35_CR22","unstructured":"Fu, D.Y., Dao, T., Saab, K.K., Jang, E., Recht, B., R\u00e9, C.: Hungry hungry hippos: Towards language modeling with state space models. arXiv preprint arXiv:2212.14052 (2022)"},{"key":"35_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.adhoc.2022.102880","volume":"132","author":"M Roy","year":"2022","unstructured":"Roy, M., Biswas, D., Aslam, N., Al-Bayatti, A.H.: Reinforcement learning based effective communication strategies for energy harvested WBAN. Ad Hoc Netw. 132, 102880 (2022)","journal-title":"Ad Hoc Netw."},{"key":"35_CR24","unstructured":"Prudencio, R.F., Maximo, M.R.O.A., Colombini, E.L.: A survey on offline reinforcement learning: Taxonomy, review, and open problems. IEEE Trans. Neural Netw. Learn. Syst. (2023)"},{"key":"35_CR25","unstructured":"Ma, Y., Xiao, C., Liang, H., Wang, Y., Xu, L.: Rethinking decision transformer via hierarchical reinforcement learning. arXiv preprint arXiv:2311.00267 (2023)"},{"key":"35_CR26","unstructured":"Wang, X., Wang, S., Ding, Y., Wang, Y., Huang, M., Zhang, X.: State space model for new-generation network alternative to transformers: a survey. arXiv preprint arXiv:2404.09516 (2024)"},{"key":"35_CR27","unstructured":"Gu, A., Goel, K., R\u00e9, C.: Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv:2111.00396 (2021)"},{"key":"35_CR28","unstructured":"Ota, T.: Decision mamba: reinforcement learning via sequence modeling with selective state spaces. arXiv preprint arXiv:2403.19925 (2024)"},{"key":"35_CR29","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4RL: datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)"},{"key":"35_CR30","unstructured":"Brockman, G., et al.: OpenAI Gym. arXiv preprint arXiv:1606.01540 (2016)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9818-9_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T15:02:43Z","timestamp":1757257363000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9818-9_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819698172","9789819698189"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9818-9_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"20 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}