{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T00:50:17Z","timestamp":1775263817932,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T00:00:00Z","timestamp":1708646400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,23]]},"DOI":"10.1145\/3653876.3653899","type":"proceedings-article","created":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T12:28:23Z","timestamp":1722515303000},"page":"205-211","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Deep Reinforcement Learning with Swin Transformers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8867-9104","authenticated-orcid":false,"given":"Li","family":"Meng","sequence":"first","affiliation":[{"name":"Department of Technology Systems, University of Oslo, Norway"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6331-702X","authenticated-orcid":false,"given":"Morten","family":"Goodwin","sequence":"additional","affiliation":[{"name":"Centre for Artificial Intelligence Research, University of Agder, Norway"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7591-1659","authenticated-orcid":false,"given":"Anis","family":"Yazidi","sequence":"additional","affiliation":[{"name":"Oslo Metropolitan University, Norway"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8371-927X","authenticated-orcid":false,"given":"Paal","family":"Engelstad","sequence":"additional","affiliation":[{"name":"Department of Technology Systems, University of Oslo, Norway"}]}],"member":"320","published-online":{"date-parts":[[2024,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Deep reinforcement learning at the edge of the statistical precipice. Advances in Neural Information Processing Systems 34","author":"Agarwal Rishabh","year":"2021","unstructured":"Rishabh Agarwal, Max Schwarzer, Pablo\u00a0Samuel Castro, Aaron\u00a0C Courville, and Marc Bellemare. 2021. Deep reinforcement learning at the edge of the statistical precipice. Advances in Neural Information Processing Systems 34 (2021)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_3_1","volume-title":"Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems 34","author":"Chen Lili","year":"2021","unstructured":"Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Misha Laskin, Pieter Abbeel, Aravind Srinivas, and Igor Mordatch. 2021. Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems 34 (2021)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"e_1_3_2_1_5_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. ICLR","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, 2021. An image is worth 16x16 words: Transformers for image recognition at scale. ICLR (2021)."},{"key":"e_1_3_2_1_6_1","volume-title":"Advances in neural information processing systems 23. Curran Associates","author":"Hasselt Hado","unstructured":"Hado Hasselt. 2010. Double Q-learning. In Advances in neural information processing systems 23. Curran Associates, Inc., Red Hook, NY, 2613\u20132621."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_8_1","volume-title":"Improving Sample Efficiency of Value Based Models Using Attention and Vision Transformers. arXiv preprint arXiv:2202.00710","author":"Kalantari Amir\u00a0Ardalan","year":"2022","unstructured":"Amir\u00a0Ardalan Kalantari, Mohammad Amini, Sarath Chandar, and Doina Precup. 2022. Improving Sample Efficiency of Value Based Models Using Attention and Vision Transformers. arXiv preprint arXiv:2202.00710 (2022). 10.48550\/arXiv.2202.00710."},{"key":"e_1_3_2_1_9_1","volume-title":"Vision transformer for learning driving policies in complex multi-agent environments. arXiv preprint","author":"Kargar Eshagh","year":"2021","unstructured":"Eshagh Kargar and Ville Kyrki. 2021. Vision transformer for learning driving policies in complex multi-agent environments. arXiv preprint (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014). 10.48550\/arXiv.1412.6980."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555429"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/WorldS451998.2021.9514052"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3529570.3529586"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617695.3617723"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/VRW58643.2023.00064"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580978"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392393"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_19_1","volume-title":"Expert Q-learning: Deep Q-learning With State Values From Expert Examples.CoRR","author":"Meng Li","year":"2021","unstructured":"Li Meng, Anis Yazidi, Morten Goodwin, and Paal Engelstad. 2021. Expert Q-learning: Deep Q-learning With State Values From Expert Examples.CoRR (2021). 10.7557\/18.6237."},{"key":"e_1_3_2_1_20_1","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Alex Graves Ioannis Antonoglou Daan Wierstra and Martin Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. arxiv:1312.5602\u00a0[cs.LG] 10.48550\/arXiv.1312.5602."},{"key":"e_1_3_2_1_21_1","volume-title":"Human-level control through deep reinforcement learning. nature 518, 7540","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei\u00a0A Rusu, Joel Veness, Marc\u00a0G Bellemare, Alex Graves, Martin Riedmiller, Andreas\u00a0K Fidjeland, Georg Ostrovski, 2015. Human-level control through deep reinforcement learning. nature 518, 7540 (2015), 529\u2013533."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157382.3157548"},{"key":"e_1_3_2_1_23_1","volume-title":"Image Generation of Egyptian Hieroglyphs. In ICMLC","author":"Hui","year":"2024","unstructured":"B.\u00a0Hui S.\u00a0Gao and Li W. 2024. Image Generation of Egyptian Hieroglyphs. In ICMLC 2024."},{"key":"e_1_3_2_1_24_1","volume-title":"Incentivizing exploration in reinforcement learning with deep predictive models. arXiv preprint arXiv:1507.00814","author":"Stadie C","year":"2015","unstructured":"Bradly\u00a0C Stadie, Sergey Levine, and Pieter Abbeel. 2015. Incentivizing exploration in reinforcement learning with deep predictive models. arXiv preprint arXiv:1507.00814 (2015). 10.48550\/arXiv.1507.00814."},{"key":"e_1_3_2_1_25_1","volume-title":"Evaluating Vision Transformer Methods for Deep Reinforcement Learning from Pixels. arXiv preprint arXiv:2204.04905","author":"Tao Tianxin","year":"2022","unstructured":"Tianxin Tao, Daniele Reda, and Michiel van\u00a0de Panne. 2022. Evaluating Vision Transformer Methods for Deep Reinforcement Learning from Pixels. arXiv preprint arXiv:2204.04905 (2022). 10.48550\/arXiv.2204.04905."},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 4th Connectionist Models Summer School","author":"Thrun Sebastian","year":"1993","unstructured":"Sebastian Thrun and Anton Schwartz. 1993. Issues in using function approximation for reinforcement learning. In Proceedings of the 4th Connectionist Models Summer School. Hillsdale, NJ, 255\u2013263."},{"key":"e_1_3_2_1_27_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017). 10.5555\/3295222.3295349."},{"key":"e_1_3_2_1_28_1","volume-title":"Self-attention with linear complexity. arXiv preprint 8","author":"Wang Sinong","year":"2020","unstructured":"Sinong Wang, Belinda Li, Madian Khabsa, Han Fang, and H\u00a0Linformer Ma. 2020. Self-attention with linear complexity. arXiv preprint 8 (2020)."},{"key":"e_1_3_2_1_29_1","volume-title":"Machine learning 8, 3-4","author":"Watkins JCH","year":"1992","unstructured":"Christopher\u00a0JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning 8, 3-4 (1992), 279\u2013292. 10.1007\/BF00992698."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 28th ACM International Conference on Multimedia. 1162\u20131170","author":"Wang","unstructured":"Wang Y., Liang W., Li W., Li D., and Yu L.F. 2020. Scene-aware background music synthesis. In Proceedings of the 28th ACM International Conference on Multimedia. 1162\u20131170."},{"key":"e_1_3_2_1_31_1","volume-title":"International Conference on Machine Learning. PMLR, 27042\u201327059","author":"Zheng Qinqing","year":"2022","unstructured":"Qinqing Zheng, Amy Zhang, and Aditya Grover. 2022. Online decision transformer. In International Conference on Machine Learning. PMLR, 27042\u201327059."}],"event":{"name":"ICDSP 2024: 2024 8th International Conference on Digital Signal Processing","location":"Hangzhou China","acronym":"ICDSP 2024"},"container-title":["Proceedings of the 2024 8th International Conference on Digital Signal Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3653876.3653899","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3653876.3653899","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:34:10Z","timestamp":1755912850000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3653876.3653899"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,23]]},"references-count":31,"alternative-id":["10.1145\/3653876.3653899","10.1145\/3653876"],"URL":"https:\/\/doi.org\/10.1145\/3653876.3653899","relation":{},"subject":[],"published":{"date-parts":[[2024,2,23]]},"assertion":[{"value":"2024-08-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}