{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T13:54:38Z","timestamp":1756994078077,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,5,26]],"date-time":"2023-05-26T00:00:00Z","timestamp":1685059200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,5,26]]},"DOI":"10.1145\/3603781.3603857","type":"proceedings-article","created":{"date-parts":[[2023,7,27]],"date-time":"2023-07-27T18:02:29Z","timestamp":1690480949000},"page":"415-420","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["STFormer3D: Spatio-Temporal Transformer Based 3D Object Detection for Intelligent Driving"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0623-7178","authenticated-orcid":false,"given":"Wei","family":"Liu","sequence":"first","affiliation":[{"name":"College of Information Science and Engineering, Northeastern University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2422-5589","authenticated-orcid":false,"given":"Yue","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Information Science and Engineering, Northeastern University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6434-8119","authenticated-orcid":false,"given":"Haoxiang","family":"Jie","sequence":"additional","affiliation":[{"name":"Neusoft Reach Automotive Technology Ltd, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7094-1901","authenticated-orcid":false,"given":"Jun","family":"Hu","sequence":"additional","affiliation":[{"name":"Neusoft Reach Automotive Technology Ltd, China"}]}],"member":"320","published-online":{"date-parts":[[2023,7,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Pointrcnn: 3d object proposal generation and detection from point cloud","author":"Shi S.","year":"2018","unstructured":"S. Shi , X. Wang , and H. Li , \u201c Pointrcnn: 3d object proposal generation and detection from point cloud ,\u201d Cornell University - arXiv, 2018 . S. Shi, X. Wang, and H. Li, \u201cPointrcnn: 3d object proposal generation and detection from point cloud,\u201d Cornell University - arXiv, 2018."},{"key":"e_1_3_2_1_2_1","volume-title":"Pointnet++: Deep hierarchical feature learning on point sets in a metric space","author":"Qi C. R.","year":"2017","unstructured":"C. R. Qi , L. Yi , H. Su , and L. J. Guibas , \u201c Pointnet++: Deep hierarchical feature learning on point sets in a metric space ,\u201d arXiv: Computer Vision and Pattern Recognition , 2017 . C. R. Qi, L. Yi, H. Su, and L. J. Guibas, \u201cPointnet++: Deep hierarchical feature learning on point sets in a metric space,\u201d arXiv: Computer Vision and Pattern Recognition, 2017."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1109\/CVPR.2019.01296"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.15607\/RSS.2016.XII.042"},{"key":"e_1_3_2_1_5_1","volume-title":"Voxelnet: End-to-end learning for point cloud based 3d object detection","author":"Zhou Y.","year":"2018","unstructured":"Y. Zhou and O. Tuzel , \u201c Voxelnet: End-to-end learning for point cloud based 3d object detection ,\u201d Cornell University - arXiv, 2018 . Y. Zhou and O. Tuzel, \u201cVoxelnet: End-to-end learning for point cloud based 3d object detection,\u201d Cornell University - arXiv, 2018."},{"key":"e_1_3_2_1_6_1","volume-title":"Pointpillars: Fast encoders for object detection from point clouds","author":"Lang A. H.","year":"2018","unstructured":"A. H. Lang , S. Vora , H. Caesar , L. Zhou , J. Yang , and O. Beijbom , \u201c Pointpillars: Fast encoders for object detection from point clouds ,\u201d arXiv: Learning , 2018 . A. H. Lang, S. Vora, H. Caesar, L. Zhou, J. Yang, and O. Beijbom, \u201cPointpillars: Fast encoders for object detection from point clouds,\u201d arXiv: Learning, 2018."},{"key":"e_1_3_2_1_7_1","volume-title":"Multi-view 3d object detection network for autonomous driving","author":"Chen X.","year":"2016","unstructured":"X. Chen , H. Ma , J. Wan , B. Li , and X. Tian , \u201c Multi-view 3d object detection network for autonomous driving ,\u201d Cornell University - arXiv, 2016 . X. Chen, H. Ma, J. Wan, B. Li, and X. Tian, \u201cMulti-view 3d object detection network for autonomous driving,\u201d Cornell University - arXiv, 2016."},{"key":"e_1_3_2_1_8_1","volume-title":"Bevfusion: Multi-task multi-sensor fusion with unified bird's-eye view representation","author":"Liu Z.","year":"2023","unstructured":"Z. Liu , H. Tang , M. A. Amini , X. Yang , H. Mao , O. Daniela , R. Mit , H. Song , and Mit , \u201c Bevfusion: Multi-task multi-sensor fusion with unified bird's-eye view representation ,\u201d 2023 . Z. Liu, H. Tang, M. A. Amini, X. Yang, H. Mao, O. Daniela, R. Mit, H. Song, and Mit, \u201cBevfusion: Multi-task multi-sensor fusion with unified bird's-eye view representation,\u201d 2023."},{"key":"e_1_3_2_1_9_1","volume-title":"Mapfusion: A general framework for 3d object detection with hdmaps","author":"Fang J.","year":"2021","unstructured":"J. Fang , D. Zhou , X. Song , and L. Zhang , \u201c Mapfusion: A general framework for 3d object detection with hdmaps .\u201d arXiv: Computer Vision and Pattern Recognition , 2021 . J. Fang, D. Zhou, X. Song, and L. Zhang, \u201cMapfusion: A general framework for 3d object detection with hdmaps.\u201d arXiv: Computer Vision and Pattern Recognition, 2021."},{"key":"e_1_3_2_1_10_1","volume-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu Z.","year":"2021","unstructured":"Z. Liu , Y. Lin , Y. Cao , H. Hu , Y. Wei , Z. Zhang , S. Lin , and B. Guo , \u201c Swin transformer: Hierarchical vision transformer using shifted windows .\u201d arXiv: Computer Vision and Pattern Recognition , 2021 . Z. Liu, Y. Lin, Y. Cao, H. Hu, Y. Wei, Z. Zhang, S. Lin, and B. Guo, \u201cSwin transformer: Hierarchical vision transformer using shifted windows.\u201d arXiv: Computer Vision and Pattern Recognition, 2021."},{"key":"e_1_3_2_1_11_1","volume-title":"Cswin transformer: A general vision transformer backbone with cross-shaped windows","author":"Dong X.","year":"2021","unstructured":"X. Dong , J. Bao , D. Chen , W. Zhang , N. Yu , L. Yuan , D. Chen , and B. Guo , \u201c Cswin transformer: A general vision transformer backbone with cross-shaped windows ,\u201d Cornell University - arXiv, 2021 . X. Dong, J. Bao, D. Chen, W. Zhang, N. Yu, L. Yuan, D. Chen, and B. Guo, \u201cCswin transformer: A general vision transformer backbone with cross-shaped windows,\u201d Cornell University - arXiv, 2021."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1007\/978-3-030-58452-8_13"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1109\/ICCV48922.2021.00360"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the ... AAAI Conference on Artificial Intelligence","author":"Wang Y.","year":"2021","unstructured":"Y. Wang , X. Zhang , T. Yang , and J. Sun , \u201c Anchor detr: Query design for transformer-based detector .\u201d Proceedings of the ... AAAI Conference on Artificial Intelligence , 2021 . Y. Wang, X. Zhang, T. Yang, and J. Sun, \u201cAnchor detr: Query design for transformer-based detector.\u201d Proceedings of the ... AAAI Conference on Artificial Intelligence, 2021."},{"key":"e_1_3_2_1_15_1","volume-title":"Petr: Position embedding transformation for multi-view 3d object detection","author":"Liu Y.","year":"2023","unstructured":"Y. Liu , T. Wang , X. Zhang , and J. Sun , \u201c Petr: Position embedding transformation for multi-view 3d object detection ,\u201d 2023 . Y. Liu, T. Wang, X. Zhang, and J. Sun, \u201cPetr: Position embedding transformation for multi-view 3d object detection,\u201d 2023."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1109\/CVPR46437.2021.00607"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/ICCV48922.2021.01032"},{"key":"e_1_3_2_1_18_1","volume-title":"Bevformer: Learning bird's-eye-view representation from multi-camera images via spatiotemporal transformers","author":"Li Z.","year":"2023","unstructured":"Z. Li , W. Wang , H. Li , E. Xie , C. Sima , T. Lu , Q. Yu , and J. Dai , \u201c Bevformer: Learning bird's-eye-view representation from multi-camera images via spatiotemporal transformers ,\u201d 2023 . Z. Li, W. Wang, H. Li, E. Xie, C. Sima, T. Lu, Q. Yu, and J. Dai, \u201cBevformer: Learning bird's-eye-view representation from multi-camera images via spatiotemporal transformers,\u201d 2023."},{"key":"e_1_3_2_1_19_1","volume-title":"Sensors","author":"Yan Y.","year":"2018","unstructured":"Y. Yan , Y. Mao , and B. Li , \u201c Second: Sparsely embedded convolutional detection ,\u201d Sensors , 2018 . Y. Yan, Y. Mao, and B. Li, \u201cSecond: Sparsely embedded convolutional detection,\u201d Sensors, 2018."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.1109\/CVPR46437.2021.00190"},{"key":"e_1_3_2_1_21_1","volume-title":"Sa-det3d: Self-attention based context-aware 3d object detection","author":"Bhattacharyya P.","year":"2021","unstructured":"P. Bhattacharyya , C. Huang , and K. Czarnecki , \u201c Sa-det3d: Self-attention based context-aware 3d object detection ,\u201d arXiv: Computer Vision and Pattern Recognition , 2021 . P. Bhattacharyya, C. Huang, and K. Czarnecki, \u201cSa-det3d: Self-attention based context-aware 3d object detection,\u201d arXiv: Computer Vision and Pattern Recognition, 2021."},{"key":"e_1_3_2_1_22_1","volume-title":"Centerformer: Center-based transformer for 3d object detection","author":"Zhou Z.","year":"2023","unstructured":"Z. Zhou , X. Zhao , Y. Wang , P. Wang , and H. Foroosh , \u201c Centerformer: Center-based transformer for 3d object detection ,\u201d 2023 . Z. Zhou, X. Zhao, Y. Wang, P. Wang, and H. Foroosh, \u201cCenterformer: Center-based transformer for 3d object detection,\u201d 2023."},{"key":"e_1_3_2_1_23_1","volume-title":"Embracing single stride 3d object detector with sparse transformer","author":"Casia L.","year":"2023","unstructured":"L. Casia , T. Zhang , Y.-X. Wang , H. Zhao , F. Wang , N. Wang , and Z. Zhang , \u201c Embracing single stride 3d object detector with sparse transformer ,\u201d 2023 . L. Casia, T. Zhang, Y.-X. Wang, H. Zhao, F. Wang, N. Wang, and Z. Zhang, \u201cEmbracing single stride 3d object detector with sparse transformer,\u201d 2023."},{"key":"e_1_3_2_1_24_1","volume-title":"Efficientdet: Scalable and efficient object detection","author":"Tan M.","year":"2019","unstructured":"M. Tan , R. Pang , and Q. V. Le , \u201c Efficientdet: Scalable and efficient object detection ,\u201d Cornell University - arXiv, 2019 . M. Tan, R. Pang, and Q. V. Le, \u201cEfficientdet: Scalable and efficient object detection,\u201d Cornell University - arXiv, 2019."},{"key":"e_1_3_2_1_25_1","volume-title":"Center-based 3d object detection and tracking","author":"Yin T.","year":"2020","unstructured":"T. Yin , X. Zhou , and P. Kra\u00a8henbu\u00a8hl , \u201c Center-based 3d object detection and tracking .\u201d 2020 . T. Yin, X. Zhou, and P. Kra\u00a8henbu\u00a8hl, \u201cCenter-based 3d object detection and tracking.\u201d 2020."},{"key":"e_1_3_2_1_26_1","volume-title":"Ssn: Shape signature networks for multi-class object detection from point clouds","author":"Zhu X.","year":"2020","unstructured":"X. Zhu , Y. Ma , T. Wang , Y. Xu , J. Shi , and D. Lin , \u201c Ssn: Shape signature networks for multi-class object detection from point clouds ,\u201d Springer International Publishing eBooks, 2020 . X. Zhu, Y. Ma, T. Wang, Y. Xu, J. Shi, and D. Lin, \u201cSsn: Shape signature networks for multi-class object detection from point clouds,\u201d Springer International Publishing eBooks, 2020."}],"event":{"acronym":"CNIOT'23","name":"CNIOT'23: 2023 4th International Conference on Computing, Networks and Internet of Things","location":"Xiamen China"},"container-title":["Proceedings of the 2023 4th International Conference on Computing, Networks and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603781.3603857","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3603781.3603857","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:10Z","timestamp":1750178830000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603781.3603857"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,26]]},"references-count":26,"alternative-id":["10.1145\/3603781.3603857","10.1145\/3603781"],"URL":"https:\/\/doi.org\/10.1145\/3603781.3603857","relation":{},"subject":[],"published":{"date-parts":[[2023,5,26]]},"assertion":[{"value":"2023-07-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}