{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T15:05:19Z","timestamp":1779375919650,"version":"3.53.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011259","name":"Chinese Academy of Sciences State Key Laboratory of Robotics","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100011259","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013066","name":"Key Scientific Research Project of Colleges and Universities in Henan Province","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013066","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.neunet.2026.109096","type":"journal-article","created":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T15:20:40Z","timestamp":1778772040000},"page":"109096","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Cross attention-based prior deformation for category-level 6D pose estimation"],"prefix":"10.1016","volume":"203","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3311-5002","authenticated-orcid":false,"given":"Shuai","family":"Guo","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yongchao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lifeng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chunge","family":"Cao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1491-0040","authenticated-orcid":false,"given":"Yazhou","family":"Hu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.109096_bib0001","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"11973","article-title":"Learning canonical shape space for category-level 6D object pose and size estimation","author":"Chen","year":"2020"},{"key":"10.1016\/j.neunet.2026.109096_bib0002","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"2773","article-title":"SGPA: Structure-guided prior adaptation for category-level 6D object pose estimation","author":"Chen","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0003","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1581","article-title":"FS-Net: Fast shape-based network for category-level 6D object pose estimation with decoupled rotation mechanism","author":"Chen","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0004","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"6781","article-title":"GPV-pose: Category-level object pose estimation via geometry-guided point-wise voting","author":"Di","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0005","series-title":"Proceedings of the 2024 international conference on multimedia retrieval","first-page":"55","article-title":"ACR-Pose: Adversarial canonical representation reconstruction network for category level 6D object pose estimation","author":"Fan","year":"2024"},{"key":"10.1016\/j.neunet.2026.109096_bib0006","first-page":"27469","article-title":"Category-level 6D object pose estimation in the wild: A semi-supervised learning approach and a new dataset","volume":"35","author":"Fu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.109096_bib0007","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1007\/s41095-021-0229-5","article-title":"PCT: Point cloud transformer","volume":"7","author":"Guo","year":"2021","journal-title":"Computational visual media"},{"key":"10.1016\/j.neunet.2026.109096_bib0008","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"2961","article-title":"Mask R-CNN","author":"He","year":"2017"},{"key":"10.1016\/j.neunet.2026.109096_bib0009","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neunet.2026.109096_bib0010","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"11108","article-title":"RandLA-Net: Efficient semantic segmentation of large-scale point clouds","author":"Hu","year":"2020"},{"key":"10.1016\/j.neunet.2026.109096_bib0011","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106321","article-title":"Ensuring spatial scalability with temporal-wise spatial attentive pooling for temporal action detection","volume":"176","author":"Kim","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109096_bib0012","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A. C., Lo, W.-Y. et al. (2023). Segment anything. arXiv preprint:2304.02643.","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"10.1016\/j.neunet.2026.109096_bib0013","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"11143","article-title":"Geometric transformer for fast and robust point cloud registration","author":"Qin","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0014","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"8500","article-title":"Stratified transformer for 3D point cloud segmentation","author":"Lai","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0015","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"6707","article-title":"SAR-Net: Shape alignment and recovery network for category-level 6D object pose and size estimation","author":"Lin","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0016","first-page":"16779","article-title":"Sparse steerable convolutions: An efficient learning of se (3)-equivariant features for estimation and tracking of object poses in 3D space","volume":"34","author":"Lin","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.109096_bib0017","series-title":"European conference on computer vision","first-page":"19","article-title":"Category-level 6D object pose and size estimation using self-supervised deep prior deformation networks","author":"Lin","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0018","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"3560","article-title":"Dualposenet: Category-level 6D object pose and size estimation using dual pose network with refined learning of pose consistency","author":"Lin","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0019","series-title":"Leaping from 2D Detection to Efficient 6DoF Object Pose Estimation","first-page":"707","author":"Liu","year":"2020"},{"key":"10.1016\/j.neunet.2026.109096_bib0020","series-title":"Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part XXIII 16","first-page":"326","article-title":"A closer look at local aggregation operators in point cloud analysis","author":"Liu","year":"2020"},{"key":"10.1016\/j.neunet.2026.109096_bib0021","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"10012","article-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0022","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"2949","article-title":"Group-free 3D object detection via transformers","author":"Liu","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0023","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"4561","article-title":"PVNet: Pixel-wise voting network for 6DoF pose estimation","author":"Peng","year":"2019"},{"key":"10.1016\/j.neunet.2026.109096_bib0024","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"652","article-title":"PointNet: Deep learning on point sets for 3D classification and segmentation","author":"Qi","year":"2017"},{"key":"10.1016\/j.neunet.2026.109096_bib0025","first-page":"5105","article-title":"PointNet++: Deep hierarchical feature learning on point sets in a metric space","volume":"30","author":"Qi","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"10.1016\/j.neunet.2026.109096_bib0026","first-page":"5998","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"10.1016\/j.neunet.2026.109096_bib0027","series-title":"European conference on computer vision","first-page":"530","article-title":"Shape prior deformation for categorical 6D object pose and size estimation","author":"Tian","year":"2020"},{"issue":"04","key":"10.1016\/j.neunet.2026.109096_bib0028","doi-asserted-by":"crossref","first-page":"376","DOI":"10.1109\/34.88573","article-title":"Least-squares estimation of transformation parameters between two point patterns","volume":"13","author":"Umeyama","year":"1991","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.109096_bib0029","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"14065","article-title":"SOCS: Semantically-aware object coordinate space for category-level 6D object pose estimation under large shape variations","author":"Wan","year":"2023"},{"key":"10.1016\/j.neunet.2026.109096_bib0030","series-title":"2020\u202fIEEE International conference on robotics and automation (ICRA)","first-page":"10059","article-title":"6-Pack: Category-level 6D pose tracker with anchor-based keypoints","author":"Wang","year":"2020"},{"key":"10.1016\/j.neunet.2026.109096_bib0031","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"3343","article-title":"DenseFusion: 6D object pose estimation by iterative dense fusion","author":"Wang","year":"2019"},{"key":"10.1016\/j.neunet.2026.109096_bib0032","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"2642","article-title":"Normalized object coordinate space for category-level 6D object pose and size estimation","author":"Wang","year":"2019"},{"key":"10.1016\/j.neunet.2026.109096_bib0033","series-title":"2021\u202fIEEE\/RSJ International conference on intelligent robots and systems (IROS)","first-page":"4807","article-title":"Category-level 6D object pose estimation via cascaded relation and recurrent reconstruction networks","author":"Wang","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0034","unstructured":"Wang, J., Li, J., Ding, L., Wang, Y., & Xu, T. (2021b). Papooling: Graph-based position adaptive aggregation of local geometry in point clouds. arXiv preprint:2111.14067."},{"key":"10.1016\/j.neunet.2026.109096_bib0035","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"13209","article-title":"CAPTRA: Category-level pose tracking for rigid and articulated objects from point clouds","author":"Weng","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0036","doi-asserted-by":"crossref","first-page":"3906","DOI":"10.1109\/CVPR46437.2021.00390","article-title":"DSC-PoseNet: Learning 6DoF object pose estimation via dual-scale consistency","author":"Yang","year":"2021","journal-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"10.1016\/j.neunet.2026.109096_bib0037","first-page":"1","article-title":"6DoF object pose estimation via differentiable proxy voting regularizer","author":"Yu","year":"2020","journal-title":"31st British Machine Vision Conference, BMVC 2020"},{"key":"10.1016\/j.neunet.2026.109096_bib0038","series-title":"2018 International conference on 3D vision (3DV)","first-page":"728","article-title":"PCN: Point completion network","author":"Yuan","year":"2018"},{"key":"10.1016\/j.neunet.2026.109096_bib0039","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"1941","article-title":"Dpod: 6D pose object detector and refiner","author":"Zakharov","year":"2019"},{"key":"10.1016\/j.neunet.2026.109096_bib0040","unstructured":"Zhang, K., Fu, Y., Borse, S., Cai, H., Porikli, F., & Wang, X. (2022a). Self-supervised geometric correspondence for category-level 6D object pose estimation in the wild. arXiv: 2210.07199."},{"key":"10.1016\/j.neunet.2026.109096_bib0041","series-title":"European conference on computer vision","first-page":"655","article-title":"RBP-Pose: Residual bounding box projection for category-level pose estimation","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0042","series-title":"2022\u202fIEEE\/RSJ International conference on intelligent robots and systems (IROS)","first-page":"7452","article-title":"SSP-POSE: Symmetry-aware shape prior deformation for direct category-level object pose estimation","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neunet.2026.109096_bib0043","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"16259","article-title":"Point transformer","author":"Zhao","year":"2021"},{"key":"10.1016\/j.neunet.2026.109096_bib0044","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"17163","article-title":"HS-pose: Hybrid scope feature extraction for category-level object pose estimation","author":"Zheng","year":"2023"},{"key":"10.1016\/j.neunet.2026.109096_bib0045","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1016\/j.neunet.2023.07.037","article-title":"MSSPA-GC: Multi-scale shape prior adaptation with 3D graph convolutions for category-level object pose estimation","volume":"166","author":"Zou","year":"2023","journal-title":"Neural Networks"}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026005563?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026005563?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T14:07:26Z","timestamp":1779372446000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026005563"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":45,"alternative-id":["S0893608026005563"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.109096","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Cross attention-based prior deformation for category-level 6D pose estimation","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.109096","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"109096"}}