{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T18:05:06Z","timestamp":1781114706121,"version":"3.54.1"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008238","name":"Hebei Provincial Department of Science And Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008238","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113905","type":"journal-article","created":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T23:54:20Z","timestamp":1778025260000},"page":"113905","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PD","title":["CausalPose: Causal visuo-tactile fusion for robust 6-DoF object pose estimation"],"prefix":"10.1016","volume":"179","author":[{"given":"Peiliang","family":"Wu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7631-2459","authenticated-orcid":false,"given":"Yao","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuanzhi","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingyue","family":"Niu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fengda","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ziying","family":"Song","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yongtao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenbai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113905_b1","series-title":"Proceedings of the IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"6839","article-title":"PoseFusion: Robust object-in-hand pose estimation with SelectLSTM","author":"Tu","year":"2023"},{"issue":"7","key":"10.1016\/j.patcog.2026.113905_b2","doi-asserted-by":"crossref","first-page":"1186","DOI":"10.1177\/02783649241301443","article-title":"Shared visuo-tactile interactive perception for robust object pose estimation","volume":"44","author":"Murali","year":"2025","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.patcog.2026.113905_b3","doi-asserted-by":"crossref","unstructured":"Y. Hai, R. Song, J. Li, M. Salzmann, Y. Hu, Rigidity-aware detection for 6D object pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2023, pp. 8927\u20138936.","DOI":"10.1109\/CVPR52729.2023.00862"},{"key":"10.1016\/j.patcog.2026.113905_b4","doi-asserted-by":"crossref","unstructured":"H. Qi, C. Zhao, M. Salzmann, A. Mathis, HOISDF: Constraining 3D hand-object pose estimation with global signed distance fields, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2024, pp. 10392\u201310402.","DOI":"10.1109\/CVPR52733.2024.00989"},{"key":"10.1016\/j.patcog.2026.113905_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109896","article-title":"Learning geometric consistency and discrepancy for category-level 6D object pose estimation from point clouds","volume":"145","author":"Zou","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113905_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2025.103581","article-title":"CausalMixNet: A mixed-attention framework for causal intervention in robust medical image diagnosis","volume":"103","author":"Zhang","year":"2025","journal-title":"Med. Image Anal."},{"issue":"3","key":"10.1016\/j.patcog.2026.113905_b7","doi-asserted-by":"crossref","DOI":"10.1007\/s11263-025-02667-1","article-title":"Multimodal alignment and fusion: A survey","volume":"134","author":"Li","year":"2026","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113905_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111485","article-title":"Balanced multi-modal learning with hierarchical fusion for fake news detection","volume":"164","author":"Wu","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113905_b9","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102200","article-title":"Uncertainty-aware incomplete multimodal fusion for few-shot CRAO classification","volume":"104","author":"Zhou","year":"2024","journal-title":"Inf. Fusion"},{"issue":"140","key":"10.1016\/j.patcog.2026.113905_b10","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.patcog.2026.113905_b11","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106347","article-title":"Multimodal information bottleneck for deep reinforcement learning with multiple sensors","volume":"176","author":"You","year":"2024","journal-title":"Neural Netw."},{"key":"10.1016\/j.patcog.2026.113905_b12","first-page":"49921","article-title":"VinT-6D: A large-scale object-in-hand dataset from vision, touch and proprioception","volume":"235","author":"Wan","year":"2024","journal-title":"Proc. Mach. Learn. Res."},{"key":"10.1016\/j.patcog.2026.113905_b13","doi-asserted-by":"crossref","unstructured":"G.M. Caddeo, N.A. Piga, F. Bottarel, L. Natale, Collision-aware in-hand 6D object pose estimation using multiple vision-based tactile sensors, in: Proceedings of the IEEE International Conference on Robotics and Automation, ICRA, 2023, pp. 719\u2013725.","DOI":"10.1109\/ICRA48891.2023.10160359"},{"issue":"2","key":"10.1016\/j.patcog.2026.113905_b14","doi-asserted-by":"crossref","first-page":"2148","DOI":"10.1109\/LRA.2022.3143289","article-title":"VisuoTactile 6D pose estimation of an in-hand object using vision and tactile sensor data","volume":"7","author":"Dikhale","year":"2022","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"96","key":"10.1016\/j.patcog.2026.113905_b15","doi-asserted-by":"crossref","DOI":"10.1126\/scirobotics.adl0628","article-title":"NeuralFeels with neural fields: Visuotactile perception for in-hand manipulation","volume":"9","author":"Suresh","year":"2024","journal-title":"Sci. Robot."},{"key":"10.1016\/j.patcog.2026.113905_b16","doi-asserted-by":"crossref","unstructured":"H. Li, J. Akl, S. Sridhar, T. Brady, T. Pad\u0131r, ViTa-Zero: Zero-shot visuotactile object 6D pose estimation, in: Proceedings of the IEEE International Conference on Robotics and Automation, ICRA, 2025, pp. 16050\u201316057.","DOI":"10.1109\/ICRA55743.2025.11128495"},{"key":"10.1016\/j.patcog.2026.113905_b17","doi-asserted-by":"crossref","unstructured":"L. Mack, F. Gruninger, B.A. Richardson, R. Lendway, K.J. Kuchenbecker, J. Stueckler, Visuo-tactile object pose estimation for a multi-finger robot hand with low-resolution in-hand tactile sensing, in: Proceedings of the IEEE International Conference on Robotics and Automation, ICRA, 2025, pp. 12401\u201312407.","DOI":"10.1109\/ICRA55743.2025.11127966"},{"key":"10.1016\/j.patcog.2026.113905_b18","series-title":"ViTaSCOPE: Visuo-tactile implicit representation for in-hand pose and extrinsic contact estimation","author":"Lee","year":"2025"},{"key":"10.1016\/j.patcog.2026.113905_b19","doi-asserted-by":"crossref","unstructured":"N. Wandel, K. Krispin, et al., SemAlign3D: Semantic correspondence between RGB-images through aligning 3D object-class representations, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025.","DOI":"10.1109\/CVPR52734.2025.00114"},{"issue":"7","key":"10.1016\/j.patcog.2026.113905_b20","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TPAMI.2025.3552132","article-title":"Diff9D: Diffusion-based domain-generalized category-level 9-DoF object pose estimation","volume":"47","author":"Liu","year":"2025","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113905_b21","doi-asserted-by":"crossref","unstructured":"M. Liu, S. Li, A. Chhatkuli, P. Truong, L. Van Gool, F. Tombari, One2Any: One-reference 6D pose estimation for any object, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025, pp. 6457\u20136467.","DOI":"10.1109\/CVPR52734.2025.00605"},{"key":"10.1016\/j.patcog.2026.113905_b22","doi-asserted-by":"crossref","unstructured":"J. Kim, J. Park, K. Lee, N.I. Cho, RefPose: Leveraging reference geometric correspondences for accurate 6D pose estimation of unseen objects, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025, pp. 6447\u20136456.","DOI":"10.1109\/CVPR52734.2025.00604"},{"key":"10.1016\/j.patcog.2026.113905_b23","doi-asserted-by":"crossref","unstructured":"T. Lee, B. Wen, M. Kang, G. Kang, I.S. Kweon, K.-J. Yoon, Any6D: Model-free 6D pose estimation of novel objects, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025, pp. 11633\u201311643.","DOI":"10.1109\/CVPR52734.2025.01086"},{"key":"10.1016\/j.patcog.2026.113905_b24","doi-asserted-by":"crossref","unstructured":"W. Deng, D. Campbell, C. Sun, J. Zhang, S. Kanitkar, M. Shaffer, S. Gould, Pos3R: 6D pose estimation for unseen objects made easy, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025.","DOI":"10.1109\/CVPR52734.2025.01567"},{"key":"10.1016\/j.patcog.2026.113905_b25","doi-asserted-by":"crossref","unstructured":"A. Nguyen, S. Hu, E. Kasaura, C. Hartley, M. Salzmann, V. Lepetit, GigaPose: Fast and robust novel object pose estimation via one correspondence, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2024, pp. 9903\u20139913.","DOI":"10.1109\/CVPR52733.2024.00945"},{"key":"10.1016\/j.patcog.2026.113905_b26","doi-asserted-by":"crossref","unstructured":"S. Zhang, Y. Huang, W. Zhao, et al., Environment-Agnostic Pose: Generating Environment-independent Object Representations for 6D Pose Estimation, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2025, pp. 8678\u20138687.","DOI":"10.1109\/ICCV51701.2025.00812"},{"issue":"12","key":"10.1016\/j.patcog.2026.113905_b27","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TPAMI.2024.3417214","article-title":"When invariant representation learning meets label shift: Insufficiency and theoretical insights","volume":"46","author":"Luo","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113905_b28","series-title":"Advances in Neural Information Processing Systems","article-title":"Plug-and-play feature causality decomposition for multimodal representation learning","author":"Liu","year":"2025"},{"key":"10.1016\/j.patcog.2026.113905_b29","first-page":"1","article-title":"Exploring invariance matters for domain generalization","volume":"34","author":"Wang","year":"2025","journal-title":"IEEE Trans. Image Process.: Publ. IEEE Signal Process. Soc."},{"key":"10.1016\/j.patcog.2026.113905_b30","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.102943","article-title":"TVT-transformer: A tactile-visual-textual fusion network for object recognition","volume":"118","author":"Li","year":"2025","journal-title":"Inf. Fusion"},{"issue":"4","key":"10.1016\/j.patcog.2026.113905_b31","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/LRA.2025.3536218","article-title":"CAFuser: Condition-aware multimodal fusion for robust semantic perception of driving scenes","volume":"10","author":"Br\u00f6dermann","year":"2025","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.patcog.2026.113905_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111413","article-title":"A novel 6DoF pose estimation method using transformer fusion","volume":"162","author":"Wang","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113905_b33","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111501","article-title":"Multimodal fusion via voting network for 3D object detection in indoors","volume":"164","author":"Li","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113905_b34","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.112591","article-title":"MCE: Towards a general framework for handling missing modalities under imbalanced missing rates","volume":"172","author":"Zhao","year":"2026","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113905_b35","doi-asserted-by":"crossref","unstructured":"H. Li, M. Jia, T. Akbulut, Y. Xiang, G. Konidaris, S. Sridhar, V-HOP: Visuo-haptic 6D object pose tracking, in: Proceedings of Robotics: Science and Systems, RSS, 2025.","DOI":"10.15607\/RSS.2025.XXI.037"},{"issue":"6","key":"10.1016\/j.patcog.2026.113905_b36","doi-asserted-by":"crossref","first-page":"1489","DOI":"10.1109\/JAS.2020.1003180","article-title":"Reinforcement learning based data fusion method for multi-sensors","volume":"7","author":"Zhou","year":"2020","journal-title":"IEEE\/CAA J. Autom. Sin."},{"key":"10.1016\/j.patcog.2026.113905_b37","doi-asserted-by":"crossref","unstructured":"R. Dai, C. Li, Y. Yan, et al., Unbiased missing-modality multimodal learning, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2025, pp. 24507\u201324517.","DOI":"10.1109\/ICCV51701.2025.02272"},{"key":"10.1016\/j.patcog.2026.113905_b38","series-title":"Causality: Models, Reasoning, and Inference","author":"Pearl","year":"2009"},{"key":"10.1016\/j.patcog.2026.113905_b39","unstructured":"C. Yang, X. Li, et al., VITA: A multi-modal dataset for object pose estimation with vision and tactile sensing, in: Proceedings of the IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS, 2022."},{"key":"10.1016\/j.patcog.2026.113905_b40","doi-asserted-by":"crossref","unstructured":"D. Turpin, L. Wang, S. Tsogkas, S. Dickinson, A. Garg, Fast-Grasp\u2019D: Dexterous multi-finger grasp generation through differentiable simulation, in: Proceedings of the IEEE International Conference on Robotics and Automation, ICRA, 2023, pp. 8082\u20138089.","DOI":"10.1109\/ICRA48891.2023.10160314"},{"issue":"2","key":"10.1016\/j.patcog.2026.113905_b41","doi-asserted-by":"crossref","first-page":"1106","DOI":"10.1109\/LRA.2023.3337690","article-title":"Enhancing generalizable 6D pose tracking of an in-hand object with tactile sensing","volume":"9","author":"Liu","year":"2024","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.patcog.2026.113905_b42","first-page":"1","article-title":"AxisPose: Model-free matching-free single-shot 6D object pose estimation via axis generation","author":"Zou","year":"2026","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"8","key":"10.1016\/j.patcog.2026.113905_b43","doi-asserted-by":"crossref","first-page":"8284","DOI":"10.1109\/LRA.2025.3585384","article-title":"HIPPo: Harnessing image-to-3D priors for model-free zero-shot 6D pose estimation","volume":"10","author":"Liu","year":"2025","journal-title":"IEEE Robot. Autom. Lett."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326008708?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326008708?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T17:15:53Z","timestamp":1781111753000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326008708"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":43,"alternative-id":["S0031320326008708"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113905","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"CausalPose: Causal visuo-tactile fusion for robust 6-DoF object pose estimation","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113905","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113905"}}