{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,25]],"date-time":"2025-12-25T16:04:06Z","timestamp":1766678646063,"version":"3.37.3"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"17","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62273318"],"award-info":[{"award-number":["62273318"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17626-6","type":"journal-article","created":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T06:01:30Z","timestamp":1699682490000},"page":"53043-53063","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Bi-directional attention based RGB-D fusion for category-level object pose and shape 
estimation"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5308-0663","authenticated-orcid":false,"given":"Kaifeng","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5301-9376","authenticated-orcid":false,"given":"Chi","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1170-4276","authenticated-orcid":false,"given":"Ming","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"17626_CR1","doi-asserted-by":"publisher","unstructured":"Avetisyan A, Dahnert M, Dai A, et\u00a0al (2019) Scan2cad: Learning cad model alignment in rgb-d scans. In: IEEE Conf Comput Vis Pattern Recognit, Long Beach, CA, USA, pp 2614\u20132623. https:\/\/doi.org\/10.1109\/CVPR.2019.00272","DOI":"10.1109\/CVPR.2019.00272"},{"key":"17626_CR2","doi-asserted-by":"publisher","unstructured":"Brachmann E, Krull A, Michel F, et\u00a0al (2014) Learning 6d object pose estimation using 3d object coordinates. In: Eur Conf Comput Vis, pp 536\u2013551. https:\/\/doi.org\/10.1007\/978-3-319-10605-2_35","DOI":"10.1007\/978-3-319-10605-2_35"},{"key":"17626_CR3","doi-asserted-by":"publisher","unstructured":"Chen CFR, Fan Q, Panda R (2021a) Crossvit: Cross-attention multi-scale vision transformer for image classification. In: IEEE Int Conf Comput Vis, Montreal, QC, Canada, pp 357\u2013366. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00041","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"17626_CR4","doi-asserted-by":"publisher","unstructured":"Chen D, Li J, Wang Z, et\u00a0al (2020) Learning canonical shape space for category-level 6d object pose and size estimation. In: IEEE Conf Comput Vis Pattern Recognit, Seattle, WA, USA, pp 11973\u201311982. 
https:\/\/doi.org\/10.1109\/CVPR42600.2020.01199","DOI":"10.1109\/CVPR42600.2020.01199"},{"key":"17626_CR5","doi-asserted-by":"publisher","unstructured":"Chen K, Dou Q (2021) Sgpa: Structure-guided prior adaptation for category-level 6d object pose estimation. In: IEEE Int Conf Comput Vis, Montreal, QC, Canada, pp 2773\u20132782. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00277","DOI":"10.1109\/ICCV48922.2021.00277"},{"key":"17626_CR6","doi-asserted-by":"publisher","unstructured":"Chen W, Jia X, Chang HJ, et\u00a0al (2021b) Fs-net: Fast shape-based network for category-level 6d object pose estimation with decoupled rotation mechanism. In: IEEE Conf Comput Vis Pattern Recognit, Nashville, TN, USA, pp 1581\u20131590. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00163","DOI":"10.1109\/CVPR46437.2021.00163"},{"key":"17626_CR7","doi-asserted-by":"publisher","unstructured":"Chen X, Ma H, Wan J, et\u00a0al (2017) Multi-view 3d object detection network for autonomous driving. In: IEEE Conf Comput Vis Pattern Recognit, Honolulu, HI, USA, pp 1907\u20131915. https:\/\/doi.org\/10.1109\/CVPR.2017.691","DOI":"10.1109\/CVPR.2017.691"},{"key":"17626_CR8","doi-asserted-by":"publisher","unstructured":"Di Y, Zhang R, Lou Z, et\u00a0al (2022) Gpv-pose: Category-level object pose estimation via geometry-guided point-wise voting. In: IEEE Conf Comput Vis Pattern Recognit, New Orleans, LA, USA, pp 6781\u20136791. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00666","DOI":"10.1109\/CVPR52688.2022.00666"},{"key":"17626_CR9","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et\u00a0al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. 
arXiv:2010.11929"},{"issue":"9","key":"17626_CR10","doi-asserted-by":"publisher","first-page":"5315","DOI":"10.1109\/LRA.2023.3293317","volume":"8","author":"F Duffhauss","year":"2023","unstructured":"Duffhauss F, Koch S, Ziesche H et al (2023) Symfm6d: Symmetry-aware multi-directional fusion for multi-view 6d object pose estimation. IEEE Robot Autom Lett 8(9):5315\u20135322. https:\/\/doi.org\/10.1109\/LRA.2023.3293317","journal-title":"IEEE Robot Autom Lett"},{"key":"17626_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2022.108310","volume":"103","author":"D Fan","year":"2022","unstructured":"Fan D, Wan L, Xu W et al (2022) A bi-directional attention guided cross-modal network for music based dance generation. Comput Electr Eng 103:108310. https:\/\/doi.org\/10.1016\/j.compeleceng.2022.108310","journal-title":"Comput Electr Eng"},{"issue":"6","key":"17626_CR12","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/358669.358692","volume":"24","author":"MA Fischler","year":"1981","unstructured":"Fischler MA, Bolles RC (1981) Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography. Commun ACM 24(6):381\u2013395. https:\/\/doi.org\/10.1145\/358669.358692","journal-title":"Commun ACM"},{"issue":"2","key":"17626_CR13","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1109\/MNET.001.1900260","volume":"34","author":"Z Gao","year":"2020","unstructured":"Gao Z, Zhang H, Dong S et al (2020) Salient object detection in the distributed cloud-edge intelligent network. IEEE Netw 34(2):216\u2013224. https:\/\/doi.org\/10.1109\/MNET.001.1900260","journal-title":"IEEE Netw"},{"key":"17626_CR14","doi-asserted-by":"publisher","unstructured":"Georgakis G, Karanam S, Wu Z, et\u00a0al (2019) Learning local rgb-to-cad correspondences for object pose estimation. In: IEEE Int Conf Comput Vis, Seoul, Korea, pp 8967\u20138976. 
https:\/\/doi.org\/10.1109\/ICCV.2019.00906","DOI":"10.1109\/ICCV.2019.00906"},{"key":"17626_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2022.3170972","volume":"71","author":"F Gu","year":"2022","unstructured":"Gu F, Lu J, Cai C (2022) Rpformer: A robust parallel transformer for visual tracking in complex scenes. IEEE Trans Instrum Meas 71:1\u201314. https:\/\/doi.org\/10.1109\/TIM.2022.3170972","journal-title":"IEEE Trans Instrum Meas"},{"key":"17626_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-15168-5","author":"F Gu","year":"2023","unstructured":"Gu F, Lu J, Cai C (2023) A robust attention-enhanced network with transformer for visual tracking. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-023-15168-5","journal-title":"Multimed Tools Appl"},{"key":"17626_CR17","doi-asserted-by":"publisher","first-page":"4989","DOI":"10.1109\/TIP.2023.3308750","volume":"32","author":"F Guo","year":"2023","unstructured":"Guo F, Jin T, Zhu S et al (2023) B2c-afm: Bi-directional co-temporal and cross-spatial attention fusion model for human action recognition. IEEE Trans Image Process 32:4989\u20135003. https:\/\/doi.org\/10.1109\/TIP.2023.3308750","journal-title":"IEEE Trans Image Process"},{"key":"17626_CR18","doi-asserted-by":"publisher","unstructured":"Hao T, Mohit B (2019) Lxmert: Learning cross-modality encoder representations from transformers. EMNLP-IJCNLP. https:\/\/doi.org\/10.18653\/v1\/d19-1514","DOI":"10.18653\/v1\/d19-1514"},{"key":"17626_CR19","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, et\u00a0al (2016) Deep residual learning for image recognition. In: IEEE Conf Comput Vis Pattern Recognit, Las Vegas, NV, USA, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"17626_CR20","doi-asserted-by":"publisher","unstructured":"He K, Gkioxari G, Dollar P, et\u00a0al (2017) Mask r-cnn. In: IEEE Int Conf Comput Vis, Venice, Italy, pp 2961\u20132969. 
https:\/\/doi.org\/10.1109\/ICCV.2017.322","DOI":"10.1109\/ICCV.2017.322"},{"key":"17626_CR21","doi-asserted-by":"publisher","unstructured":"He Y, Sun W, Huang H, et\u00a0al (2020) Pvn3d: A deep point-wise 3d keypoints voting network for 6dof pose estimation. In: IEEE Conf Comput Vis Pattern Recognit, pp 11629\u201311638. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01165","DOI":"10.1109\/CVPR42600.2020.01165"},{"key":"17626_CR22","doi-asserted-by":"publisher","unstructured":"He Y, Huang H, Fan H, et\u00a0al (2021) Ffb6d: A full flow bidirectional fusion network for 6d pose estimation. In: IEEE Conf Comput Vis Pattern Recognit, Nashville, TN, USA, pp 3003\u20133013. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00302","DOI":"10.1109\/CVPR46437.2021.00302"},{"key":"17626_CR23","doi-asserted-by":"publisher","unstructured":"He Y, Wang Y, Fan H, et\u00a0al (2022) Fs6d: Few-shot 6d pose estimation of novel objects. In: IEEE Conf Comput Vis Pattern Recognit, New Orleans, LA, USA, pp 6814\u20136824. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00669","DOI":"10.1109\/CVPR52688.2022.00669"},{"issue":"5","key":"17626_CR24","doi-asserted-by":"publisher","first-page":"876","DOI":"10.1109\/TPAMI.2011.206","volume":"34","author":"S Hinterstoisser","year":"2012","unstructured":"Hinterstoisser S, Cagniart C, Ilic S et al (2012) Gradient Response Maps for Real-Time Detection of Textureless Objects. IEEE Trans Pattern Anal Mach Intell 34(5):876\u2013888. https:\/\/doi.org\/10.1109\/TPAMI.2011.206","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17626_CR25","doi-asserted-by":"publisher","unstructured":"Hinterstoisser S, Lepetit V, Ilic S, et\u00a0al (2012b) Model based training, detection and pose estimation of texture-less 3d objects in heavily cluttered scenes. 
In: Asian Conf Comput Vis, Daejeon, Korea, pp 548\u2013562, https:\/\/doi.org\/10.1007\/978-3-642-37331-2_42","DOI":"10.1007\/978-3-642-37331-2_42"},{"key":"17626_CR26","doi-asserted-by":"publisher","unstructured":"Hoda\u0148 T, Zabulis X, Lourakis M, et\u00a0al (2015) Detection and fine 3d pose estimation of texture-less objects in rgb-d images. In: IEEE Int Conf Intell Robot Syst, Hamburg, Germany, pp 4421\u20134428. https:\/\/doi.org\/10.1109\/IROS.2015.7354005","DOI":"10.1109\/IROS.2015.7354005"},{"key":"17626_CR27","doi-asserted-by":"publisher","unstructured":"Kehl W, Milletari F, Tombari F, et\u00a0al (2016) Deep learning of local rgb-d patches for 3d object detection and 6d pose estimation. In: Eur Conf Comput Vis, pp 205\u2013220. https:\/\/doi.org\/10.1007\/978-3-319-46487-9_13","DOI":"10.1007\/978-3-319-46487-9_13"},{"key":"17626_CR28","doi-asserted-by":"publisher","unstructured":"Konishi Y, Hattori K, Hashimoto M (2019) Real-Time 6D Object Pose Estimation on CPU. In: IEEE Int Conf Intell Robot Syst, Macau, China, pp 3451\u20133458. https:\/\/doi.org\/10.1109\/IROS40897.2019.8967967","DOI":"10.1109\/IROS40897.2019.8967967"},{"key":"17626_CR29","doi-asserted-by":"publisher","unstructured":"Li G, Zhu D, Zhang G, et\u00a0al (2023) Sd-pose: Structural discrepancy aware category-level 6d object pose estimation. In: IEEE Winter Conf Appl Comput Vis, Waikoloa, HI, USA, pp 5685\u20135694. https:\/\/doi.org\/10.1109\/WACV56688.2023.00564","DOI":"10.1109\/WACV56688.2023.00564"},{"key":"17626_CR30","doi-asserted-by":"publisher","unstructured":"Li J, Meng Y, Wu Z, et\u00a0al (2022) Neufa: Neural Network Based End-to-End Forced Alignment with Bidirectional Attention Mechanism. In: IEEE Int Conf Acoust Speech Signal Process, Singapore, pp 8007\u20138011. 
https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747085, iSSN: 2379-190X","DOI":"10.1109\/ICASSP43922.2022.9747085"},{"key":"17626_CR31","doi-asserted-by":"publisher","unstructured":"Lin J, Wei Z, Li Z, et\u00a0al (2021) Dualposenet: Category-level 6d object pose and size estimation using dual pose network with refined learning of pose consistency. In: IEEE Int Conf Comput Vis, Montreal, QC, Canada, pp 3560\u20133569. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00354","DOI":"10.1109\/ICCV48922.2021.00354"},{"key":"17626_CR32","doi-asserted-by":"publisher","unstructured":"Liu C, Sun W, Liu J, et\u00a0al (2023a) Fine segmentation and difference-aware shape adjustment for category-level 6dof object pose estimation. Appl Intell pp 1\u201318. https:\/\/doi.org\/10.1007\/s10489-023-04688-0","DOI":"10.1007\/s10489-023-04688-0"},{"issue":"10","key":"17626_CR33","doi-asserted-by":"publisher","first-page":"6728","DOI":"10.1109\/TCSVT.2022.3169144","volume":"32","author":"J Liu","year":"2022","unstructured":"Liu J, Cao Z, Tang Y et al (2022) Category-Level 6D Object Pose Estimation With Structure Encoder and Reasoning Attention. IEEE Trans Circuits Syst Video Technol 32(10):6728\u20136740. https:\/\/doi.org\/10.1109\/TCSVT.2022.3169144","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"17626_CR34","doi-asserted-by":"publisher","unstructured":"Liu J, Sun W, Liu C, et\u00a0al (2023b) Robotic Continuous Grasping System by Shape Transformer-Guided Multi-Object Category-Level 6D Pose Estimation. IEEE Trans Industr Inform pp 1\u201311. https:\/\/doi.org\/10.1109\/TII.2023.3244348","DOI":"10.1109\/TII.2023.3244348"},{"key":"17626_CR35","doi-asserted-by":"publisher","unstructured":"Liu P, Zhang Q, Cheng J (2023c) Bdr6d: Bidirectional deep residual fusion network for 6d pose estimation. IEEE Trans Autom Sci Eng pp 1\u201312. 
https:\/\/doi.org\/10.1109\/TASE.2023.3248843","DOI":"10.1109\/TASE.2023.3248843"},{"key":"17626_CR36","doi-asserted-by":"publisher","unstructured":"Liu P, Zhang Q, Cheng J (2023d) GSNet: Model reconstruction network for category-level 6d object pose and size estimation. In: IEEE Int Conf Robot Autom, London, United Kingdom, pp 2898\u20132904, https:\/\/doi.org\/10.1109\/ICRA48891.2023.10160688","DOI":"10.1109\/ICRA48891.2023.10160688"},{"key":"17626_CR37","doi-asserted-by":"publisher","unstructured":"Oberweger M, Rad M, Lepetit V (2018) Making deep heatmaps robust to partial occlusions for 3d object pose estimation. In: Eur Conf Comput Vis, pp 119\u2013134. https:\/\/doi.org\/10.1007\/978-3-030-01267-0_8","DOI":"10.1007\/978-3-030-01267-0_8"},{"key":"17626_CR38","doi-asserted-by":"publisher","unstructured":"Peng S, Liu Y, Huang Q, et\u00a0al (2019) Pvnet: Pixel-wise voting network for 6dof pose estimation. In: IEEE Conf Comput Vis Pattern Recognit, pp 4561\u20134570. https:\/\/doi.org\/10.1109\/TPAMI.2020.3047388","DOI":"10.1109\/TPAMI.2020.3047388"},{"key":"17626_CR39","unstructured":"Qi CR, Yi L, Su H, et\u00a0al (2017) Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In: Adv Neural Inf Process Syst"},{"key":"17626_CR40","doi-asserted-by":"publisher","unstructured":"Rad M, Lepetit V (2017) Bb8: A scalable, accurate, robust to partial occlusion method for predicting the 3d poses of challenging objects without using depth. In: IEEE Int Conf Comput Vis, Venice, Italy, pp 3828\u20133836. https:\/\/doi.org\/10.1109\/ICCV.2017.413","DOI":"10.1109\/ICCV.2017.413"},{"key":"17626_CR41","doi-asserted-by":"publisher","unstructured":"Rad M, Oberweger M, Lepetit V (2018) Feature mapping for learning fast and accurate 3d pose inference from synthetic images. In: IEEE Conf Comput Vis Pattern Recognit, Salt Lake City, UT, USA, pp 4663\u20134672. 
https:\/\/doi.org\/10.1109\/CVPR.2018.00490","DOI":"10.1109\/CVPR.2018.00490"},{"key":"17626_CR42","doi-asserted-by":"crossref","unstructured":"Saleh M, Wang Y, Navab N, et\u00a0al (2022) Cloudattention: Efficient multi-scale attention scheme for 3d point cloud learning. arXiv:2208.00524","DOI":"10.1109\/IROS47612.2022.9982276"},{"key":"17626_CR43","doi-asserted-by":"publisher","unstructured":"Tian M, Ang MH, Lee GH (2020) Shape prior deformation for categorical 6d object pose and size estimation. In: Eur Conf Comput Vis, pp 530\u2013546. https:\/\/doi.org\/10.1007\/978-3-030-58589-1_32","DOI":"10.1007\/978-3-030-58589-1_32"},{"key":"17626_CR44","unstructured":"Tremblay J, To T, Sundaralingam B, et\u00a0al (2018) Deep object pose estimation for semantic robotic grasping of household objects. arXiv:1809.10790"},{"issue":"04","key":"17626_CR45","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1109\/34.88573","volume":"13","author":"S Umeyama","year":"1991","unstructured":"Umeyama S (1991) Least-squares estimation of transformation parameters between two point patterns. IEEE Trans Pattern Anal Mach Intell 13(04):376\u2013380. https:\/\/doi.org\/10.1109\/34.88573","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17626_CR46","unstructured":"Vaswani A, Shazeer N, Parmar N, et\u00a0al (2017) Attention is all you need. In: Adv Neural Inf Process Syst"},{"key":"17626_CR47","doi-asserted-by":"publisher","unstructured":"Wang C, Xu D, Zhu Y, et\u00a0al (2019a) Densefusion: 6d object pose estimation by iterative dense fusion. In: IEEE Conf Comput Vis Pattern Recognit, Long Beach, CA, USA, pp 3343\u20133352. https:\/\/doi.org\/10.1109\/CVPR.2019.00346","DOI":"10.1109\/CVPR.2019.00346"},{"key":"17626_CR48","doi-asserted-by":"publisher","unstructured":"Wang C, Mart\u00edn-Mart\u00edn R, Xu D, et\u00a0al (2020) 6-pack: Category-level 6d pose tracker with anchor-based keypoints. In: IEEE Int Conf Robot Autom, Paris, France, pp 10059\u201310066. 
https:\/\/doi.org\/10.1109\/ICRA40945.2020.9196679","DOI":"10.1109\/ICRA40945.2020.9196679"},{"key":"17626_CR49","doi-asserted-by":"publisher","unstructured":"Wang H, Sridhar S, Huang J, et\u00a0al (2019b) Normalized object coordinate space for category-level 6d object pose and size estimation. In: IEEE Conf Comput Vis Pattern Recognit, Long Beach, CA, USA, pp 2642\u20132651. https:\/\/doi.org\/10.1109\/CVPR.2019.00275","DOI":"10.1109\/CVPR.2019.00275"},{"key":"17626_CR50","doi-asserted-by":"publisher","unstructured":"Wang H, Li W, Kim J, et\u00a0al (2022a) Attention-guided RGB-D Fusion Network for Category-level 6D Object Pose Estimation. In: IEEE Int Conf Intell Robot Syst, Kyoto, Japan, pp 10651\u201310658. https:\/\/doi.org\/10.1109\/IROS47612.2022.9981242","DOI":"10.1109\/IROS47612.2022.9981242"},{"key":"17626_CR51","doi-asserted-by":"publisher","unstructured":"Wang J, Chen K, Dou Q (2021) Category-Level 6D Object Pose Estimation via Cascaded Relation and Recurrent Reconstruction Networks. In: IEEE Int Conf Intell Robot Syst, Prague, Czech Republic, pp 4807\u20134814. https:\/\/doi.org\/10.1109\/IROS51168.2021.9636212","DOI":"10.1109\/IROS51168.2021.9636212"},{"issue":"5","key":"17626_CR52","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3326362","volume":"38","author":"Y Wang","year":"2019","unstructured":"Wang Y, Sun Y, Liu Z et al (2019) Dynamic graph cnn for learning on point clouds. ACM Trans Graph 38(5):1\u201312. https:\/\/doi.org\/10.1145\/3326362","journal-title":"ACM Trans Graph"},{"key":"17626_CR53","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-022-03874-1","author":"Y Wang","year":"2022","unstructured":"Wang Y, Jiang X, Fujita H et al (2022) EFN6D: an efficient RGB-D fusion network for 6D pose estimation. J Ambient Intell Humaniz Comput. 
https:\/\/doi.org\/10.1007\/s12652-022-03874-1","journal-title":"J Ambient Intell Humaniz Comput"},{"key":"17626_CR54","doi-asserted-by":"publisher","unstructured":"Wu C, Chen L, Wu S (2022) Cross-attention-based reflection-aware 6d pose estimation network for non-lambertian objects from rgb images. Machines 10(12). https:\/\/doi.org\/10.3390\/machines10121107","DOI":"10.3390\/machines10121107"},{"key":"17626_CR55","doi-asserted-by":"publisher","unstructured":"Wu Z, Song S, Khosla A, et\u00a0al (2015) 3d shapenets: A deep representation for volumetric shapes. In: IEEE Conf Comput Vis Pattern Recognit, Boston, MA, USA, pp 1912\u20131920. https:\/\/doi.org\/10.1109\/CVPR.2015.7298801","DOI":"10.1109\/CVPR.2015.7298801"},{"key":"17626_CR56","doi-asserted-by":"crossref","unstructured":"Xiang Y, Schmidt T, Narayanan V, et\u00a0al (2017) Posecnn: A convolutional neural network for 6d object pose estimation in cluttered scenes. arXiv preprint arXiv:1711.00199","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"17626_CR57","doi-asserted-by":"publisher","unstructured":"Xu D, Anguelov D, Jain A (2018) Pointfusion: Deep sensor fusion for 3d bounding box estimation. In: IEEE Conf Comput Vis Pattern Recognit, Salt Lake City, UT, USA, pp 244\u2013253. https:\/\/doi.org\/10.1109\/CVPR.2018.00033","DOI":"10.1109\/CVPR.2018.00033"},{"key":"17626_CR58","doi-asserted-by":"publisher","unstructured":"Yu X, Rao Y, Wang Z, et\u00a0al (2021) Pointr: Diverse point cloud completion with geometry-aware transformers. In: IEEE Int Conf Comput Vis, Montreal, QC, Canada, pp 12498\u201312507. https:\/\/doi.org\/10.1109\/ICCV48922.2021.01227","DOI":"10.1109\/ICCV48922.2021.01227"},{"key":"17626_CR59","doi-asserted-by":"publisher","unstructured":"Zeng A, Yu KT, Song S, et\u00a0al (2017) Multi-view self-supervised deep learning for 6D pose estimation in the Amazon Picking Challenge. In: IEEE Int Conf Robot Autom, Singapore, pp 1386\u20131383. 
https:\/\/doi.org\/10.1109\/ICRA.2017.7989165","DOI":"10.1109\/ICRA.2017.7989165"},{"key":"17626_CR60","doi-asserted-by":"publisher","unstructured":"Zhou C, Luo Z, Luo Y, et\u00a0al (2022a) Pttr: Relational 3d point cloud object tracking with transformer. In: IEEE Conf Comput Vis Pattern Recognit, New Orleans, LA, USA, pp 8531\u20138540. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00834","DOI":"10.1109\/CVPR52688.2022.00834"},{"key":"17626_CR61","doi-asserted-by":"publisher","first-page":"1630","DOI":"10.1109\/TMM.2020.3001533","volume":"23","author":"G Zhou","year":"2021","unstructured":"Zhou G, Yan Y, Wang D et al (2021) A novel depth and color feature fusion framework for 6d object pose estimation. IEEE Trans Multimedia 23:1630\u20131639. https:\/\/doi.org\/10.1109\/TMM.2020.3001533","journal-title":"IEEE Trans Multimedia"},{"key":"17626_CR62","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108468","volume":"124","author":"H Zhou","year":"2022","unstructured":"Zhou H, Qi L, Huang H et al (2022) CANet: Co-attention network for RGB-D semantic segmentation. Pattern Recognit 124:108468. https:\/\/doi.org\/10.1016\/j.patcog.2021.108468","journal-title":"Pattern Recognit"},{"key":"17626_CR63","doi-asserted-by":"publisher","unstructured":"Zhu M, Derpanis KG, Yang Y, et\u00a0al (2014) Single image 3d object detection and pose estimation for grasping. In: IEEE Int Conf Robot Autom, Hong Kong, China, pp 3936\u20133943. https:\/\/doi.org\/10.1109\/ICRA.2014.6907430","DOI":"10.1109\/ICRA.2014.6907430"},{"key":"17626_CR64","doi-asserted-by":"publisher","first-page":"6907","DOI":"10.1109\/TIP.2022.3216980","volume":"31","author":"L Zou","year":"2022","unstructured":"Zou L, Huang Z, Gu N et al (2022) 6d-vit: Category-level 6d object pose estimation via transformer-based instance representation learning. IEEE Trans Image Proc 31:6907\u20136921. 
https:\/\/doi.org\/10.1109\/TIP.2022.3216980","journal-title":"IEEE Trans Image Proc"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17626-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17626-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17626-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T07:37:24Z","timestamp":1715758644000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17626-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":64,"journal-issue":{"issue":"17","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["17626"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17626-6","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"26 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 October 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not 
applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}