{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:45:32Z","timestamp":1757313932596,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2021,4,8]],"date-time":"2021-04-08T00:00:00Z","timestamp":1617840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,4,8]],"date-time":"2021-04-08T00:00:00Z","timestamp":1617840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1007\/s00521-020-05644-6","type":"journal-article","created":{"date-parts":[[2021,4,8]],"date-time":"2021-04-08T19:07:39Z","timestamp":1617908859000},"page":"12283-12300","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Unreal mask: one-shot multi-object class-based pose estimation for robotic manipulation using keypoints with a synthetic dataset"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4808-478X","authenticated-orcid":false,"given":"S. H.","family":"Zabihifar","sequence":"first","affiliation":[]},{"given":"A. N.","family":"Semochkin","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Seliverstova","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Efimov","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,8]]},"reference":[{"key":"5644_CR1","doi-asserted-by":"crossref","unstructured":"Milan A, et al. (2018) Semantic segmentation from limited training data. In: Proceedings-IEEE international conference on robotics and automation, pp 1908\u20131915","DOI":"10.1109\/ICRA.2018.8461082"},{"key":"5644_CR2","doi-asserted-by":"crossref","unstructured":"Schwarz M, et al. (2018) Fast object learning and dual-arm coordination for cluttered stowing, picking, and packing. In: Proceedings-IEEE international conference on robotics and automation, pp 3347\u20133354","DOI":"10.1109\/ICRA.2018.8461195"},{"key":"5644_CR3","first-page":"5784","volume":"2017","author":"JM Wong","year":"2017","unstructured":"Wong JM et al (2017) SegICP: integrated deep semantic segmentation and pose estimation. IEEE Int Conf Intell Robot Syst 2017:5784\u20135789","journal-title":"IEEE Int Conf Intell Robot Syst"},{"key":"5644_CR4","doi-asserted-by":"crossref","unstructured":"Marion P, Florence PR, Manuelli L, Tedrake R (2018) Label fusion: a pipeline for generating ground truth labels for real RGBD data of cluttered scenes. In: Proceedings-IEEE international conference on robotics and automation, pp 3235\u20133242","DOI":"10.1109\/ICRA.2018.8460950"},{"key":"5644_CR5","doi-asserted-by":"crossref","unstructured":"Pavlakos G, Zhou X, Chan A, Derpanis KG, Daniilidis K (2017) \u201c6-DoF object pose from semantic keypoints. In: Proceedings-IEEE international conference on robotics and automation, pp 2011\u20132018","DOI":"10.1109\/ICRA.2017.7989233"},{"key":"5644_CR6","unstructured":"Wu Y, Kirillov A, Massa F, Lo WY, Girshick R (2019) Detectron2. https:\/\/github.com\/facebookresearch\/detectron2"},{"key":"5644_CR7","unstructured":"Manuelli L, Gao W, Florence P, Tedrake R (2019) kPAM: keypoint affordances for category-level robotic manipulation. arXiv, pp 1\u201310"},{"key":"5644_CR8","doi-asserted-by":"crossref","unstructured":"Hu Y, Hugonot J, Fua P, Salzmann M (2019) Segmentation-driven 6D object pose estimation. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol 2019, pp 3380\u20133389","DOI":"10.1109\/CVPR.2019.00350"},{"key":"5644_CR9","doi-asserted-by":"crossref","unstructured":"Zakharov S, Shugurov I, Ilic S (2019) DPOD: 6D pose object detector and refiner In: IEEE international conference on computer vision (ICCV), pp 1941\u20131950","DOI":"10.1109\/ICCV.2019.00203"},{"key":"5644_CR10","doi-asserted-by":"crossref","unstructured":"Rad M, Lepetit V (2017) BB8: a scalable, accurate, robust to partial occlusion method for predicting the 3D poses of challenging objects without using depth. In: Proceedings of the IEEE international conference on computer vision, vol 2017, pp 3848\u20133856","DOI":"10.1109\/ICCV.2017.413"},{"key":"5644_CR11","doi-asserted-by":"crossref","unstructured":"Tekin B, Sinha SN, Fua P (2018) Real-time seamless single shot 6D object pose prediction. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2018.00038"},{"key":"5644_CR12","doi-asserted-by":"crossref","unstructured":"Kehl W, Manhardt F, Tombari F, Ilic S, Navab N (2017) SSD-6D: making RGB-based 3D detection and 6D pose estimation great again. In: Proceedings of the IEEE international conference on computer vision, vol 2017, pp 1530\u20131538","DOI":"10.1109\/ICCV.2017.169"},{"key":"5644_CR13","doi-asserted-by":"crossref","unstructured":"Song C, Song J, Huang Q (2020) HybridPose: 6D object pose estimation under hybrid representations. http:\/\/arxiv.org\/abs\/2001.01869","DOI":"10.1109\/CVPR42600.2020.00051"},{"key":"5644_CR14","doi-asserted-by":"crossref","unstructured":"Peng S, Liu Y, Huang Q, Zhou X, Bao H (2019) PVNET: pixel-wise voting network for 6dof pose estimation. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2019.00469"},{"key":"5644_CR15","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1007\/s11263-019-01250-9","volume":"128","author":"Y Li","year":"2019","unstructured":"Li Y, Wang G, Ji X, Xiang Y, Fox D (2019) DeepIM: deep iterative matching for 6D pose estimation. Int J Comput Vis 128:657","journal-title":"Int J Comput Vis"},{"issue":"2","key":"5644_CR16","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1109\/LRA.2016.2634089","volume":"2","author":"T Schmidt","year":"2017","unstructured":"Schmidt T, Newcombe R, Fox D (2017) Self-supervised visual descriptor learning for dense correspondence. IEEE Robot Autom Lett 2(2):420\u2013427","journal-title":"IEEE Robot Autom Lett"},{"key":"5644_CR17","unstructured":"Chang AX et al (2015) ShapeNet: an information-rich 3D model repository, arXiv [cs.GR]. Available at: arXiv:1512.03012"},{"key":"5644_CR18","unstructured":"Tremblay J, To T, Sundaralingam B, Xiang Y, Fox D, Birchfield S (2018) Deep object pose estimation for semantic robotic grasping of household objects. In: 2nd Conference on robot learning, no. CoRL, pp 1\u201311"},{"key":"5644_CR19","doi-asserted-by":"crossref","unstructured":"Hinterstoisser S, Lepetit V, Wohlhart P, Konolige K (2019) On pre-trained image features and synthetic images for deep learning. In: Lecture notes in computer science (including lecture notes in artificial intelligence and lecture notes in bioinformatics), vol 11129, pp 682\u2013697","DOI":"10.1007\/978-3-030-11009-3_42"},{"key":"5644_CR20","unstructured":"Nikolenko SI (2019) Synthetic data for deep learning, arXiv [cs.LG]. Available at: arXiv:1909.11512"},{"key":"5644_CR21","doi-asserted-by":"crossref","unstructured":"Gupta S, Arbel\u00e1ez P, Girshick R, Malik J (2015) Aligning 3D models to RGB-D images of cluttered scenes. In: IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2015.7299105"},{"key":"5644_CR22","doi-asserted-by":"crossref","unstructured":"Xiang Y et al (2018) PoseCNN: a convolutional neural network for 6D object pose estimation in cluttered scenes. In: Robotics: science and systems XIV. Robotics: science and systems foundation","DOI":"10.15607\/RSS.2018.XIV.019"},{"issue":"4\u20135","key":"5644_CR23","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1177\/0278364914549607","volume":"34","author":"I Lenz","year":"2015","unstructured":"Lenz I, Lee H (2015) Deep learning for detecting robotic grasps. Int J Rob Res 34(4\u20135):705\u2013724","journal-title":"Int J Rob Res"},{"key":"5644_CR24","doi-asserted-by":"crossref","unstructured":"Morrison D, Leitner J, Corke P (2018) Closing the loop for robotic grasping: a real-time, generative grasp synthesis approach. arXiv preprint http:\/\/arxiv.org\/abs\/1804.05172","DOI":"10.15607\/RSS.2018.XIV.021"},{"key":"5644_CR25","doi-asserted-by":"crossref","unstructured":"Sun X, Xiao B, Wei F, Liang S, Wei Y (2018) Integral human pose regression. In: Lecture notes in computer science (including lecture notes in artificial intelligence and lecture notes in bioinformatics), vol 11210, pp 536\u2013553","DOI":"10.1007\/978-3-030-01231-1_33"},{"key":"5644_CR26","doi-asserted-by":"crossref","unstructured":"Gualtieri M, Ten Pas A, Saenko K, Platt R (2016) High precision grasp pose detection in dense clutter. In: International conference on intelligent robots and systems, vol 2016, pp 598\u2013605","DOI":"10.1109\/IROS.2016.7759114"},{"key":"5644_CR27","doi-asserted-by":"crossref","unstructured":"Mahler J et al. (2016) A cloud-based network of 3D objects for robust grasp planning using a multi-armed bandit model with correlated rewards. In: 2016 IEEE International conference on robotics and automation (ICRA), pp 1957\u20131964","DOI":"10.1109\/ICRA.2016.7487342"},{"key":"5644_CR28","doi-asserted-by":"crossref","unstructured":"Gualtieri M, Ten Pas A, Platt R (2018) Pick and place without geometric object models. In: 2018 IEEE international conference on robotics and automation (ICRA), pp 7433\u20137440","DOI":"10.1109\/ICRA.2018.8460553"},{"key":"5644_CR29","unstructured":"Andrychowicz M, et al. (2018) learning dexterous in-hand manipulation. arXiv, pp 1\u201327"},{"key":"5644_CR30","unstructured":"To T, et al. (2018) NDDS: N NVIDIA deep learning dataset synthesizer. https:\/\/github.com\/NVIDIA\/Dataset_Synthesizer"},{"issue":"2","key":"5644_CR31","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1109\/TPAMI.2018.2844175","volume":"42","author":"K He","year":"2020","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2020) Mask R-CNN. IEEE Trans Pattern Anal Mach Intell 42(2):386\u2013397","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5644_CR32","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster R-CNN: towards real-time object detection with region proposal networks. In: NIPS, pp 1\u201314"},{"key":"5644_CR33","doi-asserted-by":"crossref","unstructured":"Wang X (2017) A-Fast-RCNN: hard positive generation via adversary for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2606\u20132615","DOI":"10.1109\/CVPR.2017.324"},{"key":"5644_CR34","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"5644_CR35","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K, San Diego U (2017) Aggregated residual transformations for deep neural networks. In IEEE conference on computer vision and pattern recognition, pp 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"5644_CR36","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"5644_CR37","doi-asserted-by":"crossref","unstructured":"Davis J, Goadrich M (2006) The relationship between precision-recall and ROC curves. In 23rd international conference on machine learning, pp 233\u2013240","DOI":"10.1145\/1143844.1143874"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05644-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-020-05644-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05644-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T00:31:27Z","timestamp":1634603487000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-020-05644-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,4,8]]},"references-count":37,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2021,10]]}},"alternative-id":["5644"],"URL":"https:\/\/doi.org\/10.1007\/s00521-020-05644-6","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2021,4,8]]},"assertion":[{"value":"29 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 December 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 April 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}