{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T19:35:36Z","timestamp":1772307336895,"version":"3.50.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2023,3,2]],"date-time":"2023-03-02T00:00:00Z","timestamp":1677715200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,2]],"date-time":"2023-03-02T00:00:00Z","timestamp":1677715200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s11227-023-05068-8","type":"journal-article","created":{"date-parts":[[2023,3,2]],"date-time":"2023-03-02T09:05:06Z","timestamp":1677747906000},"page":"11743-11766","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["An intuitive pre-processing method based on human\u2013robot interactions: zero-shot learning semantic segmentation based on synthetic semantic template"],"prefix":"10.1007","volume":"79","author":[{"given":"Yen-Chun","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7138-0272","authenticated-orcid":false,"given":"Chin-Feng","family":"Lai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,2]]},"reference":[{"key":"5068_CR1","unstructured":"Sarullo A, Mu T (2020) Zero-shot human-object interaction recognition via affordance graphs. ArXiv, 2020. 2"},{"key":"5068_CR2","doi-asserted-by":"crossref","unstructured":"Minaee S, Boykov Y, Porikli F, Plaza A, Kehtarnavaz N, Terzopoulos D (2020) Image segmentation using deep learning: A survey. arXiv:2001.05566","DOI":"10.1109\/TPAMI.2021.3059968"},{"key":"5068_CR3","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: convolutional networks for biomedical image segmentation. In MICCAI, pp 234\u2013241. Springer, Cham","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"5068_CR4","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In CVPR, pp 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"issue":"4","key":"5068_CR5","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5068_CR6","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00b4ar P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"5068_CR7","doi-asserted-by":"crossref","unstructured":"Bolya D, Zhou C, Xiao F, Lee YJ (2019) YOLACT: real-time instance segmentation. arXiv preprint arXiv:190402689","DOI":"10.1109\/ICCV.2019.00925"},{"issue":"1\u20133","key":"5068_CR8","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/s11263-007-0090-8","volume":"77","author":"B Russell","year":"2008","unstructured":"Russell B, Torralba A, Murphy K, Freeman W (2008) LabelMe: a database and web-based tool for image annotation. IJCV 77(1\u20133):157\u2013173","journal-title":"IJCV"},{"key":"5068_CR9","unstructured":"LabelImg.: A Graphical Image annotation tool. https:\/\/github.com\/tzutalin\/labelImg Reference Video Link: https:\/\/youtu.be\/p0nR2YsCY_U"},{"key":"5068_CR10","doi-asserted-by":"crossref","unstructured":"Mercier M. Garon, Gigure P, Franois JD (2020) Template-based object instance detection. arXiv:1911.11822","DOI":"10.1109\/WACV48630.2021.00155"},{"key":"5068_CR11","unstructured":"Mennatullah S, Chen J, Steven L, Laura P, Mahmoud G, Mohamed E, Martin J (2019) Video segmentation using teacher-student adaptation in a human robot interaction (HRI) setting. In: ICRA"},{"key":"5068_CR12","doi-asserted-by":"crossref","unstructured":"Tianyn Z, Qi Z, Jing D (2020) Intuitive robot teleoperation for civil engineering operations with virtual reality and deep learning scene reconstruction. Adv Eng Inform 46 (Oct):101170","DOI":"10.1016\/j.aei.2020.101170"},{"key":"5068_CR13","doi-asserted-by":"crossref","unstructured":"Wu J, Li K, Zhao X, Tan M (2018) Unfamiliar dynamic hand gestures recognition based on zero-shot learning. ICONIP, pp. 244\u2013254","DOI":"10.1007\/978-3-030-04221-9_22"},{"key":"5068_CR14","doi-asserted-by":"crossref","unstructured":"Witkin KA, Terzopoulos D (1988) Snakes: active contour models. Int J Comp Vis 1(4):321\u2013331","DOI":"10.1007\/BF00133570"},{"key":"5068_CR15","unstructured":"Toussaint PM, Nakajima S (2009) Multi-class image segmentation using conditional random fields and global classification. In: Proceedings of the 26th Annual International Conference on Machine Learning. ACM, New York, pp. 817\u2013824"},{"key":"5068_CR16","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1016\/j.procs.2015.06.090","volume":"54","author":"N Dhanachandra","year":"2015","unstructured":"Dhanachandra N, Manglem K, Chanu YJ (2015) Image segmentation using k-means clustering algorithm and subtractive clustering algorithm. Proc Comp Sci 54:764\u2013771","journal-title":"Proc Comp Sci"},{"key":"5068_CR17","doi-asserted-by":"crossref","unstructured":"Minaee S, Boykov Y, Porikli F, Plaza A, Kehtarnavaz N, Terzopoulos D (2020) Image segmentation using deep learning: a survey. arXiv preprint arXiv:2001.05566.","DOI":"10.1109\/TPAMI.2021.3059968"},{"key":"5068_CR18","doi-asserted-by":"crossref","unstructured":"Chen L-C, Zhu Y, Papandreou G, Schroff F, Adam H (2018) Encoder-decoder with atrous separable convolution for semantic image segmentation. In:Proceedings of the European Conference on Computer Vision (ECCV), pp 801\u2013818","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"5068_CR19","unstructured":"Yuan Y, Chen X, Wang J (2019) Object-contextual representations for semantic segmentation. arXiv preprint arXiv:1909.11065"},{"key":"5068_CR20","doi-asserted-by":"crossref","unstructured":"Wang Q, Zhang L, Bertinetto L, Hu W, Torr PH (2019) Fast online object tracking and segmentation: A unifying approach. In: IEEE Conference on Computer Vision And Pattern Recognition","DOI":"10.1109\/CVPR.2019.00142"},{"key":"5068_CR21","doi-asserted-by":"crossref","unstructured":"Wu Y, Abd-Almageed W, Natarajan P (2017) Deep matching and validation network: an end-to-end solution to constrained image splicing localization and detection. In ACMMM, New York, pp 1480\u20131502","DOI":"10.1145\/3123266.3123411"},{"key":"5068_CR22","unstructured":"Billings G, Johnson-Roberson M (2018) Silhonet: An rgb method for 3d object pose estimation and grasp planning. arXiv preprint arXiv:1809.06893"},{"key":"5068_CR23","unstructured":"Miller EG (2002) Learning from one example in machine vision by sharing probability densities. PhD Dissertation, Massachusetts Institute of Technology"},{"key":"5068_CR24","unstructured":"Larochelle H, Erhan D, Bengio Y (2008) Zero-data learning of new tasks. In: AAAI"},{"key":"5068_CR25","doi-asserted-by":"crossref","unstructured":"Wang W, Zheng VW, Yu H, Miao C (2019) A survey of zero-shot learning: Settings, methods, and applications. ACM Trans Intell Syst Technol 10(2):13:1\u201313:37","DOI":"10.1145\/3293318"},{"key":"5068_CR26","doi-asserted-by":"crossref","unstructured":"Lampert C, Nickisch H, Harmeling S (2009) Learning to detect unseen object classes by between-class attribute transfer. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 951\u2013958","DOI":"10.1109\/CVPR.2009.5206594"},{"key":"5068_CR27","doi-asserted-by":"crossref","unstructured":"Perronnin AF, Harchaoui Z, Schmid C (2016) Label-embedding for image classification. IEEE Trans Pattern Anal Mach Intell 38(7):1425\u20131438","DOI":"10.1109\/TPAMI.2015.2487986"},{"key":"5068_CR28","doi-asserted-by":"crossref","unstructured":"Ba J, Swersky K, Fidler S, Salakhutdinov R (2015) Predicting deep zero-shot convolutional neural networks using textual descriptions. In: Proceedings of the IEEE International Conference on Computer Vision, pp 4247\u20134255","DOI":"10.1109\/ICCV.2015.483"},{"key":"5068_CR29","doi-asserted-by":"crossref","unstructured":"Xian Z, Akata G, Sharma Q, Hein NM, Schiele B (2016) Latent embeddings for zero-shot classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 69\u201377","DOI":"10.1109\/CVPR.2016.15"},{"key":"5068_CR30","doi-asserted-by":"crossref","unstructured":"Wang D, Li Y, Lin Lin Y, Zhuang Y (2016) Relational knowledge transfer for zero-shot learning. In: Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, pp 2145\u20132151","DOI":"10.1609\/aaai.v30i1.10195"},{"key":"5068_CR31","doi-asserted-by":"crossref","unstructured":"Xian Y, Choudhury S, He Y, Schiele B, Akata Z (2019) Semantic projection network for zero-and few-label semantic segmentation. In: CVPR, pp. 8256\u20138265","DOI":"10.1109\/CVPR.2019.00845"},{"key":"5068_CR32","unstructured":"Bucher M, Vu T, Cord M, Perez P (2019) Zero-shot semantic segmentation. In: NeurIPS, pp. 468\u2013479"},{"key":"5068_CR33","doi-asserted-by":"crossref","unstructured":"Kato N, Yamasaki T, Aizawa K (2019) Zero-shot semantic segmentation via variational mapping. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops","DOI":"10.1109\/ICCVW.2019.00172"},{"key":"5068_CR34","unstructured":"Li P, Wei Y, Yang Y (2020) Consistent structural relation learning for zero-shot segmentation. In: Advances in Neural Information Processing Systems, 33"},{"key":"5068_CR35","doi-asserted-by":"crossref","unstructured":"Gu Z, Zhou S, Niu L, Zhao Z, Zhang L (2020) Context-aware feature generation for zero shot semantic segmentation. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1921\u20131929","DOI":"10.1145\/3394171.3413593"},{"key":"5068_CR36","doi-asserted-by":"crossref","unstructured":"Wang Q, Chen K (2017) Alternative semantic representations for zero-shot human action recognition. In: European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, pp 87\u2013102","DOI":"10.1007\/978-3-319-71249-9_6"},{"key":"5068_CR37","doi-asserted-by":"crossref","unstructured":"Mitash C, Bekris KE, Boularias A (2017) A selfsupervised learning system for object detection using physics simulation and multi-view pose estimation. arXiv:1703.03347","DOI":"10.1109\/IROS.2017.8202206"},{"key":"5068_CR38","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Weinberger KQ (2017) Densely connected convolutional networks. In: IEEE Conference on Computer Vision and Pattern Recognition","DOI":"10.1109\/CVPR.2017.243"},{"key":"5068_CR39","doi-asserted-by":"crossref","unstructured":"Tobin J, Fong R, Ray A, Schneider J, Zaremba W, Abbeel P (2017) Domain randomization for transferring deep neural networks from simulation to the real world. arXiv:1703.06907","DOI":"10.1109\/IROS.2017.8202133"},{"key":"5068_CR40","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2016) Squeezenet: Alexnet-level accuracy with 50x fewer parameters and>0.5 mb model size. arXiv:1602.07360"},{"key":"5068_CR41","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick RB, Gupta A, He K (2018) Non-local neural networks. In: Proceedings of CVPR, pp. 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"key":"5068_CR42","unstructured":"Buades BC, Morel J-M (2005) A non-local algorithm for image denoising. In Computer Vision and Pattern Recognition"},{"key":"5068_CR43","doi-asserted-by":"crossref","unstructured":"Tversky A (1977) Features of similarity. Psychol Rev 84(4):327","DOI":"10.1037\/0033-295X.84.4.327"},{"key":"5068_CR44","doi-asserted-by":"crossref","unstructured":"Alcantarilla PF, Nuevo J, Bartoli A (2013) Fast explicit diffusion for accelerated features in nonlinear scale spaces. In: Presented at the Brit. Mach. Vision Conf., Bristol, UK","DOI":"10.5244\/C.27.13"},{"key":"5068_CR45","doi-asserted-by":"crossref","unstructured":"Xiang Y, Mottaghi R, Savarese S (2014) Beyond PASCAL: A benchmark for 3D object detection in the wild. In: WACV, pp 75\u201382","DOI":"10.1109\/WACV.2014.6836101"},{"key":"5068_CR46","unstructured":"Li Z, Savarese S, Savva M, Song S, Su H et al (2015) Shapenet: An information-rich 3D model repository. arXiv:1512.03012"},{"key":"5068_CR47","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2010","unstructured":"Everingham M, Gool L, Williams C, Winn J, Zisserman A (2010) The Pascal Visual Object Classes (VOC) challenge\u2014a retrospective. Int J Comput Vis 111:98\u2013136","journal-title":"Int J Comput Vis"},{"key":"5068_CR48","unstructured":"Frome A, Corrado G, Shlens J, Bengio S, Dean J, Ranzato M, Mikolov T (2013) Devise: A deep visual-semantic embedding model. In: Advances in Neural Information Processing Systems (NeurIPS), pp 2121\u20132129"},{"key":"5068_CR49","unstructured":"Bansal SS, Liu Z, Essa I, Boots (2017) One-shot learning for semantic segmentation. In BMVC. BMVA Press, UK"},{"key":"5068_CR50","unstructured":"Rakelly K, Shelhamer E, Darrell T, Efros A, Levine S (2018) Conditional networks for few-shot semantic segmentation. In: ICLR (Workshop). OpenReview.net"},{"key":"5068_CR51","unstructured":"Liu H, Wang Y, Zhao J, Yang G, Lv F (2020) Learning unbiased zero-shot semantic segmentation networks via transductive transfer. arXiv preprint arXiv:2007.00515"},{"key":"5068_CR52","doi-asserted-by":"crossref","unstructured":"Rosen R, Wichert G, Lo G, Bettenhausen KD (2015) About the importance of autonomy and digital twins for the future of manufacturing. IFAC-PapersOnLine. Elsevier Ltd., Amsterdam, Volume 28, pp. 567\u2013572","DOI":"10.1016\/j.ifacol.2015.06.141"},{"key":"5068_CR53","doi-asserted-by":"crossref","unstructured":"Hodan T et al (2018) BOP: Benchmark for 6D object pose estimation. In: Proceedings of European Conference on Computer Vision, pp. 19\u201335","DOI":"10.1007\/978-3-030-01249-6_2"},{"key":"5068_CR54","doi-asserted-by":"crossref","unstructured":"Fang, H-S, Wang, C, Gou M, Lu C (2020) Graspnet-1billion: A large-scale benchmark for general object grasping. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp 11 444\u201311 453","DOI":"10.1109\/CVPR42600.2020.01146"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-023-05068-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-023-05068-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-023-05068-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,9]],"date-time":"2023-06-09T07:10:22Z","timestamp":1686294622000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-023-05068-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,2]]},"references-count":54,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["5068"],"URL":"https:\/\/doi.org\/10.1007\/s11227-023-05068-8","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,2]]},"assertion":[{"value":"13 January 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 March 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}]}}