{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T03:40:48Z","timestamp":1780717248311,"version":"3.54.1"},"reference-count":94,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,6,27]],"date-time":"2018-06-27T00:00:00Z","timestamp":1530057600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"crossref","award":["91420106"],"award-info":[{"award-number":["91420106"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"crossref","award":["90820305"],"award-info":[{"award-number":["90820305"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"crossref","award":["60775040"],"award-info":[{"award-number":["60775040"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"crossref","award":["2017YFB1302200"],"award-info":[{"award-number":["2017YFB1302200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Research fund of Tsinghua University - Tencent Joint Laboratory for Internet Innovation Technology"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s10462-018-9641-3","type":"journal-article","created":{"date-parts":[[2018,6,27]],"date-time":"2018-06-27T05:41:32Z","timestamp":1530078092000},"page":"1089-1106","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":423,"title":["Recent progress in semantic image segmentation"],"prefix":"10.1007","volume":"52","author":[{"given":"Xiaolong","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhidong","family":"Deng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuhan","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2018,6,27]]},"reference":[{"issue":"6","key":"9641_CR1","first-page":"33","volume":"29","author":"EH Adelson","year":"1984","unstructured":"Adelson EH, Anderson CH, Bergen JR, Burt PJ, Ogden JM (1984) Pyramid methods in image processing. RCA Eng 29(6):33\u201341","journal-title":"RCA Eng"},{"key":"9641_CR2","doi-asserted-by":"crossref","unstructured":"Barghout L (2014) Visual taxometric approach to image segmentation using fuzzy-spatial taxon cut yields contextually relevant regions. In: IPMU , vol 2, pp 163\u2013173","DOI":"10.1007\/978-3-319-08855-6_17"},{"key":"9641_CR3","unstructured":"Barghout L, Lee L (2003) Perceptual information processing system. US Patent App. 10\/618,543"},{"issue":"3","key":"9641_CR4","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1016\/j.cviu.2007.09.014","volume":"110","author":"H Bay","year":"2008","unstructured":"Bay H, Ess A, Tuytelaars T, Van Gool L (2008) Speeded-up robust features (surf). Comput Vis Image Underst 110(3):346\u2013359","journal-title":"Comput Vis Image Underst"},{"issue":"1","key":"9641_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Bengio Y et al (2009) Learning deep architectures for ai. Foundations and trends $$^{\\textregistered }$$ \u00ae . Mach Learn 2(1):1\u2013127","journal-title":"Mach Learn"},{"key":"9641_CR6","first-page":"168","volume":"2010","author":"L Bourdev","year":"2010","unstructured":"Bourdev L, Maji S, Brox T, Malik J (2010) Detecting people using mutually consistent poselet activations. Comput Vis ECCV 2010:168\u2013181","journal-title":"Comput Vis ECCV"},{"key":"9641_CR7","doi-asserted-by":"crossref","unstructured":"Brox T, Bourdev L, Maji S, Malik J (2011) Object segmentation by alignment of poselet activations to image contours. In: Proceedings of the 2011 IEEE conference on computer vision and pattern recognition (CVPR). IEEE, pp 2225\u20132232","DOI":"10.1109\/CVPR.2011.5995659"},{"key":"9641_CR8","doi-asserted-by":"crossref","unstructured":"Chen LC, Barron JT, Papandreou G, Murphy K, Yuille AL (2016a) Semantic image segmentation with task-specific edge detection using CNNS and a discriminatively trained domain transform. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4545\u20134554","DOI":"10.1109\/CVPR.2016.492"},{"key":"9641_CR9","unstructured":"Chen LC, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2016b) Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. arXiv preprint arXiv:1606.00915"},{"key":"9641_CR10","doi-asserted-by":"crossref","unstructured":"Chen LC, Yang Y, Wang J, Xu W, Yuille AL (2016c) Attention to scale: scale-aware semantic image segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3640\u20133649","DOI":"10.1109\/CVPR.2016.396"},{"key":"9641_CR11","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.compmedimag.2014.12.006","volume":"43","author":"A Cohen","year":"2015","unstructured":"Cohen A, Rivlin E, Shimshoni I, Sabo E (2015) Memory based active contour algorithm using pixel-level classified images for colon crypt segmentation. Comput Med Imaging Graph 43:150\u2013164","journal-title":"Comput Med Imaging Graph"},{"key":"9641_CR12","doi-asserted-by":"crossref","unstructured":"Cordts M, Omran M, Ramos S, Rehfeld T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2016) The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3213\u20133223","DOI":"10.1109\/CVPR.2016.350"},{"key":"9641_CR13","unstructured":"Csurka G, Dance C, Fan L, Willamowski J, Bray C (2004) Visual categorization with bags of keypoints. In: Workshop on statistical learning in computer vision, ECCV, vol 1. Prague, pp 1\u20132"},{"key":"9641_CR14","unstructured":"CVonline: Image databases. http:\/\/homepages.inf.ed.ac.uk\/rbf\/CVonline\/Imagedbase.htm"},{"key":"9641_CR15","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: IEEE computer society conference on computer vision and pattern recognition, 2005. CVPR 2005, vol 1. IEEE, pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"issue":"6","key":"9641_CR16","doi-asserted-by":"publisher","first-page":"3991","DOI":"10.1016\/S0006-3495(03)74813-9","volume":"85","author":"JB Monvel de","year":"2003","unstructured":"de Monvel JB, Scarfone E, Le Calvez S, Ulfendahl M (2003) Image-adaptive deconvolution for three-dimensional deep biological imaging. Biophys J 85(6):3991\u20134001","journal-title":"Biophys J"},{"key":"9641_CR17","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: A large-scale hierarchical image database. In: IEEE conference on computer vision and pattern recognition, 2009. CVPR 2009. IEEE, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"9641_CR18","volume-title":"The harris corner detector","author":"KG Derpanis","year":"2004","unstructured":"Derpanis KG (2004) The harris corner detector. York University, Toronto"},{"key":"9641_CR19","unstructured":"Dzung L, Chenyang X, Jerry L (1999) A survey of current methods in medical image segmentation. Technical report"},{"issue":"2","key":"9641_CR20","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CKI, Winn J, Zisserman A (2010) The pascal visual object classes (voc) challenge. Int J Comput Vis 88(2):303\u2013338","journal-title":"Int J Comput Vis"},{"issue":"8","key":"9641_CR21","doi-asserted-by":"publisher","first-page":"1915","DOI":"10.1109\/TPAMI.2012.231","volume":"35","author":"C Farabet","year":"2013","unstructured":"Farabet C, Couprie C, Najman L, LeCun Y (2013) Learning hierarchical features for scene labeling. IEEE Trans Pattern Anal Mach Intell 35(8):1915\u20131929","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9641_CR22","doi-asserted-by":"crossref","unstructured":"Fritsch J, Kuehnl T, Geiger A (2013) A new performance measure and evaluation benchmark for road detection algorithms. In: International conference on intelligent transportation systems (ITSC)","DOI":"10.1109\/ITSC.2013.6728473"},{"key":"9641_CR23","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Fowlkes CC (2016) Laplacian pyramid reconstruction and refinement for semantic segmentation. arXiv preprint arXiv:1605.02264","DOI":"10.1007\/978-3-319-46487-9_32"},{"key":"9641_CR24","doi-asserted-by":"crossref","unstructured":"Hariharan B, Arbel\u00e1ez P, Girshick R, Malik J (2015) Hypercolumns for object segmentation and fine-grained localization. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 447\u2013456","DOI":"10.1109\/CVPR.2015.7298642"},{"key":"9641_CR25","volume-title":"Clustering algorithms","author":"JA Hartigan","year":"1975","unstructured":"Hartigan JA, Hartigan J (1975) Clustering algorithms, vol 209. Wiley, New York"},{"issue":"4","key":"9641_CR26","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1109\/TGRS.1990.572934","volume":"28","author":"DC He","year":"1990","unstructured":"He DC, Wang L (1990) Texture unit, texture spectrum, and texture analysis. IEEE Trans Geosci Remote Sens 28(4):509\u2013512","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"9641_CR27","unstructured":"He X, Zemel RS, Carreira-Perpi\u00f1\u00e1n M\u00c1 (2004) Multiscale conditional random fields for image labeling. In: Proceedings of the 2004 IEEE computer society conference on computer vision and pattern recognition, 2004. CVPR 2004, vol 2. IEEE, pp II\u2013II"},{"key":"9641_CR28","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"9641_CR29","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"9641_CR30","doi-asserted-by":"crossref","unstructured":"Holschneider M, Kronland-Martinet R, Morlet J, Tchamitchian P (1989) A real-time algorithm for signal analysis with the help of the wavelet transform. In: Combes J, Grossmann A, Tchamitchian P (eds) Wavelets, pp. 286\u2013297. Springer, Berlin","DOI":"10.1007\/978-3-642-97177-8_28"},{"issue":"6","key":"9641_CR31","doi-asserted-by":"publisher","first-page":"490","DOI":"10.1109\/42.929615","volume":"20","author":"S Hu","year":"2001","unstructured":"Hu S, Hoffman EA, Reinhardt JM (2001) Automatic lung segmentation for accurate quantitation of volumetric x-ray ct images. IEEE Trans Med Imaging 20(6):490\u2013498","journal-title":"IEEE Trans Med Imaging"},{"issue":"4","key":"9641_CR32","doi-asserted-by":"publisher","first-page":"725","DOI":"10.1080\/01431160110040323","volume":"23","author":"C Huang","year":"2002","unstructured":"Huang C, Davis L, Townshend J (2002) An assessment of support vector machines for land cover classification. Int J Remote Sens 23(4):725\u2013749","journal-title":"Int J Remote Sens"},{"issue":"3","key":"9641_CR33","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1023\/A:1023030907417","volume":"53","author":"R Kimmel","year":"2003","unstructured":"Kimmel R, Bruckstein AM (2003) Regularized laplacian zero crossings as optimal edge integrators. Int J Comput Vis 53(3):225\u2013243","journal-title":"Int J Comput Vis"},{"key":"9641_CR34","unstructured":"Kitti vision benchmark suite. http:\/\/www.cvlibs.net\/datasets\/kitti\/"},{"key":"9641_CR35","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp 1097\u20131105"},{"key":"9641_CR36","doi-asserted-by":"crossref","unstructured":"Larochelle H, Bengio Y (2008) Classification using discriminative restricted boltzmann machines. In: Proceedings of the 25th international conference on machine learning. ACM, pp 536\u2013543","DOI":"10.1145\/1390156.1390224"},{"key":"9641_CR37","unstructured":"LeCun Y, Bengio Y (1995) Convolutional networks for images, speech, and time-series. In: Arbib M (ed) The handbook of brain theory and neural networks. MIT Press"},{"key":"9641_CR38","doi-asserted-by":"crossref","unstructured":"Leutenegger S, Chli M, Siegwart RY (2011) Brisk: binary robust invariant scalable keypoints. In: 2011 IEEE international conference on computer vision (ICCV). IEEE, pp 2548\u20132555","DOI":"10.1109\/ICCV.2011.6126542"},{"key":"9641_CR39","doi-asserted-by":"crossref","unstructured":"Li H, Lin Z, Shen X, Brandt J, Hua G (2015) A convolutional neural network cascade for face detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5325\u20135334","DOI":"10.1109\/CVPR.2015.7299170"},{"key":"9641_CR40","doi-asserted-by":"crossref","unstructured":"Li Y, Qi H, Dai J, Ji X, Wei Y (2017a) Fully convolutional instance-aware semantic segmentation. In: Computer vision and pattern recognition (CVPR). IEEE, pp 4438\u20134446","DOI":"10.1109\/CVPR.2017.472"},{"key":"9641_CR41","doi-asserted-by":"crossref","unstructured":"Li X, Liu Z, Luo P, Loy CC, Tang X (2017b) Not all pixels are equal: difficulty-aware semantic segmentation via deep layer cascade. arXiv preprint arXiv:1704.01344","DOI":"10.1109\/CVPR.2017.684"},{"key":"9641_CR42","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Bourdev L, Girshick R, Hays J, Perona P, Ramanan D, Zitnick CL, Dollar P (2014) Microsoft coco: common objects in context. arXiv preprint arXiv:1405.0312","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"9641_CR43","doi-asserted-by":"crossref","unstructured":"Lin G, Shen C, van\u00a0den Hengel A, Reid I (2016a) Efficient piecewise training of deep structured models for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3194\u20133203","DOI":"10.1109\/CVPR.2016.348"},{"key":"9641_CR44","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2016b) Feature pyramid networks for object detection. arXiv preprint arXiv:1612.03144","DOI":"10.1109\/CVPR.2017.106"},{"issue":"1","key":"9641_CR45","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1006\/cviu.1996.0510","volume":"67","author":"T Lindeberg","year":"1997","unstructured":"Lindeberg T, Li MX (1997) Segmentation and classification of edges using minimum description length approximation and complementary junction cues. Comput Vis Image Underst 67(1):88\u201398","journal-title":"Comput Vis Image Underst"},{"key":"9641_CR46","doi-asserted-by":"crossref","unstructured":"Liu Z, Li X, Luo P, Loy CC, Tang X (2015) Semantic image segmentation via deep parsing network. In: Proceedings of the IEEE international conference on computer vision, pp 1377\u20131385","DOI":"10.1109\/ICCV.2015.162"},{"issue":"10","key":"9641_CR47","first-page":"1337","volume":"79","author":"J Long","year":"2014","unstructured":"Long J, Shelhamer E, Darrell T (2014) Fully convolutional networks for semantic segmentation. IEEE Trans Pattern Anal Mach Intell 79(10):1337\u20131342","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"9641_CR48","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110","journal-title":"Int J Comput Vis"},{"key":"9641_CR49","first-page":"183","volume":"2010","author":"E Mair","year":"2010","unstructured":"Mair E, Hager G, Burschka D, Suppa M, Hirzinger G (2010) Adaptive and generic corner detection based on the accelerated segment test. Comput Vis ECCV 2010:183\u2013196","journal-title":"Comput Vis ECCV"},{"issue":"2","key":"9641_CR50","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1109\/TITS.2007.895311","volume":"8","author":"S Maldonado-Bascon","year":"2007","unstructured":"Maldonado-Bascon S, Lafuente-Arroyo S, Gil-Jimenez P, Gomez-Moreno H, L\u00f3pez-Ferreras F (2007) Road-sign detection and recognition based on support vector machines. IEEE Trans Intell Transp Syst 8(2):264\u2013278","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"9641_CR51","unstructured":"Martin D, Fowlkes C (2017) The berkeley segmentation database and benchmark. Computer Science Department, Berkeley University. http:\/\/www.eecs.berkeley.edu\/Research\/Projects\/CS\/vision\/bsds"},{"issue":"3","key":"9641_CR52","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1016\/S0734-189X(87)80181-0","volume":"39","author":"G Medioni","year":"1987","unstructured":"Medioni G, Yasumoto Y (1987) Corner detection and curve representation using cubic b-splines. Comput Vis Graph Image Process 39(3):267\u2013278","journal-title":"Comput Vis Graph Image Process"},{"key":"9641_CR53","doi-asserted-by":"crossref","unstructured":"Menze M, Geiger A (2015) Object scene flow for autonomous vehicles. In: Proceedings of the conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2015.7298925"},{"key":"9641_CR54","unstructured":"Mobahi H, Rao SR, Yang AY, Sastry SS, Ma Y (2010) Segmentation of natural images by texture and boundary compression. arXiv preprint arXiv:1006.3679"},{"key":"9641_CR55","unstructured":"Mohan R (2014) Deep deconvolutional networks for scene parsing. arXiv preprint arXiv:1411.4101"},{"key":"9641_CR56","first-page":"372","volume":"2002","author":"N Moon","year":"2002","unstructured":"Moon N, Bullitt E, Van Leemput K, Gerig G (2002) Automatic brain and tumor segmentation. Med Image Comput Comput Assist Interv MICCAI 2002:372\u2013379","journal-title":"Med Image Comput Comput Assist Interv MICCAI"},{"key":"9641_CR57","doi-asserted-by":"crossref","unstructured":"Mostajabi M, Yadollahpour P, Shakhnarovich G (2015) Feedforward semantic segmentation with zoom-out features. In: Proceedings of the computer vision and pattern recognition, pp 3376\u20133385","DOI":"10.1109\/CVPR.2015.7298959"},{"issue":"11","key":"9641_CR58","doi-asserted-by":"publisher","first-page":"1452","DOI":"10.1109\/TPAMI.2004.110","volume":"26","author":"R Nock","year":"2004","unstructured":"Nock R, Nielsen F (2004) Statistical region merging. IEEE Trans Pattern Anal Mach Intell 26(11):1452\u20131458","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9641_CR59","doi-asserted-by":"crossref","unstructured":"Noh H, Hong S, Han B (2015) Learning deconvolution network for semantic segmentation. In: Proceedings of the IEEE international conference on computer vision, pp 1520\u20131528","DOI":"10.1109\/ICCV.2015.178"},{"key":"9641_CR60","doi-asserted-by":"crossref","DOI":"10.1007\/b97541","volume-title":"Geometric level set methods in imaging, vision, and graphics","author":"S Osher","year":"2003","unstructured":"Osher S, Paragios N (2003) Geometric level set methods in imaging, vision, and graphics. Springer, Berlin"},{"key":"9641_CR61","doi-asserted-by":"crossref","unstructured":"Papandreou G, Chen LC, Murphy KP, Yuille AL (2015) Weakly-and semi-supervised learning of a deep convolutional network for semantic image segmentation. In: Proceedings of the IEEE international conference on computer vision, pp 1742\u20131750","DOI":"10.1109\/ICCV.2015.203"},{"key":"9641_CR62","doi-asserted-by":"publisher","DOI":"10.1002\/9780470724163","volume-title":"Handbook of granular computing","author":"W Pedrycz","year":"2008","unstructured":"Pedrycz W, Skowron A, Kreinovich V (2008) Handbook of granular computing. Wiley, New York"},{"key":"9641_CR63","doi-asserted-by":"crossref","unstructured":"Rosten E, Drummond T (2005) Fusing points and lines for high performance tracking. In: Proceedings of the tenth IEEE international conference on computer vision, 2005. ICCV 2005, vol 2. IEEE, pp 1508\u20131515","DOI":"10.1109\/ICCV.2005.104"},{"issue":"1","key":"9641_CR64","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/TPAMI.2008.275","volume":"32","author":"E Rosten","year":"2010","unstructured":"Rosten E, Porter R, Drummond T (2010) Faster and better: a machine learning approach to corner detection. IEEE Trans Pattern Anal Mach Intell 32(1):105\u2013119","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1\u20133","key":"9641_CR65","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/s11263-007-0090-8","volume":"77","author":"BC Russell","year":"2008","unstructured":"Russell BC, Torralba A, Murphy KP, Freeman WT (2008) Labelme: a database and web-based tool for image annotation. Int J Comput Vis 77(1\u20133):157\u2013173","journal-title":"Int J Comput Vis"},{"key":"9641_CR66","unstructured":"Russell C, Kohli P, Torr PH et al (2009) Associative hierarchical crfs for object class image segmentation. In: Proceedings of the 2009 IEEE 12th international conference on computer vision. IEEE, pp 739\u2013746"},{"key":"9641_CR67","doi-asserted-by":"crossref","unstructured":"Saito S, Li T, Li H (2016) Real-time facial segmentation and performance capture from RGB input. arXiv preprint arXiv:1604.02647","DOI":"10.1007\/978-3-319-46484-8_15"},{"key":"9641_CR68","doi-asserted-by":"crossref","unstructured":"Sharma A, Tuzel O, Jacobs DW (2015) Deep hierarchical parsing for semantic segmentation. In: Proceedings of the computer vision and pattern recognition, pp 530\u2013538","DOI":"10.1109\/CVPR.2015.7298651"},{"key":"9641_CR69","unstructured":"Shadow detection\/texture segmentation computer vision dataset. https:\/\/zenodo.org\/record\/59019#.WWHm3oSGNeM"},{"key":"9641_CR70","unstructured":"Shi J et\u00a0al (1994) Good features to track. In: Proceedings of the 1994 IEEE computer society conference on CVPR\u201994 computer vision and pattern recognition. IEEE, pp. 593\u2013600"},{"key":"9641_CR71","first-page":"1","volume":"2006","author":"J Shotton","year":"2006","unstructured":"Shotton J, Winn J, Rother C, Criminisi A (2006) Textonboost: joint appearance, shape and context modeling for multi-class object recognition and segmentation. Comput Vis ECCV 2006:1\u201315","journal-title":"Comput Vis ECCV"},{"key":"9641_CR72","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"issue":"1","key":"9641_CR73","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1023\/A:1007963824710","volume":"23","author":"SM Smith","year":"1997","unstructured":"Smith SM, Brady JM (1997) Susana new approach to low level image processing. Int J Comput Vis 23(1):45\u201378","journal-title":"Int J Comput Vis"},{"key":"9641_CR74","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"9641_CR75","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"9641_CR76","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi AA (2017) Inception-v4, inception-resnet and the impact of residual connections on learning. In: AAAI, pp 4278\u20134284","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"9641_CR77","unstructured":"Thoma M (2016) A survey of semantic segmentation. arXiv preprint arXiv:1602.06541"},{"key":"9641_CR78","unstructured":"VOC2010 preliminary results. http:\/\/host.robots.ox.ac.uk\/pascal\/VOC\/voc2010\/results\/index.html"},{"key":"9641_CR79","doi-asserted-by":"crossref","unstructured":"Viola P, Jones M (2001) Rapid object detection using a boosted cascade of simple features. In: Proceedings of the 2001 IEEE computer society conference on computer vision and pattern recognition, CVPR 2001, vol 1. IEEE, pp I\u2013I","DOI":"10.1109\/CVPR.2001.990517"},{"key":"9641_CR80","doi-asserted-by":"crossref","unstructured":"Wei GQ, Arbter K, Hirzinger G (1997) Automatic tracking of laparoscopic instruments by color coding. In: CVRMed-MRCAS\u201997. Springer, Berlin, pp 357\u2013366","DOI":"10.1007\/BFb0029257"},{"key":"9641_CR81","unstructured":"Wu Z, Shen C, Hengel A (2016a) High-performance semantic segmentation using very deep fully convolutional networks. arXiv preprint arXiv:1604.04339"},{"key":"9641_CR82","unstructured":"Wu Z, Shen C, Hengel A (2016b) Wider or deeper: revisiting the resnet model for visual recognition. arXiv preprint arXiv:1611.10080"},{"key":"9641_CR83","doi-asserted-by":"crossref","unstructured":"Xia W, Domokos C, Dong J, Cheong LF, Yan S (2013) Semantic segmentation without annotating segments. In: Proceedings of the IEEE international conference on computer vision, pp 2176\u20132183","DOI":"10.1109\/ICCV.2013.271"},{"key":"9641_CR84","doi-asserted-by":"crossref","unstructured":"Xiao J, Hays J, Ehinger KA, Oliva A, Torralba A (2010) Sun database: large-scale scene recognition from abbey to zoo. In: 2010 IEEE conference on computer vision and pattern recognition (CVPR). IEEE, pp 3485\u20133492","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"9641_CR85","unstructured":"Xie S, Girshick R, Dollr P, Tu Z, He K (2016) Aggregated residual transformations for deep neural networks. arXiv preprint arXiv:1611.05431"},{"key":"9641_CR86","doi-asserted-by":"crossref","unstructured":"Xu A, Wang L, Feng S, Qu Y (2010) Threshold-based level set method of image segmentation. In: 2010 3rd international conference on intelligent networks and intelligent systems (ICINIS). IEEE, pp 703\u2013706","DOI":"10.1109\/ICINIS.2010.181"},{"key":"9641_CR87","doi-asserted-by":"crossref","unstructured":"Xu J, Schwing AG, Urtasun R (2015) Learning to segment under various forms of weak supervision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3781\u20133790","DOI":"10.1109\/CVPR.2015.7299002"},{"key":"9641_CR88","unstructured":"Yu F, Koltun V (2015) Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122"},{"key":"9641_CR89","doi-asserted-by":"crossref","unstructured":"Yu F, Koltun V, Funkhouser T (2017) Dilated residual networks. arXiv preprint arXiv:1705.09914","DOI":"10.1109\/CVPR.2017.75"},{"key":"9641_CR90","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2016) Pyramid scene parsing network. arXiv preprint arXiv:1612.01105"},{"key":"9641_CR91","doi-asserted-by":"crossref","unstructured":"Zheng L, Li G, Bao Y (2010) Improvement of grayscale image 2D maximum entropy threshold segmentation method. In: 2010 international conference on logistics systems and intelligent management, vol 1. IEEE, pp 324\u2013328","DOI":"10.1109\/ICLSIM.2010.5461410"},{"key":"9641_CR92","doi-asserted-by":"crossref","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A (2017) Scene parsing through ade20k dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2017.544"},{"issue":"1","key":"9641_CR93","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/s11263-005-4638-1","volume":"62","author":"SC Zhu","year":"2005","unstructured":"Zhu SC, Guo CE, Wang Y, Xu Z (2005) What are textons? Int J Comput Vis 62(1):121\u2013143","journal-title":"Int J Comput Vis"},{"key":"9641_CR94","unstructured":"Zhu J, Mao J, Yuille AL (2014) Learning from weakly supervised data by the expectation loss svm (e-svm) algorithm. In: Advances in neural information processing systems, pp 1125\u20131133"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-018-9641-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10462-018-9641-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-018-9641-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,5]],"date-time":"2025-07-05T10:43:11Z","timestamp":1751712191000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10462-018-9641-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,27]]},"references-count":94,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["9641"],"URL":"https:\/\/doi.org\/10.1007\/s10462-018-9641-3","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"value":"0269-2821","type":"print"},{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,6,27]]},"assertion":[{"value":"27 June 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}