{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T11:45:21Z","timestamp":1777117521317,"version":"3.51.4"},"reference-count":103,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2018,8,30]],"date-time":"2018-08-30T00:00:00Z","timestamp":1535587200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1007\/s11704-018-7195-8","type":"journal-article","created":{"date-parts":[[2018,8,30]],"date-time":"2018-08-30T05:13:19Z","timestamp":1535605999000},"page":"840-857","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Learning deep representations for semantic image parsing: a comprehensive overview"],"prefix":"10.1007","volume":"12","author":[{"given":"Lili","family":"Huang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiefeng","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruimao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanbin","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,30]]},"reference":[{"key":"7195_CR1","first-page":"2881","volume-title":"Proceedings of International Conference on Computer Vision and Pattern Recognition","author":"H S Zhao","year":"2017","unstructured":"Zhao H S, Shi J P, Qi X 
J, Wang X G, Jia J Y. Pyramid scene parsing network. In: Proceedings of International Conference on Computer Vision and Pattern Recognition. 2017, 2881\u20132890"},{"key":"7195_CR2","first-page":"2980","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"K He","year":"2017","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R. Mask R-CNN. In: Proceedings of IEEE International Conference on Computer Vision. 2017, 2980\u20132988"},{"issue":"2","key":"7195_CR3","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1007\/s11263-005-6642-x","volume":"63","author":"Z Tu","year":"2005","unstructured":"Tu Z, Chen X, Yuille A L, Zhu S C. Image parsing: unifying segmentation, detection, and recognition. International Journal of Computer Vision, 2005, 63(2): 113\u2013140","journal-title":"International Journal of Computer Vision"},{"key":"7195_CR4","first-page":"393","volume-title":"Proceedings of European Conference on Computer Vision","author":"Z Tu","year":"2002","unstructured":"Tu Z, Zhu S C. Parsing images into region and curve processes. In: Proceedings of European Conference on Computer Vision. 2002, 393\u2013407"},{"issue":"1","key":"7195_CR5","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1109\/TPAMI.2008.65","volume":"31","author":"F Han","year":"2009","unstructured":"Han F, Zhu S C. Bottom-up\/top-down image parsing with attribute grammar. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2009, 31(1): 59\u201373","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"7195_CR6","first-page":"2276","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"L Lin","year":"2016","unstructured":"Lin L, Wang G, Zhang R, Zhang R, Liang X, Zuo W. Deep structured scene parsing by learning with image descriptions. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 
2016, 2276\u20132284"},{"issue":"2","key":"7195_CR7","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"D G Lowe","year":"2004","unstructured":"Lowe D G. Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 2004, 60(2): 91\u2013110","journal-title":"International Journal of Computer Vision"},{"key":"7195_CR8","first-page":"886","volume-title":"Proceedings of IEEE Computer Society Conference on Computer Vision and Pattern Recognition","author":"N Dalal","year":"2005","unstructured":"Dalal N, Triggs B. Histograms of oriented gradients for human detection. In: Proceedings of IEEE Computer Society Conference on Computer Vision and Pattern Recognition. 2005, 886\u2013893"},{"key":"7195_CR9","first-page":"469","volume-title":"Proceedings of European Conference on Computer Vision","author":"T Ahonen","year":"2004","unstructured":"Ahonen T, Hadid A, Pietik\u00e4inen M. Face recognition with local binary patterns. In: Proceedings of European Conference on Computer Vision. 2004, 469\u2013481"},{"key":"7195_CR10","first-page":"1377","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"Z Liu","year":"2015","unstructured":"Liu Z, Li X, Luo P, Loy C C, Tang X. Semantic image segmentation via deep parsing network. In: Proceedings of IEEE International Conference on Computer Vision. 2015, 1377\u20131385"},{"issue":"4","key":"7195_CR11","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L C Chen","year":"2018","unstructured":"Chen L C, Papandreou G, Kokkinos I, Murphy K, Yuille A L. Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. 
IEEE Transactions on Pattern Analysis and Machine Intelligence, 2018, 40(4): 834\u2013848","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"7195_CR12","first-page":"3431","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"J Long","year":"2015","unstructured":"Long J, Shelhamer E, Darrell T. Fully convolutional networks for semantic segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2015, 3431\u20133440"},{"key":"7195_CR13","volume-title":"Semantic image segmentation with deep convolutional nets and fully connected crfs","author":"L C Chen","year":"2014","unstructured":"Chen L C, Papandreou G, Kokkinos I, Murphy K, Yuille A L. Semantic image segmentation with deep convolutional nets and fully connected crfs. 2014, arXiv preprint arXiv:14127062"},{"key":"7195_CR14","volume-title":"Large kernel matters-improve semantic segmentation by global convolutional network","author":"C Peng","year":"2017","unstructured":"Peng C, Zhang X, Yu G, Luo G, Sun J. Large kernel matters-improve semantic segmentation by global convolutional network. 2017, arXiv preprint arXiv:170302719"},{"key":"7195_CR15","first-page":"1097","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton G E. Imagenet classification with deep convolutional neural networks. In: Proceedings of Advances in Neural Information Processing Systems. 2012, 1097\u20131105"},{"key":"7195_CR16","first-page":"1","volume-title":"Proceedings of NIPS-2010 Deep Learning and Unsupervised Feature Learning Workshop","author":"R Socher","year":"2010","unstructured":"Socher R, Manning C D, Ng A Y. Learning continuous phrase representations and syntactic parsing with recursive neural networks. In: Proceedings of NIPS-2010 Deep Learning and Unsupervised Feature Learning Workshop. 
2010, 1\u20139"},{"key":"7195_CR17","volume-title":"Fully convolutional instance-aware semantic segmentation","author":"Y Li","year":"2016","unstructured":"Li Y, Qi H, Dai J, Ji X, Wei Y. Fully convolutional instance-aware semantic segmentation. 2016, arXiv preprint arXiv:161107709"},{"key":"7195_CR18","first-page":"1","volume-title":"Proceedings of International Conference on Statistical Language and Speech Processing","author":"Y Bengio","year":"2013","unstructured":"Bengio Y. Deep learning of representations: looking forward. In: Proceedings of International Conference on Statistical Language and Speech Processing. 2013, 1\u201337"},{"key":"7195_CR19","first-page":"17","volume-title":"Proceedings of ICML Workshop on Unsupervised and Transfer Learning","author":"Y Bengio","year":"2012","unstructured":"Bengio Y. Deep learning of representations for unsupervised and transfer learning. In: Proceedings of ICML Workshop on Unsupervised and Transfer Learning. 2012, 17\u201336"},{"issue":"8","key":"7195_CR20","doi-asserted-by":"crossref","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio Y, Courville A, Vincent P. Representation learning: a review and new perspectives. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2013, 35(8): 1798\u20131828","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"4","key":"7195_CR21","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser B, Denker J S, Henderson D, Howard R E, Hubbard W, Jackel L D. Backpropagation applied to handwritten zip code recognition. 
Neural Computation, 1989, 1(4): 541\u2013551","journal-title":"Neural Computation"},{"key":"7195_CR22","first-page":"534","volume-title":"Proceedings of European Conference on Computer Vision","author":"J Dai","year":"2016","unstructured":"Dai J, He K, Li Y, Ren S, Sun J. Instance-sensitive fully convolutional networks. In: Proceedings of European Conference on Computer Vision. 2016, 534\u2013549"},{"key":"7195_CR23","volume-title":"Label refinement network for coarse-to-fine semantic segmentation","author":"M A Islam","year":"2017","unstructured":"Islam M A, Naha S, Rochan M, Bruce N, Wang Y. Label refinement network for coarse-to-fine semantic segmentation. 2017, arXiv preprint arXiv:170300551"},{"key":"7195_CR24","volume-title":"A critical review of recurrent neural networks for sequence learning","author":"Z C Lipton","year":"2015","unstructured":"Lipton Z C, Berkowitz J, Elkan C. A critical review of recurrent neural networks for sequence learning. 2015, arXiv preprint arXiv:150600019"},{"issue":"7553","key":"7195_CR25","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G. Deep learning. Nature, 2015, 521(7553): 436\u2013444","journal-title":"Nature"},{"key":"7195_CR26","first-page":"3185","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"X Liang","year":"2016","unstructured":"Liang X, Shen X, Xiang D, Feng J, Lin L, Yan S. Semantic object parsing with local-global long short-term memory. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2016, 3185\u20133193"},{"key":"7195_CR27","first-page":"3128","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"A Karpathy","year":"2015","unstructured":"Karpathy A, Li F F. Deep visual-semantic alignments for generating image descriptions. 
In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2015, 3128\u20133137"},{"key":"7195_CR28","first-page":"3104","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"I Sutskever","year":"2014","unstructured":"Sutskever I, Vinyals O, Le Q V. Sequence to sequence learning with neural networks. In: Proceedings of Advances in Neural Information Processing Systems. 2014, 3104\u20133112"},{"key":"7195_CR29","first-page":"541","volume-title":"Proceedings of European Conference on Computer Vision","author":"Z Li","year":"2016","unstructured":"Li Z, Gan Y, Liang X, Yu Y, Cheng H, Lin L. LSTM-CF: unifying context modeling and fusion with LSTMS for RGB-D scene labeling. In: Proceedings of European Conference on Computer Vision. 2016, 541\u2013557"},{"key":"7195_CR30","volume-title":"Geometric scene parsing with hierarchical LSTM","author":"Z Peng","year":"2016","unstructured":"Peng Z, Zhang R, Liang X, Liu X, Lin L. Geometric scene parsing with hierarchical LSTM. 2016, arXiv preprint arXiv:160401931"},{"key":"7195_CR31","first-page":"3547","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"W Byeon","year":"2015","unstructured":"Byeon W, Breuel TM, Raue F, Liwicki M. Scene labeling with LSTM recurrent neural networks. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2015, 3547\u20133555"},{"key":"7195_CR32","first-page":"125","volume-title":"Proceedings of European Conference on Computer Vision","author":"X Liang","year":"2016","unstructured":"Liang X, Shen X, Feng J, Lin L, Yan S. Semantic object parsing with graph LSTM. In: Proceedings of European Conference on Computer Vision. 2016, 125\u2013143"},{"key":"7195_CR33","first-page":"2175","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"X Liang","year":"2017","unstructured":"Liang X, Lin L, Shen X, Feng J, Yan S, Xing E P. 
Interpretable structure-evolving LSTM. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2017, 2175\u20132184"},{"key":"7195_CR34","volume-title":"Progressively diffused networks for semantic image segmentation","author":"R Zhang","year":"2017","unstructured":"Zhang R, Yang W, Peng Z, Wang X, Lin L. Progressively diffused networks for semantic image segmentation. 2017, arXiv preprint arXiv:170205839"},{"issue":"2-3","key":"7195_CR35","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1007\/BF00114844","volume":"7","author":"J L Elman","year":"1991","unstructured":"Elman J L. Distributed representations, simple recurrent networks, and grammatical structure. Machine Learning, 1991, 7(2-3): 195\u2013225","journal-title":"Machine Learning"},{"key":"7195_CR36","volume-title":"Parsenet: looking wider to see better","author":"W Liu","year":"2015","unstructured":"Liu W, Rabinovich A, Berg A C. Parsenet: looking wider to see better. 2015, arXiv preprint arXiv:150604579"},{"key":"7195_CR37","first-page":"1","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"C Szegedy","year":"2015","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A. Going deeper with convolutions. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2015, 1\u20139"},{"key":"7195_CR38","first-page":"770","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"K He","year":"2016","unstructured":"He K, Zhang X, Ren S, Sun J. Deep residual learning for image recognition. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2016, 770\u2013778"},{"key":"7195_CR39","volume-title":"Very deep convolutional networks for large-scale image recognition","author":"K Simonyan","year":"2014","unstructured":"Simonyan K, Zisserman A. 
Very deep convolutional networks for large-scale image recognition. 2014, arXiv preprint arXiv:14091556"},{"key":"7195_CR40","first-page":"82","volume-title":"Proceedings of International Conference on Machine Learning","author":"P H O Pinheiro","year":"2014","unstructured":"Pinheiro P H O, Collobert R. Recurrent convolutional neural networks for scene labeling. In: Proceedings of International Conference on Machine Learning. 2014, 82\u201390"},{"key":"7195_CR41","first-page":"549","volume-title":"Proceedings of the International Conference on Artificial Neural Networks","author":"A Graves","year":"2007","unstructured":"Graves A, Fern\u00e1ndez S, Schmidhuber J. Multi-dimensional recurrent neural networks. In: Proceedings of the International Conference on Artificial Neural Networks. 2007, 549\u2013558"},{"key":"7195_CR42","first-page":"625","volume-title":"Proceedings of IEEE International Conference on Multimedia and Expo","author":"L Lin","year":"2017","unstructured":"Lin L, Huang L, Chen T, Gan Y, Cheng H. Knowledge-guided recurrent neural network learning for task-oriented action prediction. In: Proceedings of IEEE International Conference on Multimedia and Expo. 2017, 625\u2013630"},{"issue":"8","key":"7195_CR43","doi-asserted-by":"crossref","first-page":"1915","DOI":"10.1109\/TPAMI.2012.231","volume":"35","author":"C Farabet","year":"2013","unstructured":"Farabet C, Couprie C, Najman L, LeCun Y. Learning hierarchical features for scene labeling. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2013, 35(8): 1915\u20131929","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"7195_CR44","first-page":"345","volume-title":"Proceedings of European Conference on Computer Vision","author":"S Gupta","year":"2014","unstructured":"Gupta S, Girshick R, Arbel\u00e1ez P, Malik J. Learning rich features from RGB-D images for object detection and segmentation. In: Proceedings of European Conference on Computer Vision. 
2014, 345\u2013360"},{"issue":"9","key":"7195_CR45","doi-asserted-by":"crossref","first-page":"1360","DOI":"10.1109\/TIP.2005.852470","volume":"14","author":"F Ning","year":"2005","unstructured":"Ning F, Delhomme D, LeCun Y, Piano F, Bottou L, Barbano P E. Toward automatic phenotyping of developing embryos from videos. IEEE Transactions on Image Processing, 2005, 14(9): 1360\u20131371","journal-title":"IEEE Transactions on Image Processing"},{"issue":"12","key":"7195_CR46","doi-asserted-by":"crossref","first-page":"2402","DOI":"10.1109\/TPAMI.2015.2408360","volume":"37","author":"X Liang","year":"2015","unstructured":"Liang X, Liu S, Shen X, Yang J, Liu L, Dong J, Lin L, Yan S. Deep human parsing with active template regression. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2015, 37(12): 2402\u20132414","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"7195_CR47","first-page":"1386","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"X Liang","year":"2015","unstructured":"Liang X, Xu C, Shen X, Yang J, Liu S, Tang J, Lin L, Yan S. Human parsing with contextualized convolutional neural network. In: Proceedings of IEEE International Conference on Computer Vision. 2015, 1386\u20131394"},{"key":"7195_CR48","first-page":"109","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"P Kr\u00e4henb\u00fchl","year":"2011","unstructured":"Kr\u00e4henb\u00fchl P, Koltun V. Efficient inference in fully connected CRFS with gaussian edge potentials. In: Proceedings of Advances in Neural Information Processing Systems. 2011, 109\u2013117"},{"key":"7195_CR49","first-page":"1520","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"H Noh","year":"2015","unstructured":"Noh H, Hong S, Han B. Learning deconvolution network for semantic segmentation. 
In: Proceedings of IEEE International Conference on Computer Vision. 2015, 1520\u20131528"},{"key":"7195_CR50","volume-title":"Segnet: a deep convolutional encoder-decoder architecture for robust semantic pixel-wise labelling","author":"V Badrinarayanan","year":"2015","unstructured":"Badrinarayanan V, Handa A, Cipolla R. Segnet: a deep convolutional encoder-decoder architecture for robust semantic pixel-wise labelling. 2015, arXiv preprint arXiv:150507293"},{"key":"7195_CR51","first-page":"234","volume-title":"Proceedings of International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger O, Fischer P, Brox T. U-net: convolutional networks for biomedical image segmentation. In: Proceedings of International Conference on Medical Image Computing and Computer-Assisted Intervention. 2015, 234\u2013241"},{"key":"7195_CR52","volume-title":"Refinenet: multi-path refinement networks with identity mappings for high-resolution semantic segmentation","author":"G Lin","year":"2016","unstructured":"Lin G, Milan A, Shen C, Reid I. Refinenet: multi-path refinement networks with identity mappings for high-resolution semantic segmentation. 2016, arXiv preprint arXiv:161106612"},{"key":"7195_CR53","volume-title":"Rethinking atrous convolution for semantic image segmentation","author":"L C Chen","year":"2017","unstructured":"Chen L C, Papandreou G, Schroff F, Adam H. Rethinking atrous convolution for semantic image segmentation. 2017, arXiv preprint arXiv:170605587"},{"key":"7195_CR54","volume-title":"Multi-scale context aggregation by dilated convolutions","author":"F Yu","year":"2015","unstructured":"Yu F, Koltun V. Multi-scale context aggregation by dilated convolutions. 
2015, arXiv preprint arXiv:151107122"},{"key":"7195_CR55","volume-title":"Not all pixels are equal: difficulty-aware semantic segmentation via deep layer cascade","author":"X Li","year":"2017","unstructured":"Li X, Liu Z, Luo P, Loy C C, Tang X. Not all pixels are equal: difficulty-aware semantic segmentation via deep layer cascade. 2017, arXiv preprint arXiv:170401344"},{"key":"7195_CR56","first-page":"693","volume-title":"Proceedings of International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Y Zhou","year":"2017","unstructured":"Zhou Y, Xie L, Shen W, Wang Y, Fishman E K, Yuille A L. A fixedpoint model for pancreas segmentation in abdominal ct scans. In: Proceedings of International Conference on Medical Image Computing and Computer-Assisted Intervention. 2017, 693\u2013701"},{"key":"7195_CR57","first-page":"214","volume-title":"Proceedings of International Conference on Machine Learning","author":"Q Li","year":"2013","unstructured":"Li Q, Wang J, Wipf D, Tu Z. Fixed-point model for structured labeling. In: Proceedings of International Conference on Machine Learning. 2013, 214\u2013221"},{"key":"7195_CR58","first-page":"5859","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"G Wang","year":"2017","unstructured":"Wang G, Luo P, Lin L, Wang X. Learning object interactions and descriptions for semantic image segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2017, 5859\u20135867"},{"key":"7195_CR59","first-page":"2718","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"P Luo","year":"2017","unstructured":"Luo P, Wang G, Lin L, Wang X. Deep dual learning for semantic image segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 
2017, 2718\u20132726"},{"key":"7195_CR60","volume-title":"Fully connected deep structured networks","author":"A G Schwing","year":"2015","unstructured":"Schwing A G, Urtasun R. Fully connected deep structured networks. 2015, arXiv preprint arXiv:150302351"},{"key":"7195_CR61","first-page":"3182","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"W Yang","year":"2014","unstructured":"Yang W, Luo P, Lin L. Clothing co-parsing by joint image segmentation and labeling. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2014, 3182\u20133189"},{"key":"7195_CR62","first-page":"1144","volume-title":"Proceedings of International Conference on Pattern Recognition","author":"W Byeon","year":"2014","unstructured":"Byeon W, Liwicki M, Breuel T M. Texture classification using 2D LSTM networks. In: Proceedings of International Conference on Pattern Recognition. 2014, 1144\u20131149"},{"key":"7195_CR63","first-page":"2650","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"D Eigen","year":"2015","unstructured":"Eigen D, Fergus R. Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In: Proceedings of IEEE International Conference on Computer Vision. 2015, 2650\u20132658"},{"key":"7195_CR64","first-page":"580","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"R Girshick","year":"2014","unstructured":"Girshick R, Donahue J, Darrell T, Malik J. Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2014, 580\u2013587"},{"key":"7195_CR65","volume-title":"Reinforcement learning for semantic segmentation in indoor scenes","author":"M Reza","year":"2016","unstructured":"Reza M, Kosecka J. Reinforcement learning for semantic segmentation in indoor scenes. 
2016, arXiv preprint arXiv:160601178"},{"key":"7195_CR66","first-page":"1747","volume-title":"Proceedings of International Conference on Machine Learning","author":"A Oord Van","year":"2016","unstructured":"Van Oord A, Kalchbrenner N, Kavukcuoglu K. Pixel recurrent neural networks. In: Proceedings of International Conference on Machine Learning. 2016, 1747\u20131756"},{"key":"7195_CR67","volume-title":"Grid long short-term memory","author":"N Kalchbrenner","year":"2015","unstructured":"Kalchbrenner N, Danihelka I, Graves A. Grid long short-term memory. 2015, arXiv preprint arXiv:150701526"},{"key":"7195_CR68","first-page":"297","volume-title":"Proceedings of European Conference on Computer Vision","author":"B Hariharan","year":"2014","unstructured":"Hariharan B, Arbel\u00e1ez P, Girshick R, Malik J. Simultaneous detection and segmentation. In: Proceedings of European Conference on Computer Vision. 2014, 297\u2013312"},{"key":"7195_CR69","first-page":"633","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"X Liang","year":"2016","unstructured":"Liang X, Wei Y, Shen X, Jie Z, Feng J, Lin L, Yan S. Reversible recursive instance-level object segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2016, 633\u2013641"},{"key":"7195_CR70","volume-title":"Proposal-free network for instance-level object segmentation","author":"X Liang","year":"2015","unstructured":"Liang X, Wei Y, Shen X, Yang J, Lin L, Yan S. Proposal-free network for instance-level object segmentation. 2015, arXiv preprint arXiv:150902636"},{"key":"7195_CR71","first-page":"539","volume-title":"Proceedings of International Conference on Machine Vision Applications","author":"F Abtahi","year":"2015","unstructured":"Abtahi F, Zhu Z, Burry AM. A deep reinforcement learning approach to character segmentation of license plate images. In: Proceedings of International Conference on Machine Vision Applications. 
2015, 539\u2013542"},{"issue":"2","key":"7195_CR72","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1007\/s11263-015-0876-z","volume":"118","author":"L Lin","year":"2016","unstructured":"Lin L, Wang K, Zuo W, Wang M, Luo J, Zhang L. A deep structured model with radius\u2013margin bound for 3D human activity recognition. International Journal of Computer Vision, 2016, 118(2): 256\u2013273","journal-title":"International Journal of Computer Vision"},{"key":"7195_CR73","first-page":"447","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"B Hariharan","year":"2015","unstructured":"Hariharan B, Arbel\u00e1ez P, Girshick R, Malik J. Hypercolumns for object segmentation and fine-grained localization. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2015, 447\u2013456"},{"key":"7195_CR74","first-page":"3470","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"Y T Chen","year":"2015","unstructured":"Chen Y T, Liu X, Yang M H. Multi-instance object segmentation with occlusion handling. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2015, 3470\u20133478"},{"key":"7195_CR75","first-page":"328","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"P Arbel\u00e1ez","year":"2014","unstructured":"Arbel\u00e1ez P, Pont-Tuset J, Barron J T, Marques F, Malik J. Multiscale combinatorial grouping. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2014, 328\u2013335"},{"key":"7195_CR76","first-page":"247","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"G Li","year":"2017","unstructured":"Li G, Xie Y, Lin L, Yu Y. Instance-level salient object segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 
2017, 247\u2013256"},{"key":"7195_CR77","first-page":"3150","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"J Dai","year":"2016","unstructured":"Dai J, He K, Sun J. Instance-aware semantic segmentation via multitask network cascades. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2016, 3150\u20133158"},{"key":"7195_CR78","first-page":"1440","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"R Girshick","year":"2015","unstructured":"Girshick R. Fast r-cnn. In: Proceedings of IEEE International Conference on Computer Vision. 2015, 1440\u20131448"},{"key":"7195_CR79","first-page":"346","volume-title":"Proceedings of European Conference on Computer Vision","author":"K He","year":"2014","unstructured":"He K, Zhang X, Ren S, Sun J. Spatial pyramid pooling in deep convolutional networks for visual recognition. In: Proceedings of European Conference on Computer Vision. 2014, 346\u2013361"},{"key":"7195_CR80","first-page":"91","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"S Ren","year":"2015","unstructured":"Ren S, He K, Girshick R, Sun J. Faster R-CNN: towards real-time object detection with region proposal networks. In: Proceedings of Advances in Neural Information Processing Systems. 2015, 91\u201399"},{"key":"7195_CR81","first-page":"2274","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"A Newell","year":"2017","unstructured":"Newell A, Huang Z, Deng J. Associative embedding: end-to-end learning for joint detection and grouping. In: Proceedings of Advances in Neural Information Processing Systems. 2017, 2274\u20132284"},{"key":"7195_CR82","volume-title":"Learning dense convolutional embeddings for semantic segmentation","author":"A W Harley","year":"2015","unstructured":"Harley A W, Derpanis K G, Kokkinos I. 
Learning dense convolutional embeddings for semantic segmentation. 2015, arXiv preprint arXiv:151104377"},{"key":"7195_CR83","volume-title":"Semantic instance segmentation via deep metric learning","author":"A Fathi","year":"2017","unstructured":"Fathi A, Wojna Z, Rathod V, Wang P, Song H O, Guadarrama S, Murphy K P. Semantic instance segmentation via deep metric learning. 2017, arXiv preprint arXiv:170310277"},{"issue":"2","key":"7195_CR84","first-page":"1","volume":"2","author":"L Yang","year":"2006","unstructured":"Yang L, Jin R. Distance metric learning: a comprehensive survey. Michigan State University, 2006, 2(2): 1\u201351","journal-title":"Michigan State University"},{"key":"7195_CR85","first-page":"3190","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"J Xu","year":"2014","unstructured":"Xu J, Schwing A G, Urtasun R. Tell me what you see and I will show you where it is. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2014, 3190\u20133197"},{"issue":"4","key":"7195_CR86","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1093\/ijl\/3.4.235","volume":"3","author":"G A Miller","year":"1990","unstructured":"Miller G A, Beckwith R, Fellbaum C, Gross D, Miller K J. Introduction to wordnet: an on-line lexical database. International Journal of Lexicography, 1990, 3(4): 235\u2013244","journal-title":"International Journal of Lexicography"},{"key":"7195_CR87","first-page":"455","volume-title":"Proceedings of Annual Meeting of the Association for Computational Linguistics","author":"R Socher","year":"2013","unstructured":"Socher R, Bauer J, Manning C D, Ng A Y. Parsing with compositional vector grammars. In: Proceedings of Annual Meeting of the Association for Computational Linguistics. 
2013, 455\u2013465"},{"issue":"2","key":"7195_CR88","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams C K, Winn J, Zisserman A. The pascal visual object classes (VOC) challenge. International Journal of Computer Vision, 2010, 88(2): 303\u2013338","journal-title":"International Journal of Computer Vision"},{"key":"7195_CR89","first-page":"1971","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"X Chen","year":"2014","unstructured":"Chen X, Mottaghi R, Liu X, Fidler S, Urtasun R, Yuille A L. Detect what you can: detecting and representing objects using holistic models and body parts. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2014, 1971\u20131978"},{"issue":"3","key":"7195_CR90","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M. Imagenet large scale visual recognition challenge. International Journal of Computer Vision, 2015, 115(3): 211\u2013252","journal-title":"International Journal of Computer Vision"},{"key":"7195_CR91","volume-title":"Semantic understanding of scenes through the ADE20K dataset","author":"B Zhou","year":"2016","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A. Semantic understanding of scenes through the ADE20K dataset. 2016, arXiv preprint arXiv:160805442"},{"key":"7195_CR92","volume-title":"Microsoft COCO: common objects in context","author":"T Y Lin","year":"2015","unstructured":"Lin T Y, Maire M, Belongie S, Bourdev L, Girshick R, Hays J, Perona P, Ramanan D, Zitnick C L, Doll\u00e1r P. Microsoft COCO: common objects in context. 
2015, arXiv preprint arXiv:14050312v3"},{"key":"7195_CR93","first-page":"740","volume-title":"Proceedings of European Conference on Computer Vision","author":"T Y Lin","year":"2014","unstructured":"Lin T Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick C L. Microsoft COCO: common objects in context. In: Proceedings of European Conference on Computer Vision. 2014, 740\u2013755"},{"key":"7195_CR94","first-page":"1972","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"C Liu","year":"2009","unstructured":"Liu C, Yuen J, Torralba A. Nonparametric scene parsing: label transfer via dense scene alignment. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2009, 1972\u20131979"},{"key":"7195_CR95","first-page":"746","volume-title":"Proceedings of European Conference on Computer Vision","author":"N Silberman","year":"2012","unstructured":"Silberman N, Hoiem D, Kohli P, Fergus R. Indoor segmentation and support inference from RGBD images. In: Proceedings of European Conference on Computer Vision. 2012, 746\u2013760"},{"key":"7195_CR96","first-page":"564","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"S Gupta","year":"2013","unstructured":"Gupta S, Arbelaez P, Malik J. Perceptual organization and recognition of indoor scenes from RGB-D images. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2013, 564\u2013571"},{"key":"7195_CR97","first-page":"567","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"S Song","year":"2015","unstructured":"Song S, Lichtenberg S P, Xiao J. Sun RGB-D: a RGB-D scene understanding benchmark suite. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 
2015, 567\u2013576"},{"key":"7195_CR98","volume-title":"Consumer Depth Cameras for Computer Vision","author":"A Janoch","year":"2013","unstructured":"Janoch A, Karayev S, Jia Y, Barron J T, Fritz M, Saenko K, Darrell T. A category-level 3D object dataset: putting the kinect to work. Consumer Depth Cameras for Computer Vision. London: Springer, 2013"},{"key":"7195_CR99","first-page":"1625","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"J Xiao","year":"2013","unstructured":"Xiao J, Owens A, Torralba A. SUN3D: a database of big spaces reconstructed using SFM and object labels. In: Proceedings of IEEE International Conference on Computer Vision. 2013, 1625\u20131632"},{"key":"7195_CR100","first-page":"3570","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"K Yamaguchi","year":"2012","unstructured":"Yamaguchi K, Kiapour M H, Ortiz L E, Berg T L. Parsing clothing in fashion photographs. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2012, 3570\u20133577"},{"issue":"1","key":"7195_CR101","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1109\/TMM.2013.2285526","volume":"16","author":"S Liu","year":"2014","unstructured":"Liu S, Feng J, Domokos C, Xu H, Huang J, Hu Z, Yan S. Fashion parsing with weak color-category labels. IEEE Transactions on Multimedia, 2014, 16(1): 253\u2013265","journal-title":"IEEE Transactions on Multimedia"},{"key":"7195_CR102","first-page":"3408","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"J Dong","year":"2013","unstructured":"Dong J, Chen Q, XiaW, Huang Z, Yan S. A deformable mixture parsing model with parselets. In: Proceedings of IEEE International Conference on Computer Vision. 
2013, 3408\u20133415"},{"key":"7195_CR103","first-page":"3213","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"M Cordts","year":"2016","unstructured":"Cordts M, Omran M, Ramos S, Rehfeld T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B. The cityscapes dataset for semantic urban scene understanding. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition. 2016, 3213\u20133223"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11704-018-7195-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-018-7195-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-018-7195-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,29]],"date-time":"2019-08-29T23:59:06Z","timestamp":1567123146000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11704-018-7195-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,30]]},"references-count":103,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2018,10]]}},"alternative-id":["7195"],"URL":"https:\/\/doi.org\/10.1007\/s11704-018-7195-8","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,8,30]]},"assertion":[{"value":"8 June 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 March 
2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}