{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T14:35:15Z","timestamp":1771598115415,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T00:00:00Z","timestamp":1654732800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T00:00:00Z","timestamp":1654732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10994-022-06182-z","type":"journal-article","created":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T23:03:36Z","timestamp":1654815816000},"page":"2193-2209","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Distilling ensemble of explanations for weakly-supervised pre-training of image segmentation models"],"prefix":"10.1007","volume":"112","author":[{"given":"Xuhong","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5451-3253","authenticated-orcid":false,"given":"Haoyi","family":"Xiong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dingfu","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zeyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaqing","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dejing","family":"Dou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,9]]},"reference":[{"key":"6182_CR1","doi-asserted-by":"crossref","unstructured":"Bau, D., Zhou, B., Khosla, A., Oliva, A., & Torralba, A. (2017). Network dissection: Quantifying interpretability of deep visual representations. transactions on pattern analysis and machine intelligence.","DOI":"10.1109\/CVPR.2017.354"},{"issue":"2","key":"6182_CR2","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1016\/j.patrec.2008.04.005","volume":"30","author":"GJ Brostow","year":"2009","unstructured":"Brostow, G. J., Fauqueur, J., & Cipolla, R. (2009). Semantic object classes in video: A high-definition ground truth database. Pattern Recognition Letters, 30(2), 88\u201397.","journal-title":"Pattern Recognition Letters"},{"key":"6182_CR3","doi-asserted-by":"crossref","unstructured":"Bulo, S. R., Neuhold, G., & Kontschieder, P. (2017) Loss max-pooling for semantic image segmentation. In Proceedings of the International Conference on Computer Vision.","DOI":"10.1109\/CVPR.2017.749"},{"key":"6182_CR4","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., & Adam, H. (2017). Rethinking Atrous Convolution for Semantic Image Segmentation"},{"key":"6182_CR5","unstructured":"Chen, C., Li, O., Tao, D., Barnett, A., Rudin, C., & Su, J.K. (2019). This looks like that: Deep learning for interpretable image recognition."},{"key":"6182_CR6","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., & Schiele, B. (2016). The cityscapes dataset for semantic urban scene understanding. In Proceedings of the Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2016.350"},{"key":"6182_CR7","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., & Sun, J. (2015). Boxsup: Exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In Proceedings of the International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2015.191"},{"key":"6182_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In Proceedings of the Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"6182_CR9","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (voc) challenge. International Journal of Computer Vision, 88, 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"6182_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0713-9","author":"M Guillaumin","year":"2014","unstructured":"Guillaumin, M., K\u00fcttel, D., & Ferrari, V. (2014). Imagenet auto-annotation with segmentation propagation. International Journal of Computer Vision. https:\/\/doi.org\/10.1007\/s11263-014-0713-9","journal-title":"International Journal of Computer Vision"},{"key":"6182_CR11","doi-asserted-by":"crossref","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Bourdev, L., Maji, S., & Malik, J. (2011). Semantic contours from inverse detectors. In Proceedings of the International Conference on Computer Vision","DOI":"10.1109\/ICCV.2011.6126343"},{"key":"6182_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2016.90"},{"key":"6182_CR13","unstructured":"Kim, J.- H., Choo, W., & Song, H. O. (2020). Puzzle mix: Exploiting saliency and local statistics for optimal mixup. In Proceedings of the International Conference on Machine Learning"},{"key":"6182_CR14","doi-asserted-by":"crossref","unstructured":"Kornblith, S., Shlens, J., & Le, Q. V. (2019). Do better imagenet models transfer better? In Proceedings of the Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2019.00277"},{"key":"6182_CR15","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., Lee, S., Lee, J., & Yoon, S. (2019). Ficklenet: Weakly and semi-supervised semantic image segmentation using stochastic inference. In Proceedings of the Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2019.00541"},{"key":"6182_CR16","unstructured":"Li, X., Xiong, H., Huang, S., Ji, S., & Dou, D. (2021). Cross-model consensus of explanations and beyond for image classification models: An empirical study."},{"key":"6182_CR17","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft COCO: Common objects in context. In Proceedings of the European Conference on Computer Vision.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"6182_CR18","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., & Doll\u00e1r, P. (2017). Focal loss for dense object detection. In Proceedings of the International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2017.324"},{"key":"6182_CR19","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Chen, L.- C., Murphy, K. P., & Yuille, A. L. (2015). Weakly-and semi-supervised learning of a deep convolutional network for semantic image segmentation. In Proceedings of the International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2015.203"},{"key":"6182_CR20","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S., Guestrin, C. (2016). \u201cwhy should i trust you?\u201d explaining the predictions of any classifier. In Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.","DOI":"10.1145\/2939672.2939778"},{"key":"6182_CR21","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.T., Singh, S., & Guestrin, C. (2018). Anchors: High-precision model-agnostic explanations. In Proceedings of the AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v32i1.11491"},{"key":"6182_CR22","doi-asserted-by":"crossref","unstructured":"Ross, A., & Doshi-Velez, F. (2018). Improving the adversarial robustness and interpretability of deep neural networks by regularizing their input gradients. In Proceedings of the AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v32i1.11504"},{"issue":"3","key":"6182_CR23","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1145\/1015706.1015720","volume":"23","author":"C Rother","year":"2004","unstructured":"Rother, C., Kolmogorov, V., & Blake, A. (2004). \u201cgrabcut\" interactive foreground extraction using iterated graph cuts. Transactions on Graphics, 23(3), 309\u2013314.","journal-title":"Transactions on Graphics"},{"key":"6182_CR24","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., & Batra, D. (2017). Grad-cam: Visual explanations from deep networks via gradient-based localization. In Proceedings of the International Conference on Computer Vision","DOI":"10.1109\/ICCV.2017.74"},{"key":"6182_CR25","unstructured":"Shrikumar, A., Greenside, P., & undaje, A. (2017) Learning important features through propagating activation differences. In Proceedings of the International Conference on Machine Learning."},{"key":"6182_CR26","unstructured":"Smilkov, D., Thorat, N., Kim, B., Vi\u00e9gas, F., & Wattenberg, M. (2017). SmoothGrad: removing noise by adding noise"},{"key":"6182_CR27","unstructured":"Sundararajan, M., Taly, A., & Yan, Q. (2017). Axiomatic attribution for deep networks. In Proceedings of the International Conference on Machine Learning."},{"issue":"10","key":"6182_CR28","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang, J., Sun, K., Cheng, T., Jiang, B., Deng, C., Zhao, Y., Liu, D., Mu, Y., Tan, M., Wang, X., et al. (2020). Deep high-resolution representation learning for visual recognition. Transactions on Pattern Analysis and Machine Intelligence, 43(10), 3349\u20133364.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence"},{"key":"6182_CR29","doi-asserted-by":"crossref","unstructured":"Wei, Y., Xiao, H., Shi, H., Jie, Z., Feng, J., & Huang, T.S. (2018). Revisiting dilated convolution: A simple approach for weakly-and semi-supervised semantic segmentation. In Proceedings of the Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2018.00759"},{"key":"6182_CR30","unstructured":"Yu, F., & Koltun, V. (2016). Multi-scale context aggregation by dilated convolutions. In Proceedings of the International Conference on Learning Representations."},{"key":"6182_CR31","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Chen, X., & Wang, J. (2020). Object-contextual representations for semantic segmentation. In Proceedings of the European Conference on Computer Vision.","DOI":"10.1007\/978-3-030-58539-6_11"},{"key":"6182_CR32","unstructured":"Zagoruyko, S., & Komodakis, N. (2016). Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer. In Proceedings of the International Conference on Learning Representations."},{"key":"6182_CR33","unstructured":"Zhang, H., Wu, C., Zhang, Z., Zhu, Y., Lin, H., Zhang, Z., Sun, Y., He, T., Mueller, J., Manmatha, R., Li, M., & Smola, A. (2020). ResNeSt: Split-Attention Networks."},{"key":"6182_CR34","unstructured":"Zhang, H., Cisse, M., Dauphin, Y. N., & Lopez-Paz, D. (2018). mixup: Beyond empirical risk minimization. In Proceedings of the International Conference on Learning Representations."},{"key":"6182_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, B., Xiao, J., Wei, Y., Sun, M., & Huang, K. (2020). Reliability does matter: An end-to-end weakly supervised semantic segmentation approach. In Proceedings of the AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v34i07.6971"},{"key":"6182_CR36","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., & Jia, J. (2017). Pyramid scene parsing network. In Proceedings of the Conference on Computer Vision and Pattern Recognition","DOI":"10.1109\/CVPR.2017.660"},{"key":"6182_CR37","doi-asserted-by":"crossref","unstructured":"Zheng, S., Jayasumana, S., Romera-Paredes, B., Vineet, V., Su, Z., Du, D., Huang, C., & Torr, P.H. (2015). Conditional random fields as recurrent neural networks. In Proceedings of the International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2015.179"},{"key":"6182_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., & Torralba, A. (2017) Scene parsing through ade20k dataset. In Proceedings of the Conference on Computer Vision and Pattern Recognition","DOI":"10.1109\/CVPR.2017.544"},{"key":"6182_CR39","unstructured":"Zoph, B., Ghiasi, G., Lin, T.-Y., Cui, Y., Liu, H., Cubuk, E.D., & Le, Q. (2020). Rethinking pre-training and self-training."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-022-06182-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-022-06182-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-022-06182-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,9]],"date-time":"2023-06-09T00:04:03Z","timestamp":1686269043000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-022-06182-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,9]]},"references-count":39,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["6182"],"URL":"https:\/\/doi.org\/10.1007\/s10994-022-06182-z","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,9]]},"assertion":[{"value":"11 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 January 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 May 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 June 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable (all based on public available open-source datasets)","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable (all based on public available open-source datasets)","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable (all based on public available open-source datasets)","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}