{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:28:44Z","timestamp":1778257724520,"version":"3.51.4"},"publisher-location":"Cham","reference-count":99,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585471","type":"print"},{"value":"9783030585488","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58548-8_7","type":"book-chapter","created":{"date-parts":[[2020,10,28]],"date-time":"2020-10-28T23:02:42Z","timestamp":1603926162000},"page":"108-126","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":509,"title":["Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic Segmentation"],"prefix":"10.1007","author":[{"given":"Huiyu","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yukun","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bradley","family":"Green","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hartwig","family":"Adam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alan","family":"Yuille","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang-Chieh","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,10,29]]},"reference":[{"key":"7_CR1","unstructured":"Abadi, M., et al.: Tensorflow: a system for large-scale machine learning. In: Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation (2016)"},{"issue":"1","key":"7_CR2","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1207\/s15516709cog0901_7","volume":"9","author":"DH Ackley","year":"1985","unstructured":"Ackley, D.H., Hinton, G.E., Sejnowski, T.J.: A learning algorithm for boltzmann machines. Cogn. Sci. 9(1), 147\u2013169 (1985)","journal-title":"Cogn. Sci."},{"key":"7_CR3","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv:1409.0473 (2014)"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Bai, M., Urtasun, R.: Deep watershed transform for instance segmentation. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.305"},{"key":"7_CR5","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/0031-3203(81)90009-1","volume":"3","author":"DH Ballard","year":"1981","unstructured":"Ballard, D.H.: Generalizing the hough transform to detect arbitrary shapes. Pattern Recogn. 3, 111\u2013122 (1981)","journal-title":"Pattern Recogn."},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Bello, I., Zoph, B., Vaswani, A., Shlens, J., Le, Q.V.: Attention augmented convolutional networks. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00338"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Bonde, U., Alcantarilla, P.F., Leutenegger, S.: Towards bounding-box free panoptic segmentation. arXiv:2002.07705 (2020)","DOI":"10.1007\/978-3-030-71278-5_23"},{"key":"7_CR8","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. In: ICLR (2019)"},{"key":"7_CR9","unstructured":"Buades, A., Coll, B., Morel, J.M.: A non-local algorithm for image denoising. In: CVPR (2005)"},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: ICASSP (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"7_CR11","unstructured":"Chen, L.C., et al.: Searching for efficient multi-scale architectures for dense image prediction. In: NeurIPS (2018)"},{"key":"7_CR12","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Semantic image segmentation with deep convolutional nets and fully connected CRFs. In: ICLR (2015)"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE TPAMI (2017)","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"7_CR14","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv:1706.05587 (2017)"},{"key":"7_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"7_CR16","unstructured":"Chen, Q., Cheng, A., He, X., Wang, P., Cheng, J.: SpatialFlow: bridging all tasks for panoptic segmentation. arXiv:1910.08787 (2019)"},{"key":"7_CR17","unstructured":"Chen, Y., Kalantidis, Y., Li, J., Yan, S., Feng, J.: A$$\\hat{\\,}$$ 2-nets: double attention networks. In: NeurIPS (2018)"},{"key":"7_CR18","unstructured":"Cheng, B., et al.: Panoptic-deeplab. In: ICCV COCO + Mapillary Joint Recognition Challenge Workshop (2019)"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Cheng, B., et al.: Panoptic-deeplab: a simple, strong, and fast baseline for bottom-up panoptic segmentation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: deep learning with depthwise separable convolutions. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.195"},{"key":"7_CR21","unstructured":"Chorowski, J.K., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: NeurIPS (2015)"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J.G., Le, Q., Salakhutdinov, R.: Transformer-XL: Attentive language models beyond a fixed-length context. In: ACL (2019)","DOI":"10.18653\/v1\/P19-1285"},{"key":"7_CR25","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv:1810.04805 (2018)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Fu, J., et al.: Dual attention network for scene segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00326"},{"key":"7_CR27","unstructured":"Gao, H., Zhu, X., Lin, S., Dai, J.: Deformable kernels: adapting effective receptive fields for object deformation. arXiv:1910.02940 (2019)"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Gao, N., et al.: SSAP: single-shot instance segmentation with affinity pyramid. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00073"},{"key":"7_CR29","unstructured":"Goyal, P., et al.: Accurate, large minibatch SGD: training imagenet in 1 hour. arXiv:1706.02677 (2017)"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"7_CR32","unstructured":"Ho, J., Kalchbrenner, N., Weissenborn, D., Salimans, T.: Axial attention in multidimensional transformers. arXiv:1912.12180 (2019)"},{"key":"7_CR33","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1007\/978-3-642-75988-8_28","volume-title":"Wavelets","author":"M Holschneider","year":"1990","unstructured":"Holschneider, M., Kronland-Martinet, R., Morlet, J., Tchamitchian, P.: A real-time algorithm for signal analysis with the help of the wavelet transform. In: Combes, J.M., Grossmann, A., Tchamitchian, P. (eds.) Wavelets, pp. 286\u2013297. Springer, Heidelberg (1990). https:\/\/doi.org\/10.1007\/978-3-642-75988-8_28"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Howard, A., et al.: Searching for mobilenetv3. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"7_CR35","unstructured":"Howard, A.G., et al.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861 (2017)"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu, J., Zhang, Z., Dai, J., Wei, Y.: Relation networks for object detection. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00378"},{"key":"7_CR37","doi-asserted-by":"crossref","unstructured":"Hu, H., Zhang, Z., Xie, Z., Lin, S.: Local relation networks for image recognition. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00356"},{"key":"7_CR38","unstructured":"Huang, C.A., et al.: Music transformer: Generating music with long-term structure. In: ICLR (2019)"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., Liu, W.: CCNet: criss-cross attention for semantic segmentation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00069"},{"key":"7_CR40","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: ICML (2015)"},{"key":"7_CR41","doi-asserted-by":"crossref","unstructured":"Jaderberg, M., Vedaldi, A., Zisserman, A.: Speeding up convolutional neural networks with low rank expansions. In: BMVC (2014)","DOI":"10.5244\/C.28.88"},{"key":"7_CR42","unstructured":"Kendall, A., Gal, Y., Cipolla, R.: Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In: CVPR (2018)"},{"key":"7_CR43","doi-asserted-by":"crossref","unstructured":"Keuper, M., Levinkov, E., Bonneel, N., Lavou\u00e9, G., Brox, T., Andres, B.: Efficient decomposition of image and mesh graphs by lifted multicuts. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.204"},{"key":"7_CR44","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Girshick, R., He, K., Doll\u00e1r, P.: Panoptic feature pyramid networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00656"},{"key":"7_CR45","doi-asserted-by":"crossref","unstructured":"Kirillov, A., He, K., Girshick, R., Rother, C., Doll\u00e1r, P.: Panoptic segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00963"},{"key":"7_CR46","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NeurIPS (2012)"},{"issue":"11","key":"7_CR47","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"7_CR48","unstructured":"Leibe, B., Leonardis, A., Schiele, B.: Combined object categorization and segmentation with an implicit shape model. In: Workshop on Statistical Learning in Computer Vision, ECCV (2004)"},{"key":"7_CR49","unstructured":"Li, J., Raventos, A., Bhargava, A., Tagawa, T., Gaidon, A.: Learning to fuse things and stuff. arXiv:1812.01192 (2018)"},{"key":"7_CR50","doi-asserted-by":"crossref","unstructured":"Li, Q., Qi, X., Torr, P.H.: Unifying training and inference for panoptic segmentation. arXiv:2001.04982 (2020)","DOI":"10.1109\/CVPR42600.2020.01333"},{"key":"7_CR51","unstructured":"Li, X., Zhao, H., Han, L., Tong, Y., Yang, K.: GFF: gated fully fusion for semantic segmentation. arXiv:1904.01803 (2019)"},{"key":"7_CR52","doi-asserted-by":"crossref","unstructured":"Li, Y., Chen, X., Zhu, Z., Xie, L., Huang, G., Du, D., Wang, X.: Attention-guided unified network for panoptic segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00719"},{"key":"7_CR53","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Neural architecture search for lightweight non-local networks. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01031"},{"key":"7_CR54","doi-asserted-by":"crossref","unstructured":"Liang, J., Homayounfar, N., Ma, W.C., Xiong, Y., Hu, R., Urtasun, R.: PolyTransform: deep polygon transformer for instance segmentation. arXiv:1912.02801 (2019)","DOI":"10.1109\/CVPR42600.2020.00915"},{"key":"7_CR55","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"7_CR56","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"7_CR57","doi-asserted-by":"crossref","unstructured":"Liu, C., et al.: Auto-deeplab: Hierarchical neural architecture search for semantic image segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00017"},{"key":"7_CR58","unstructured":"Liu, L., et al.: On the variance of the adaptive learning rate and beyond. In: ICLR (2020)"},{"key":"7_CR59","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"7_CR60","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"708","DOI":"10.1007\/978-3-030-01219-9_42","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Liu","year":"2018","unstructured":"Liu, Y., et al.: Affinity derivation and graph merge for instance segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11207, pp. 708\u2013724. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01219-9_42"},{"key":"7_CR61","doi-asserted-by":"crossref","unstructured":"Liu1, H., et al.: An end-to-end network for panoptic segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00633"},{"key":"7_CR62","doi-asserted-by":"crossref","unstructured":"Neuhold, G., Ollmann, T., Rota Bulo, S., Kontschieder, P.: The mapillary vistas dataset for semantic understanding of street scenes. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.534"},{"key":"7_CR63","doi-asserted-by":"crossref","unstructured":"Neven, D., Brabandere, B.D., Proesmans, M., Gool, L.V.: Instance segmentation by jointly optimizing spatial embeddings and clustering bandwidth. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00904"},{"key":"7_CR64","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Kokkinos, I., Savalle, P.A.: Modeling local and global deformations in deep learning: epitomic convolution, multiple instance learning, and sliding window detection. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298636"},{"key":"7_CR65","unstructured":"Parmar, N., Ramachandran, P., Vaswani, A., Bello, I., Levskaya, A., Shlens, J.: Stand-alone self-attention in vision models. In: NeurIPS (2019)"},{"key":"7_CR66","unstructured":"Parmar, N., et al.: Image transformer. In: ICML (2018)"},{"key":"7_CR67","doi-asserted-by":"crossref","unstructured":"Peng, C., Zhang, X., Yu, G., Luo, G., Sun, J.: Large kernel matters-improve semantic segmentation by global convolutional network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.189"},{"key":"7_CR68","doi-asserted-by":"crossref","unstructured":"Porzi, L., Bul\u00f2, S.R., Colovic, A., Kontschieder, P.: Seamless scene segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00847"},{"key":"7_CR69","unstructured":"Qi, H., et al.: Deformable convolutional networks - COCO detection and segmentation challenge 2017 entry. In: ICCV COCO Challenge Workshop (2017)"},{"key":"7_CR70","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. IJCV 115, 211\u2013252 (2015)","journal-title":"IJCV"},{"key":"7_CR71","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"7_CR72","doi-asserted-by":"crossref","unstructured":"Shaw, P., Uszkoreit, J., Vaswani, A.: Self-attention with relative position representations. In: NAACL (2018)","DOI":"10.18653\/v1\/N18-2074"},{"key":"7_CR73","unstructured":"Shen, Z., Zhang, M., Zhao, H., Yi, S., Li, H.: Efficient attention: attention with linear complexities. arXiv:1812.01243 (2018)"},{"issue":"10","key":"7_CR74","doi-asserted-by":"publisher","first-page":"2464","DOI":"10.1109\/78.157290","volume":"40","author":"MJ Shensa","year":"1992","unstructured":"Shensa, M.J.: The discrete wavelet transform: wedding the a trous and mallat algorithms. IEEE Trans. Signal Process. 40(10), 2464\u20132482 (1992)","journal-title":"IEEE Trans. Signal Process."},{"key":"7_CR75","unstructured":"Sifre, L.: Rigid-motion scattering for image classification. Ph.D. thesis (2014)"},{"key":"7_CR76","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556 (2014)"},{"key":"7_CR77","doi-asserted-by":"crossref","unstructured":"Sofiiuk, K., Barinova, O., Konushin, A.: AdaptiS: adaptive instance selection network. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00745"},{"key":"7_CR78","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"7_CR79","doi-asserted-by":"crossref","unstructured":"Uhrig, J., Rehder, E., Fr\u00f6hlich, B., Franke, U., Brox, T.: Box2pix: single-shot instance segmentation by assigning pixels to object boxes. In: IEEE Intelligent Vehicles Symposium (IV) (2018)","DOI":"10.1109\/IVS.2018.8500621"},{"key":"7_CR80","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"7_CR81","doi-asserted-by":"crossref","unstructured":"Vincent, L., Soille, P.: Watersheds in digital spaces: an efficient algorithm based on immersion simulations. IEEE TPAMI (1991)","DOI":"10.1109\/34.87344"},{"key":"7_CR82","doi-asserted-by":"crossref","unstructured":"Wang, H., Kembhavi, A., Farhadi, A., Yuille, A.L., Rastegari, M.: Elastic: improving CNNs with dynamic scaling policies. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00236"},{"key":"7_CR83","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. arXiv:1908.07919 (2019)"},{"key":"7_CR84","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"7_CR85","unstructured":"Wu, Y., et al.: Google\u2019s neural machine translation system: bridging the gap between human and machine translation. arXiv:1609.08144 (2016)"},{"key":"7_CR86","doi-asserted-by":"crossref","unstructured":"Xie, C., Wu, Y., Maaten, L.v.d., Yuille, A.L., He, K.: Feature denoising for improving adversarial robustness. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00059"},{"key":"7_CR87","doi-asserted-by":"crossref","unstructured":"Xiong, Y., et al.: UPSNet: a unified panoptic segmentation network. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00902"},{"key":"7_CR88","unstructured":"Xu, K., et al.: Show, attend and tell: Neural image caption generation with visual attention. In: ICML (2015)"},{"key":"7_CR89","unstructured":"Yang, T.J., et al.: DeeperLab: single-shot image parser. arXiv:1902.05093 (2019)"},{"key":"7_CR90","doi-asserted-by":"crossref","unstructured":"Yang, Y., Li, H., Li, X., Zhao, Q., Wu, J., Lin, Z.: SOGNet: scene overlap graph network for panoptic segmentation. arXiv:1911.07527 (2019)","DOI":"10.1609\/aaai.v34i07.6955"},{"key":"7_CR91","unstructured":"Zhang, H., Goodfellow, I., Metaxas, D., Odena, A.: Self-attention generative adversarial networks. arXiv:1805.08318 (2018)"},{"key":"7_CR92","unstructured":"Zhang, M., Lucas, J., Ba, J., Hinton, G.E.: Lookahead optimizer: k steps forward, 1 step back. In: NeurIPS (2019)"},{"key":"7_CR93","unstructured":"Zhang, R.: Making convolutional networks shift-invariant again. In: ICML (2019)"},{"key":"7_CR94","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"7_CR95","doi-asserted-by":"crossref","unstructured":"Zhu, X., Cheng, D., Zhang, Z., Lin, S., Dai, J.: An empirical study of spatial attention mechanisms in deep networks. In: ICCV, pp. 6688\u20136697 (2019)","DOI":"10.1109\/ICCV.2019.00679"},{"key":"7_CR96","doi-asserted-by":"crossref","unstructured":"Zhu, X., Hu, H., Lin, S., Dai, J.: Deformable ConvNets v2: more deformable, better results. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00953"},{"key":"7_CR97","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Improving semantic segmentation via video propagation and label relaxation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00906"},{"key":"7_CR98","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Xu, M., Bai, S., Huang, T., Bai, X.: Asymmetric non-local neural networks for semantic segmentation. In: CVPR (2019)","DOI":"10.1109\/ICCV.2019.00068"},{"key":"7_CR99","unstructured":"Zoph, B., Le, Q.V.: Neural architecture search with reinforcement learning. In: ICLR (2017)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58548-8_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:04:50Z","timestamp":1730160290000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58548-8_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585471","9783030585488"],"references-count":99,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58548-8_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}