{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:39:32Z","timestamp":1740123572830,"version":"3.37.3"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"8-9","license":[{"start":{"date-parts":[[2019,12,9]],"date-time":"2019-12-09T00:00:00Z","timestamp":1575849600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,12,9]],"date-time":"2019-12-09T00:00:00Z","timestamp":1575849600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1007\/s11263-019-01269-y","type":"journal-article","created":{"date-parts":[[2019,12,9]],"date-time":"2019-12-09T14:02:51Z","timestamp":1575900171000},"page":"2107-2125","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["SSN: Learning Sparse Switchable Normalization via SparsestMax"],"prefix":"10.1007","volume":"128","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3781-4086","authenticated-orcid":false,"given":"Wenqi","family":"Shao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingyu","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiamin","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruimao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaogang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ping","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,12,9]]},"reference":[{"key":"1269_CR1","unstructured":"Advani, M. S., & Saxe, A. M. (2017). High-dimensional dynamics of generalization error in neural networks. arXiv preprint arXiv:1710.03667."},{"key":"1269_CR2","unstructured":"Ba, J. L., Kiros, J. R., & Hinton, G. E. (2016). Layer normalization. arXiv preprint arXiv:1607.06450."},{"key":"1269_CR3","unstructured":"Bartlett, P. L., Maiorov, V., & Meir, R. (1999). Almost linear vc dimension bounds for piecewise polynomial networks. In Advances in neural information processing systems (pp. 190\u2013196)."},{"key":"1269_CR4","first-page":"1249","volume":"23","author":"JL Bentley","year":"1993","unstructured":"Bentley, J. L., & McIlroy, M. D. (1993). Engineering a sort function. Software: Practice and Experience, 23, 1249\u20131265.","journal-title":"Software: Practice and Experience"},{"key":"1269_CR5","volume-title":"Constrained optimization and Lagrange multiplier methods","author":"DP Bertsekas","year":"2014","unstructured":"Bertsekas, D. P. (2014). Constrained optimization and Lagrange multiplier methods. New York: Academic Press."},{"key":"1269_CR6","doi-asserted-by":"crossref","unstructured":"Carreira, J., & Zisserman, A. (2017). Quo vadis, action recognition? a new model and he kinetics dataset. In 2017 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 4724\u20134733). IEEE.","DOI":"10.1109\/CVPR.2017.502"},{"issue":"4","key":"1269_CR7","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"Chen, L. C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2018). Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(4), 834\u2013848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1269_CR8","doi-asserted-by":"crossref","first-page":"575","DOI":"10.1007\/s10107-015-0946-6","volume":"158","author":"L Condat","year":"2016","unstructured":"Condat, L. (2016). Fast projection onto the simplex and the $$\\ell _1$$ ball. Mathematical Programming, 158, 575\u2013585.","journal-title":"Mathematical Programming"},{"key":"1269_CR9","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., & Schiele, B. (2016). The cityscapes dataset for semantic urban scene understanding. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.350"},{"key":"1269_CR10","unstructured":"Deng, J., Guo, J., Xue, N., & Zafeiriou, S. (2018). Arcface: Additive angular margin loss for deep face recognition. arXiv preprint arXiv:1801.07698."},{"key":"1269_CR11","unstructured":"Denton, E. L., Zaremba, W., Bruna, J., LeCun, Y., & Fergus, R. (2014). Exploiting linear structure within convolutional networks for efficient evaluation. In Advances in neural information processing systems (pp. 1269\u20131277)."},{"key":"1269_CR12","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In Proceedings of the IEEE international conference on computer vision (pp. 1440\u20131448).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1269_CR13","unstructured":"Girshick, R., Radosavovic, I., Gkioxari, G., Doll\u00e1r, P., & He, K. (2018). Detectron. https:\/\/github.com\/facebookresearch\/detectron."},{"key":"1269_CR14","unstructured":"Goyal, P., Doll\u00e1r, P., Girshick, R., Noordhuis, P., Wesolowski, L., Kyrola, A., Tulloch, A., Jia, Y., & He, K. (2017). Accurate, large minibatch SGD: training imagenet in 1 hour. arXiv preprint arXiv:1706.02677."},{"key":"1269_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask R-CNN. In Proceedings of the IEEE international conference on computer vision (pp. 2961\u20132969).","DOI":"10.1109\/ICCV.2017.322"},{"key":"1269_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"issue":"1","key":"1269_CR17","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1007\/BF01580223","volume":"6","author":"M Held","year":"1974","unstructured":"Held, M., Wolfe, P., & Crowder, H. P. (1974). Validation of subgradient optimization. Mathematical Programming, 6(1), 62\u201388.","journal-title":"Mathematical Programming"},{"issue":"2","key":"1269_CR18","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/0893-6080(91)90009-T","volume":"4","author":"K Hornik","year":"1991","unstructured":"Hornik, K. (1991). Approximation capabilities of multilayer feedforward networks. Neural Networks, 4(2), 251\u2013257.","journal-title":"Neural Networks"},{"issue":"5","key":"1269_CR19","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","volume":"2","author":"K Hornik","year":"1989","unstructured":"Hornik, K., Stinchcombe, M., & White, H. (1989). Multilayer feedforward networks are universal approximators. Neural Networks, 2(5), 359\u2013366.","journal-title":"Neural Networks"},{"key":"1269_CR20","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., & Weinberger, K. Q. (2017). Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (vol. 1, p. 3)."},{"key":"1269_CR21","unstructured":"Ioffe, S., & Szegedy, C. (2015). Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167."},{"key":"1269_CR22","unstructured":"Jang, E., Gu, S., & Poole, B. (2016). Categorical reparameterization with gumbel-softmax. arXiv preprint arXiv:1611.01144."},{"key":"1269_CR23","unstructured":"Kay, W., Carreira, J., Simonyan, K., Zhang, B., Hillier, C., Vijayanarasimhan, S., Viola, F., Green, T., Back, T., Natsev, P., et al. (2017). The kinetics human action video dataset. arXiv preprint arXiv:1705.06950."},{"key":"1269_CR24","doi-asserted-by":"crossref","unstructured":"Kemelmacher-Shlizerman, I., Seitz, S. M., Miller, D., & Brossard, E. (2016). The megaface benchmark: 1 million faces for recognition at scale. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4873\u20134882).","DOI":"10.1109\/CVPR.2016.527"},{"key":"1269_CR25","unstructured":"Laurent, C., Pereyra, G., Brakel, P., Zhang, Y., & Bengio, Y. (2016). Batch normalized recurrent neural networks. 2016 IEEE international conference on acoustics (pp. 2657\u20132661). IEEE: Speech and Signal Processing (ICASSP)."},{"key":"1269_CR26","unstructured":"Li, Y., Wang, N., Shi, J., Liu, J., & Hou, X. (2016). Revisiting batch normalization for practical domain adaptation. arXiv preprint arXiv:1603.04779."},{"key":"1269_CR27","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., & Belongie, S. (2017), Feature pyramid networks for object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2117\u20132125)","DOI":"10.1109\/CVPR.2017.106"},{"key":"1269_CR28","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft coco: Common objects in context. In European conference on computer vision (pp. 740\u2013755.) Springer.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1269_CR29","unstructured":"Liu, H., Simonyan, K., & Yang, Y. (2018). Darts: Differentiable architecture search. arXiv preprint arXiv:1806.09055."},{"key":"1269_CR30","unstructured":"Liu, B., Wang, M., Foroosh, H., Tappen, M., & Pensky, M. (2015). Sparse convolutional neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 806\u2013814)."},{"key":"1269_CR31","unstructured":"Louizos, C., Welling, M., & Kingma, D. P. (2017). Learning sparse neural networks through $$ l\\_0 $$ regularization. arXiv preprint arXiv:1712.01312."},{"key":"1269_CR32","unstructured":"Luo, P., Ren, J., & Peng, Z. (2018). Differentiable learning-to-normalize via switchable normalization. arXiv preprint arXiv:1806.10779."},{"key":"1269_CR33","unstructured":"Luo, P., Wang, X., Shao, W., & Peng, Z. (2018). Understanding regularization in batch normalization. arXiv preprint arXiv:1809.00846."},{"key":"1269_CR34","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H. T., & Sun, J. (2018). Shufflenet v2: Practical guidelines for efficient cnn architecture design. In Proceedings of the European conference on computer vision (ECCV) (pp. 116\u2013131).","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"1269_CR35","unstructured":"Maddison, C. J., Mnih, A., & Teh, Y. W. (2016). The concrete distribution: A continuous relaxation of discrete random variables. arXiv preprint arXiv:1611.00712."},{"key":"1269_CR36","unstructured":"Malaviya, C., Ferreira, P., & Martins, A. F. (2018). Sparse and constrained attention for neural machine translation. arXiv preprint arXiv:1805.08241."},{"key":"1269_CR37","unstructured":"Martins, A. F. T., & Astudillo, R. F. (2016). From softmax to sparsemax: A sparse model of attention and multi-label classification. CoRR arXiv:1602.02068."},{"key":"1269_CR38","doi-asserted-by":"crossref","unstructured":"Martins, A. F., & Kreutzer, J. (2017). Learning what\u2019s easy: Fully differentiable neural easy-first taggers. In Proceedings of the 2017 conference on empirical methods in natural language processing (pp. 349\u2013362).","DOI":"10.18653\/v1\/D17-1036"},{"key":"1269_CR39","unstructured":"Miyato, T., Kataoka, T., Koyama, M., & Yoshida, Y. (2018). Spectral normalization for generative adversarial networks. arXiv preprint arXiv:1802.05957."},{"key":"1269_CR40","unstructured":"Pan, X., Luo, P., Shi, J., & Tang, X. (2018). Two at once: Enhancing learning and generalization capacities via ibn-net. arXiv preprint arXiv:1807.09441."},{"key":"1269_CR41","unstructured":"Pascanu, R., Montufar, G., & Bengio, Y. (2013). On the number of response regions of deep feed forward networks with piece-wise linear activations. arXiv preprint arXiv:1312.6098."},{"key":"1269_CR42","unstructured":"Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., & Lerer, A. (2017). Automatic differentiation in pytorch. In NIPS-W."},{"key":"1269_CR43","unstructured":"Raghu, M., Poole, B., Kleinberg, J., Ganguli, S., & Dickstein, J. S. (2017). On the expressive power of deep neural networks. In Proceedings of the 34th international conference on machine learning\u2014Volume 70 (pp. 2847\u20132854). JMLR. org."},{"key":"1269_CR44","unstructured":"Real, E., Aggarwal, A., Huang, Y., & Le, Q. V. (2018). Regularized evolution for image classifier architecture search. arXiv preprint arXiv:1802.01548."},{"key":"1269_CR45","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster R-CNN: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems (pp. 91\u201399)."},{"issue":"3","key":"1269_CR46","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). Imagenet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252.","journal-title":"International Journal of Computer Vision"},{"key":"1269_CR47","unstructured":"Salimans, T., & Kingma, D. P. (2016). Weight normalization: A simple reparameterization to accelerate training of deep neural networks. In Advances in neural information processing systems (pp. 901\u2013909)."},{"key":"1269_CR48","unstructured":"Santurkar, S., Tsipras, D., Ilyas, A., & Madry, A. (2018). How does batch normalization help optimization?(no, it is not about internal covariate shift). arXiv preprint arXiv:1805.11604."},{"key":"1269_CR49","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/j.neucom.2017.02.029","volume":"241","author":"S Scardapane","year":"2017","unstructured":"Scardapane, S., Comminiello, D., Hussain, A., & Uncini, A. (2017). Group sparse regularization for deep neural networks. Neurocomputing, 241, 81\u201389.","journal-title":"Neurocomputing"},{"key":"1269_CR50","unstructured":"Sun, S., Pang, J., Shi, J., Yi, S., & Ouyang, W. (2018). Fishnet: A versatile backbone for image, region, and pixel level prediction. In Advances in neural information processing systems (pp. 762\u2013772)."},{"key":"1269_CR51","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., & Alemi, A. A. (2017). Inception-v4, inception-resnet and the impact of residual connections on learning. In Thirty-first AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"1269_CR52","unstructured":"Tai, C., Xiao, T., Zhang, Y., Wang, X., et al. (2015). Convolutional neural networks with low-rank regularization. arXiv preprint arXiv:1511.06067."},{"key":"1269_CR53","unstructured":"Tartaglione, E., Leps\u00f8y, S., Fiandrotti, A., & Francini, G. (2018), Learning sparse neural networks via sensitivity-driven regularization. In Advances in neural information processing systems (pp. 3882\u20133892)."},{"key":"1269_CR54","unstructured":"Teye, M., Azizpour, H., & Smith, K. (2018). Bayesian uncertainty estimation for batch normalized deep networks. arXiv preprint arXiv:1802.06455."},{"key":"1269_CR55","unstructured":"Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2017), Instance normalization: the missing ingredient for fast stylization. cscv. arXiv preprint arXiv:1607.08022."},{"issue":"2","key":"1269_CR56","doi-asserted-by":"publisher","first-page":"890","DOI":"10.1137\/080714488","volume":"31","author":"E Van Den Berg","year":"2008","unstructured":"Van Den Berg, E., & Friedlander, M. P. (2008). Probing the pareto frontier for basis pursuit solutions. SIAM Journal on Scientific Computing, 31(2), 890\u2013912.","journal-title":"SIAM Journal on Scientific Computing"},{"key":"1269_CR57","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., & He, K. (2018). Non-local neural networks. In CVPR.","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1269_CR58","unstructured":"Wen, W., Wu, C., Wang, Y., Chen, Y., & Li, H. (2016). Learning structured sparsity in deep neural networks. In Advances in neural information processing systems (pp. 2074\u20132082)."},{"key":"1269_CR59","unstructured":"Wu, Y., & He, K. (2018). Group normalization. arXiv preprint arXiv:1803.08494."},{"key":"1269_CR60","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., & He, K. (2017). Aggregated residual transformations for deep neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1492\u20131500).","DOI":"10.1109\/CVPR.2017.634"},{"key":"1269_CR61","unstructured":"Xie, S., Zheng, H., Liu, C., & Lin, L. (2018) SNAS: stochastic neural architecture search. arXiv preprint arXiv:1812.09926."},{"key":"1269_CR62","unstructured":"Yang, G., Pennington, J., Rao, V., Sohl-Dickstein, J., Schoenholz, S. S. (2019). A mean field theory of batch normalization. arXiv preprint arXiv:1902.08129."},{"key":"1269_CR63","unstructured":"Zagoruyko, S., & Komodakis, N. (2016). Wide residual networks. arXiv preprint arXiv:1605.07146."},{"key":"1269_CR64","unstructured":"Zhang, C., Bengio, S., & Singer, Y. (2019). Are all layers created equal? arXiv preprint arXiv:1902.01996."},{"key":"1269_CR65","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., & Sun, J. (2018). Shufflenet: An extremely efficient convolutional neural network for mobile devices. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6848\u20136856).","DOI":"10.1109\/CVPR.2018.00716"},{"key":"1269_CR66","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., & Jia, J. (2017). Pyramid scene parsing network. In Proceedings of IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.660"},{"key":"1269_CR67","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., & Torralba, A. (2017). Scene parsing through ade20k dataset. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.544"},{"key":"1269_CR68","unstructured":"Zoph, B., Le, Q. V. (2016). Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578."},{"key":"1269_CR69","doi-asserted-by":"crossref","unstructured":"Zoph, B., Vasudevan, V., Shlens, J., & Le, Q. V. (2018). Learning transferable architectures for scalable image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 8697\u20138710).","DOI":"10.1109\/CVPR.2018.00907"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01269-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01269-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01269-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:47:35Z","timestamp":1665190055000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01269-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,9]]},"references-count":69,"journal-issue":{"issue":"8-9","published-print":{"date-parts":[[2020,9]]}},"alternative-id":["1269"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01269-y","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2019,12,9]]},"assertion":[{"value":"21 March 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 November 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}