{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:32:59Z","timestamp":1774121579790,"version":"3.50.1"},"reference-count":217,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,6,16]],"date-time":"2021-06-16T00:00:00Z","timestamp":1623801600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,6,16]],"date-time":"2021-06-16T00:00:00Z","timestamp":1623801600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1007\/s42979-021-00735-0","type":"journal-article","created":{"date-parts":[[2021,6,16]],"date-time":"2021-06-16T04:09:17Z","timestamp":1623816557000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":61,"title":["CNN Architectures for Geometric Transformation-Invariant Feature Representation in Computer Vision: A Review"],"prefix":"10.1007","volume":"2","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6103-1925","authenticated-orcid":false,"given":"Alhassan","family":"Mumuni","sequence":"first","affiliation":[]},{"given":"Fuseini","family":"Mumuni","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,16]]},"reference":[{"key":"735_CR1","doi-asserted-by":"crossref","unstructured":"Alcorn MA, Li Q, Gong Z, Wang C, Mai L, Ku WS, et al. Strike (with) a pose: neural networks are easily fooled by strange poses of familiar objects. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 4845\u201354.","DOI":"10.1109\/CVPR.2019.00498"},{"key":"735_CR2","doi-asserted-by":"crossref","unstructured":"Lenc K, Vedaldi A. Understanding image representations by measuring their equivariance and equivalence. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2015. pp. 991\u201399.","DOI":"10.1109\/CVPR.2015.7298701"},{"issue":"1","key":"735_CR3","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1109\/T-C.1973.223602","volume":"100","author":"MA Fischler","year":"1973","unstructured":"Fischler MA, Elschlager RA. The representation and matching of pictorial structures. IEEE Trans Comput. 1973;100(1):67\u201392.","journal-title":"IEEE Trans Comput"},{"key":"735_CR4","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/11957959_1","volume-title":"Object recognition in the geometric era: A retrospective. In: Toward category-level object recognition","author":"JL Mundy","year":"2006","unstructured":"Mundy JL. Object recognition in the geometric era: A retrospective. In: Toward category-level object recognition. Springer; 2006. p. 3\u201328."},{"key":"735_CR5","unstructured":"Alom MZ, Taha TM, Yakopcic C, Westberg S, Sidike P, Nasrin MS, et al. The history began from alexnet: a comprehensive survey on deep learning approaches. arXiv preprint. 2018. arXiv:1803.01164."},{"key":"735_CR6","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J. Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2016. p.p 770\u201378.","DOI":"10.1109\/CVPR.2016.90"},{"issue":"6","key":"735_CR7","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE. Imagenet classification with deep convolutional neural networks. Commun ACM. 2017;60(6):84\u201390.","journal-title":"Commun ACM"},{"key":"735_CR8","doi-asserted-by":"publisher","unstructured":"Voulodimos A, Doulamis N, Doulamis A, Protopapadakis E. Deep learning for computer vision: a brief review. Comput Intell Neurosci. 2018;2018:1\u201313. https:\/\/doi.org\/10.1155\/2018\/7068349.","DOI":"10.1155\/2018\/7068349"},{"key":"735_CR9","first-page":"267","volume-title":"Neocognitron: a self- organizing neural network model for a mechanism of visual pattern recognition. In: Competition and cooperation in neural nets","author":"K Fukushima","year":"1982","unstructured":"Fukushima K, Miyake S. Neocognitron: a self- organizing neural network model for a mechanism of visual pattern recognition. In: Competition and cooperation in neural nets. Springer; 1982. p. 267\u201385."},{"issue":"11","key":"735_CR10","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P. Gradient- based learning applied to document recognition. Proc IEEE. 1998;86(11):2278\u2013324.","journal-title":"Proc IEEE"},{"key":"735_CR11","unstructured":"Hinton GE, Srivastava N, Krizhevsky A, Sutskever I, Salakhutdinov RR. Improving neural networks by preventing co-adaptation of feature detectors. arXiv preprint. 2012. arXiv:1207.0580."},{"key":"735_CR12","first-page":"433","volume-title":"Learning rate adaptation in stochastic gradient descent. In: Advances in convex analysis and global optimization","author":"V Plagianakos","year":"2001","unstructured":"Plagianakos V, Magoulas G, Vrahatis M. Learning rate adaptation in stochastic gradient descent. In: Advances in convex analysis and global optimization. Springer; 2001. p. 433\u201344."},{"key":"735_CR13","volume-title":"Rectified linear units improve restricted boltzmann machines","author":"V Nair","year":"2010","unstructured":"Nair V, Hinton GE. Rectified linear units improve restricted boltzmann machines. Berlin: ICML; 2010."},{"key":"735_CR14","first-page":"92","volume-title":"Evaluation of pooling operations in convolutional architectures for object recognition. In: International conference on artificial neural networks","author":"D Scherer","year":"2010","unstructured":"Scherer D, M\u00fcller A, Behnke S. Evaluation of pooling operations in convolutional architectures for object recognition. In: International conference on artificial neural networks. Springer; 2010. p. 92\u2013101."},{"key":"735_CR15","unstructured":"Moody J, Hanson S, Krogh A, Hertz JA. A simple weight decay can improve generalization. Adv Neural Inf Process Syst. 1992;4:950\u201357."},{"issue":"1","key":"735_CR16","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1113\/jphysiol.1962.sp006837","volume":"160","author":"DH Hubel","year":"1962","unstructured":"Hubel DH, Wiesel TN. Receptive fields, binocular interaction and functional architecture in the cat\u2019s visual cortex. J Physiol. 1962;160(1):106.","journal-title":"J Physiol"},{"issue":"11","key":"735_CR17","doi-asserted-by":"publisher","first-page":"1019","DOI":"10.1038\/14819","volume":"2","author":"M Riesenhuber","year":"1999","unstructured":"Riesenhuber M, Poggio T. Hierarchical models of object recognition in cortex. Nat Neurosci. 1999;2(11):1019\u201325.","journal-title":"Nat Neurosci"},{"key":"735_CR18","first-page":"392","volume-title":"Multi-scale orderless pooling of deep convolutional activation features. In: European conference on computer vision","author":"Y Gong","year":"2014","unstructured":"Gong Y, Wang L, Guo R, Lazebnik S. Multi-scale orderless pooling of deep convolutional activation features. In: European conference on computer vision. Springer; 2014. p. 392\u2013407."},{"key":"735_CR19","doi-asserted-by":"publisher","first-page":"32672","DOI":"10.1038\/srep32672","volume":"6","author":"SR Kheradpisheh","year":"2016","unstructured":"Kheradpisheh SR, Ghodrati M, Ganjtabesh M, Masquelier T. Deep networks can resemble human feed-forward vision in invariant object recognition. Sci Rep. 2016;6:32672.","journal-title":"Sci Rep"},{"issue":"2","key":"735_CR20","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/0893-6080(88)90014-7","volume":"1","author":"K Fukushima","year":"1988","unstructured":"Fukushima K. Neocognitron: a hierarchical neural network capable of visual pattern recognition. Neural Netw. 1988;1(2):119\u201330.","journal-title":"Neural Netw"},{"issue":"2","key":"735_CR21","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1007\/s41095-020-0174-8","volume":"6","author":"YP Xiao","year":"2020","unstructured":"Xiao YP, Lai YK, Zhang FL, Li C, Gao L. A survey on deep geometry learning: from a representation perspective. Comput Vis Media. 2020;6(2):113\u201333.","journal-title":"Comput Vis Media"},{"issue":"9","key":"735_CR22","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2009","unstructured":"Felzenszwalb PF, Girshick RB, McAllester D, Ramanan D. Object detection with discriminatively trained part-based models. IEEE Trans Pattern Anal Mach Intell. 2009;32(9):1627\u201345.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"9","key":"735_CR23","doi-asserted-by":"publisher","first-page":"902","DOI":"10.1007\/s11263-018-1073-7","volume":"126","author":"M M\u00fcller","year":"2018","unstructured":"M\u00fcller M, Casser V, Lahoud J, Smith N, Ghanem B. Sim4cv: a photo-realistic simulator for computer vision applications. Int J Comput Vis. 2018;126(9):902\u201319.","journal-title":"Int J Comput Vis"},{"issue":"3","key":"735_CR24","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1109\/81.222799","volume":"40","author":"T Roska","year":"1993","unstructured":"Roska T, Hamori J, Labos E, Lotz K, Orzo L, Takacs J, et al. The use of CNN models in the subcortical visual pathway. IEEE Trans Circ Syst I. 1993;40(3):182\u201395.","journal-title":"IEEE Trans Circ Syst I"},{"key":"735_CR25","doi-asserted-by":"crossref","unstructured":"Albawi S, Mohammed TA, Al-Zawi S. Understanding of a convolutional neural network. In: 2017 International Conference on Engineering and Technology (ICET). IEEE; 2017. pp. 1\u20136.","DOI":"10.1109\/ICEngTechnol.2017.8308186"},{"key":"735_CR26","unstructured":"Zaniolo L, Marques O. On the use of variable stride in convolutional neural networks. Multimedia Tools Appl. 2020;1\u201318."},{"key":"735_CR27","doi-asserted-by":"crossref","unstructured":"Murray N, Perronnin F. Generalized max pooling. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2014. pp. 2473\u201380.","DOI":"10.1109\/CVPR.2014.317"},{"key":"735_CR28","doi-asserted-by":"crossref","unstructured":"Kuan K, Manek G, Lin J, Fang Y, Chandrasekhar V. Region average pooling for context-aware object detection. In: 2017 IEEE International Conference on Image Processing (ICIP). IEEE; 2017. pp. 1347\u201351.","DOI":"10.1109\/ICIP.2017.8296501"},{"issue":"8","key":"735_CR29","doi-asserted-by":"publisher","first-page":"5455","DOI":"10.1007\/s10462-020-09825-6","volume":"53","author":"A Khan","year":"2020","unstructured":"Khan A, Sohail A, Zahoora U, Qureshi AS. A survey of the recent architectures of deep convolutional neural networks. Artif Intell Rev. 2020;53(8):5455\u2013516.","journal-title":"Artif Intell Rev"},{"issue":"7553","key":"735_CR30","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G. Deep learning. Nature. 2015;521(7553):436\u201344.","journal-title":"Nature"},{"key":"735_CR31","first-page":"4898","volume":"29","author":"W Luo","year":"2016","unstructured":"Luo W, Li Y, Urtasun R, Zemel R. Understanding the effective receptive field in deep convolutional neural networks. Adv Neural Inf Process Syst. 2016;29:4898\u2013906.","journal-title":"Adv Neural Inf Process Syst"},{"issue":"11","key":"735_CR32","doi-asserted-by":"publisher","first-page":"e21","DOI":"10.23915\/distill.00021","volume":"4","author":"A Araujo","year":"2019","unstructured":"Araujo A, Norris W, Sim J. Computing receptive fields of convolutional neural networks. Distill. 2019;4(11):e21.","journal-title":"Distill."},{"issue":"10","key":"735_CR33","doi-asserted-by":"publisher","first-page":"27","DOI":"10.2352\/ISSN.2470-1173.2017.10.IMAWM-163","volume":"2017","author":"DM Montserrat","year":"2017","unstructured":"Montserrat DM, Lin Q, Allebach J, Delp EJ. Training object detection and recognition CNN models using data augmentation. Electron Imaging. 2017;2017(10):27\u201336.","journal-title":"Electron Imaging"},{"key":"735_CR34","unstructured":"Savalle PA, Tsogkas S, Papandreou G, Kokkinos I. Deformable part models with cnn features. In: Deformable Part Models with CNN Features. European Conference on Computer Vision, Parts and Attributes Workshop, Sep 6, 2014, Zurich, Switzerland (hal-01109290)."},{"key":"735_CR35","doi-asserted-by":"crossref","unstructured":"Tang W, Yu P, Zhou J, Wu Y. Towards a unified compositional model for visual pattern modeling. In: Proceedings of the IEEE International Conference on Computer Vision; 2017. pp. 2784\u201393.","DOI":"10.1109\/ICCV.2017.303"},{"key":"735_CR36","doi-asserted-by":"crossref","unstructured":"Kortylewski A, He J, Liu Q, Yuille AL. Compositional convolutional neural networks: a deep architecture with innate robustness to partial occlusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2020. pp. 8940\u201349.","DOI":"10.1109\/CVPR42600.2020.00896"},{"key":"735_CR37","doi-asserted-by":"crossref","unstructured":"Jack D, Maire F, Shirazi S, Eriksson A. IGE- Net: Inverse graphics energy networks for human pose estimation and single-view reconstruction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 7075\u201384.","DOI":"10.1109\/CVPR.2019.00724"},{"key":"735_CR38","doi-asserted-by":"crossref","unstructured":"Halder SS, Lalonde JF, Charette Rd. Physics-based rendering for improving robustness to rain. In: Proceedings of the IEEE International Conference on Computer Vision; 2019. pp. 10203\u201312.","DOI":"10.1109\/ICCV.2019.01030"},{"key":"735_CR39","unstructured":"Clevert DA, Unterthiner T, Hochreiter S. Fast and accurate deep network learning by exponential linear units (elus). arXiv preprint. 2015. arXiv:1511.07289."},{"key":"735_CR40","unstructured":"Maas AL, Hannun AY, Ng AY. Rectifier nonlinearities improve neural network acoustic models. In: Proc. icml. vol. 30; 2013. p. 3."},{"key":"735_CR41","unstructured":"Goodfellow I, Warde-Farley D, Mirza M, Courville A, Bengio Y. Maxout networks. In: International conference on machine learning. PMLR; 2013. pp. 1319\u201327."},{"issue":"9","key":"735_CR42","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He K, Zhang X, Ren S, Sun J. Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans Pattern Anal Mach Intell. 2015;37(9):1904\u201316.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"735_CR43","doi-asserted-by":"crossref","unstructured":"Laptev D, Savinov N, Buhmann JM, Pollefeys M. TI-POOLING: transformation-invariant pooling for feature learning in convolutional neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2016. pp. 289\u201397.","DOI":"10.1109\/CVPR.2016.38"},{"key":"735_CR44","first-page":"364","volume-title":"Mixed pooling for convolutional neural networks. In: International conference on rough sets and knowledge technology","author":"D Yu","year":"2014","unstructured":"Yu D, Wang H, Chen P, Wei Z. Mixed pooling for convolutional neural networks. In: International conference on rough sets and knowledge technology. Springer; 2014. p. 364\u201375."},{"key":"735_CR45","unstructured":"Zeiler MD, Fergus R. Stochastic pooling for regularization of deep convolutional neural networks. arXiv preprint. 2013. arXiv:1301.3557."},{"key":"735_CR46","unstructured":"Wan L, Zeiler M, Zhang S, Le Cun Y, Fergus R. Regularization of neural networks using dropconnect. In: International conference on machine learning; 2013. pp. 1058\u201366."},{"key":"735_CR47","unstructured":"Larsson G, Maire M, Shakhnarovich G. Fractalnet: Ultra-deep neural networks without residuals. arXiv preprint. 2016. arXiv:1605.07648."},{"key":"735_CR48","unstructured":"Ioffe S, Szegedy C. Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv preprint. 2015. arXiv:1502.03167."},{"key":"735_CR49","doi-asserted-by":"crossref","unstructured":"Wei Z, Zhang J, Liu L, Zhu F, Shen F, Zhou Y, et al. Building detail-sensitive semantic segmentation networks with polynomial pooling. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 7115\u201323.","DOI":"10.1109\/CVPR.2019.00728"},{"key":"735_CR50","unstructured":"Estrach JB, Szlam A, LeCun Y. Signal recovery from pooling representations. In: International conference on machine learning. PMLR; 2014. pp. 307\u201315."},{"key":"735_CR51","doi-asserted-by":"crossref","unstructured":"Ouyang W, Luo P, Zeng X, Qiu S, Tian Y, Li H, et al. Deepid-net: multi-stage and deformable deep convolutional neural networks for object detection. arXiv preprint. 2014. arXiv:1409.3505.","DOI":"10.1109\/CVPR.2015.7298854"},{"key":"735_CR52","doi-asserted-by":"crossref","unstructured":"Girshick R. Fast R-CNN object detection with Caffe. Microsoft Res. 2015.","DOI":"10.1109\/ICCV.2015.169"},{"issue":"1","key":"735_CR53","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1186\/s40537-019-0197-0","volume":"6","author":"C Shorten","year":"2019","unstructured":"Shorten C, Khoshgoftaar TM. A survey on image data augmentation for deep learning. J Big Data. 2019;6(1):60.","journal-title":"J Big Data"},{"key":"735_CR54","doi-asserted-by":"crossref","unstructured":"Paulin M, Revaud J, Harchaoui Z, Perronnin F, Schmid C. Transformation pursuit for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2014. pp. 3646\u201353.","DOI":"10.1109\/CVPR.2014.466"},{"key":"735_CR55","unstructured":"Azulay A, Weiss Y. Why do deep convolutional networks generalize so poorly to small image transformations? arXiv preprint. 2018. arXiv:1805.12177."},{"key":"735_CR56","unstructured":"Engstrom L, Tsipras D, Schmidt L, Madry A. A rotation and a translation suffice: fooling CNNs with simple transformations. arXiv preprint. 2017;1(2):3. arXiv:1712.02779"},{"key":"735_CR57","unstructured":"Sabour S, Frosst N, Hinton GE. Dynamic routing between capsules. In: Advances in neural information processing systems; 2017. pp. 3856\u201366."},{"key":"735_CR58","doi-asserted-by":"crossref","unstructured":"Dai J, Qi H, Xiong Y, Li Y, Zhang G, Hu H, et al. Deformable convolutional networks. In: Proceedings of the IEEE international conference on computer vision; 2017. pp. 764\u201373.","DOI":"10.1109\/ICCV.2017.89"},{"key":"735_CR59","unstructured":"Jia X, De Brabandere B, Tuytelaars T, Gool LV. Dynamic filter networks. In: Advances in neural information processing systems; 2016. pp. 667\u201375."},{"issue":"3","key":"735_CR60","first-page":"33","volume":"24","author":"P Tarasiuk","year":"2016","unstructured":"Tarasiuk P, Pryczek M. Geometric transformations embedded into convolutional neural networks. J Appl Comput Sci. 2016;24(3):33\u201348.","journal-title":"J Appl Comput Sci"},{"key":"735_CR61","unstructured":"Cohen T, Welling M. Group equivariant convolutional networks. In: International conference on machine learning; 2016. pp. 2990\u20139."},{"key":"735_CR62","unstructured":"Dieleman S, De Fauw J, Kavukcuoglu K. Exploiting cyclic symmetry in convolutional neural networks. arXiv preprint. 2016. arXiv:1602.02660."},{"key":"735_CR63","doi-asserted-by":"crossref","unstructured":"Marcos D, Volpi M, Komodakis N, Tuia D. Rotation equivariant vector field networks. In: Proceedings of the IEEE International Conference on Computer Vision; 2017. pp. 5048\u201357.","DOI":"10.1109\/ICCV.2017.540"},{"key":"735_CR64","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1016\/j.patcog.2016.06.005","volume":"61","author":"N Van Noord","year":"2017","unstructured":"Van Noord N, Postma E. Learning scale-variant and scale-invariant features for deep image classification. Pattern Recogn. 2017;61:583\u201392.","journal-title":"Pattern Recogn"},{"key":"735_CR65","unstructured":"Ghosh R, Gupta AK. Scale steerable filters for locally scale-invariant convolutional neural networks. arXiv preprint. 2019. arXiv:1906.03861."},{"issue":"4","key":"735_CR66","first-page":"985","volume":"20","author":"J Li","year":"2017","unstructured":"Li J, Liang X, Shen S, Xu T, Feng J, Yan S. Scale- aware fast R-CNN for pedestrian detection. IEEE Trans Multimedia. 2017;20(4):985\u201396.","journal-title":"IEEE Trans Multimedia"},{"key":"735_CR67","doi-asserted-by":"crossref","unstructured":"Marcos D, Volpi M, Tuia D. Learning rotation invariant convolutional filters for texture classification. In: 2016 23rd International Conference on Pattern Recognition (ICPR). IEEE; 2016. pp. 2012\u20137.","DOI":"10.1109\/ICPR.2016.7899932"},{"key":"735_CR68","doi-asserted-by":"crossref","unstructured":"Zhou Y, Ye Q, Qiu Q, Jiao J. Oriented response networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2017. pp. 519\u201328.","DOI":"10.1109\/CVPR.2017.527"},{"key":"735_CR69","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J. Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2014. pp. 580\u20137.","DOI":"10.1109\/CVPR.2014.81"},{"key":"735_CR70","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00b4ar P, Girshick R, He K, Hariharan B, Belongie S. Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2017. pp. 2117\u201325.","DOI":"10.1109\/CVPR.2017.106"},{"key":"735_CR71","doi-asserted-by":"crossref","unstructured":"Jeon Y, Kim J. Active convolution: learning the shape of convolution for image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2017. pp. 4201\u20139.","DOI":"10.1109\/CVPR.2017.200"},{"key":"735_CR72","doi-asserted-by":"crossref","unstructured":"Chen LC, Papandreou G, Schroff F, Adam H. Rethinking atrous convolution for semantic image segmentation. arXiv preprint. 2017. arXiv:1706.05587.","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"735_CR73","unstructured":"Yu F, Koltun V. Multi-scale context aggregation by dilated convolutions. arXiv preprint. 2015. arXiv:1511.07122."},{"key":"735_CR74","first-page":"44","volume-title":"Transforming auto-encoders. In: International conference on artificial neural networks","author":"GE Hinton","year":"2011","unstructured":"Hinton GE, Krizhevsky A, Wang SD. Transforming auto-encoders. In: International conference on artificial neural networks. Springer; 2011. p. 44\u201351."},{"key":"735_CR75","unstructured":"Hinton GE, Sabour S, Frosst N. Matrix capsules with EM routing. In: International conference on learning representations; 2018."},{"key":"735_CR76","unstructured":"Zhao W, Ye J, Yang M, Lei Z, Zhang S, Zhao Z. Investigating capsule networks with dynamic routing for text classification. arXiv preprint. 2018. arXiv:1804.00538."},{"key":"735_CR77","unstructured":"Venkatraman S, Balasubramanian S, Sarma RR. Building deep, equivariant capsule networks. arXiv preprint. 2019. arXiv:1908.01300."},{"key":"735_CR78","doi-asserted-by":"crossref","unstructured":"Phaye SSR, Sikka A, Dhall A, Bathula D. Dense and diverse capsule networks: making the capsules learn better. arXiv preprint. 2018. arXiv:1805.04001.","DOI":"10.1007\/978-3-030-20873-8_37"},{"key":"735_CR79","doi-asserted-by":"crossref","unstructured":"Ramasinghe S, Athuraliya C, Khan SH. A context- aware capsule network for multi-label classification. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 0\u20130.","DOI":"10.1007\/978-3-030-11015-4_40"},{"key":"735_CR80","unstructured":"Zhang L, Edraki M, Qi GJ. Cappronet: Deep feature learning via orthogonal projections onto capsule subspaces. In: Advances in Neural Information Processing Systems; 2018. pp. 5814\u201323."},{"key":"735_CR81","doi-asserted-by":"crossref","unstructured":"Rodr\u0131guez-Sanchez A, Dick T. Capsule Networks for Attention Under Occlusion. In: International Conference on Artificial Neural Networks. Springer; 2019. pp. 523\u201334.","DOI":"10.1007\/978-3-030-30493-5_50"},{"key":"735_CR82","unstructured":"Prakash S, Gu G. Simultaneous localization and mapping with depth prediction using capsule networks for uavs. arXiv preprint. 2018. arXiv:1808.05336."},{"issue":"14","key":"735_CR83","doi-asserted-by":"publisher","first-page":"1694","DOI":"10.3390\/rs11141694","volume":"11","author":"ML Mekhalfi","year":"2019","unstructured":"Mekhalfi ML, Bejiga MB, Soresina D, Melgani F, Demir B. Capsule networks for object detection in UAV imagery. Remote Sensing. 2019;11(14):1694.","journal-title":"Remote Sensing"},{"key":"735_CR84","unstructured":"Kumar AD. Novel deep learning model for traffic sign detection using capsule networks. arXiv preprint. 2018. arXiv:1805.04424."},{"key":"735_CR85","unstructured":"LaLonde R, Bagci U. Capsules for object segmentation. arXiv preprint. 2018. arXiv:1804.04241."},{"key":"735_CR86","unstructured":"Duarte K, Rawat Y, Shah M. Videocapsulenet: a simplified network for action detection. In: Advances in Neural Information Processing Systems; 2018. pp. 7610\u20139."},{"key":"735_CR87","doi-asserted-by":"crossref","unstructured":"Zhao Y, Birdal T, Deng H, Tombari F. 3D point capsule networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2019. pp. 1009\u201318.","DOI":"10.1109\/CVPR.2019.00110"},{"key":"735_CR88","doi-asserted-by":"crossref","unstructured":"Ahmad A, Kakillioglu B, Velipasalar S. 3D capsule networks for object classification from 3D model data. In: 2018 52nd Asilomar Conference on Signals, Systems, and Computers. IEEE; 2018. pp. 2225\u20139.","DOI":"10.1109\/ACSSC.2018.8645256"},{"key":"735_CR89","unstructured":"Jaderberg M, Simonyan K, Zisserman A, et al. Spatial transformer networks. In: Advances in neural information processing systems; 2015. pp. 2017\u201325."},{"key":"735_CR90","doi-asserted-by":"crossref","unstructured":"Worrall DE, Garbin SJ, Turmukhambetov D, Brostow GJ. Harmonic networks: deep translation and rotation equivariance. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2017. pp. 5028\u201337.","DOI":"10.1109\/CVPR.2017.758"},{"key":"735_CR91","first-page":"818","volume-title":"Visualizing and understanding convolutional networks. In: European conference on computer vision","author":"MD Zeiler","year":"2014","unstructured":"Zeiler MD, Fergus R. Visualizing and understanding convolutional networks. In: European conference on computer vision. Springer; 2014. p. 818\u201333."},{"key":"735_CR92","unstructured":"Doersch C, Gupta A, Efros AA. Mid-level visual element discovery as discriminative mode seeking. In: Advances in neural information processing systems; 2013. pp. 494\u2013502."},{"key":"735_CR93","unstructured":"Parizi SN, Vedaldi A, Zisserman A, Felzenszwalb P. Automatic discovery and optimization of parts for image classification. arXiv preprint. 2014. arXiv:1412.6598."},{"issue":"3","key":"735_CR94","doi-asserted-by":"publisher","first-page":"344","DOI":"10.1007\/s11263-016-0945-y","volume":"121","author":"Y Li","year":"2017","unstructured":"Li Y, Liu L, Shen C, Van Den Hengel A. Mining mid-level visual patterns with deep CNN activations. Int J Comput Vision. 2017;121(3):344\u201364.","journal-title":"Int J Comput Vision"},{"key":"735_CR95","doi-asserted-by":"crossref","unstructured":"Yang L, Xie X, Li P, Zhang D, Zhang L. Part-based convolutional neural network for visual recognition. In: 2017 IEEE International Conference on Image Processing (ICIP). IEEE; 2017. pp. 1772\u20136.","DOI":"10.1109\/ICIP.2017.8296586"},{"key":"735_CR96","doi-asserted-by":"crossref","unstructured":"Kortylewski A, Liu Q, Wang H, Zhang Z, Yuille A. Combining compositional models and deep networks for robust object classification under occlusion. In: The IEEE Winter Conference on Applications of Computer Vision; 2020. pp. 1333\u201341.","DOI":"10.1109\/WACV45572.2020.9093560"},{"key":"735_CR97","doi-asserted-by":"publisher","unstructured":"Sun Y, Zheng L, Li Y, Yang Y, Tian Q, Wang S. Learning part-based convolutional features for person re-identification. IEEE Trans Pattern Anal Mach Intell. 2019;43(3):902\u201317. https:\/\/doi.org\/10.1109\/TPAMI.2019.2938523.","DOI":"10.1109\/TPAMI.2019.2938523"},{"key":"735_CR98","doi-asserted-by":"crossref","unstructured":"Hsieh PJ, Lin YL, Chen YH, Hsu W. Egocentric activity recognition by leveraging multiple mid- level representations. In: 2016 IEEE International Conference on Multimedia and Expo (ICME). IEEE; 2016. pp. 1\u20136.","DOI":"10.1109\/ICME.2016.7552937"},{"key":"735_CR99","doi-asserted-by":"crossref","unstructured":"Tang W, Yu P, Wu Y. Deeply learned compositional models for human pose estimation. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 190\u2013206.","DOI":"10.1007\/978-3-030-01219-9_12"},{"key":"735_CR100","doi-asserted-by":"crossref","unstructured":"Zhang Z, Xie C, Wang J, Xie L, Yuille AL. Deepvoting: a robust and explainable deep network for semantic part detection under partial occlusion. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. pp. 1372\u201380.","DOI":"10.1109\/CVPR.2018.00149"},{"issue":"4","key":"735_CR101","doi-asserted-by":"publisher","first-page":"627","DOI":"10.1109\/TPAMI.2016.2578328","volume":"39","author":"B Hariharan","year":"2016","unstructured":"Hariharan B, Arbelaez P, Girshick R, Malik J. Object instance segmentation and fine-grained localization using hypercolumns. IEEE Trans Pattern Anal Mach Intell. 2016;39(4):627\u201339.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"735_CR102","unstructured":"Johnson J. Deep, skinny neural networks are not universal approximators. arXiv preprint. 2018. arXiv:1810.00393."},{"key":"735_CR103","unstructured":"Marcus G. Deep learning: a critical appraisal. arXiv preprint. 2018. arXiv:1801.00631."},{"key":"735_CR104","doi-asserted-by":"crossref","unstructured":"Shen X, Tian X, He A, Sun S, Tao D. Transform- invariant convolutional neural networks for image classification and search. In: Proceedings of the 24th ACM international conference on Multimedia; 2016. pp. 1345\u201354.","DOI":"10.1145\/2964284.2964316"},{"key":"735_CR105","unstructured":"Shu C, Chen X, Xie Q, Han H. Hierarchical Spatial Transformer Network. arXiv preprint. 2018. arXiv:1801.09467."},{"key":"735_CR106","doi-asserted-by":"crossref","unstructured":"Wang X, Shrivastava A, Gupta A. A-fast-rcnn: Hard positive generation via adversary for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2017. pp. 2606\u201315.","DOI":"10.1109\/CVPR.2017.324"},{"key":"735_CR107","doi-asserted-by":"crossref","unstructured":"Girdhar R, Carreira J, Doersch C, Zisserman A. Video action transformer network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 244\u201353.","DOI":"10.1109\/CVPR.2019.00033"},{"key":"735_CR108","unstructured":"Yan X, Yang J, Yumer E, Guo Y, Lee H. Perspective transformer nets: Learning single-view 3d object reconstruction without 3d supervision. In: Proceedings of the 30th International Conference on Neural Information Processing Systems; 2016. pp. 1704\u201312."},{"key":"735_CR109","doi-asserted-by":"crossref","unstructured":"Bhagavatula C, Zhu C, Luu K, Savvides M. Faster than real-time facial alignment: a 3D spatial transformer network approach in unconstrained poses. In: Proceedings of the IEEE International Conference on Computer Vision; 2017. pp. 3980\u201389.","DOI":"10.1109\/ICCV.2017.429"},{"key":"735_CR110","doi-asserted-by":"crossref","unstructured":"Lin CH, Lucey S. Inverse compositional spatial transformer networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2017. pp. 2568\u201376.","DOI":"10.1109\/CVPR.2017.242"},{"issue":"12","key":"735_CR111","doi-asserted-by":"publisher","first-page":"2496","DOI":"10.1109\/TPAMI.2016.2646685","volume":"39","author":"O Freifeld","year":"2017","unstructured":"Freifeld O, Hauberg S, Batmanghelich K, Fisher JW. Transformations based on continuous piecewise-affine velocity fields. IEEE Trans Pattern Anal Mach Intell. 2017;39(12):2496\u2013509.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"735_CR112","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/s41095-018-0112-1","volume":"4","author":"Z Wei","year":"2018","unstructured":"Wei Z, Sun Y, Lin J, Liu S. Learning adaptive receptive fields for deep image parsing networks. Comput Vis Media. 2018;4(3):231\u201344.","journal-title":"Comput Vis Media"},{"key":"735_CR113","doi-asserted-by":"crossref","unstructured":"Jing Y, Liu Y, Yang Y, Feng Z, Yu Y, Tao D, et al. Stroke controllable fast style transfer with adaptive receptive fields. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 238\u201354.","DOI":"10.1007\/978-3-030-01261-8_15"},{"key":"735_CR114","doi-asserted-by":"crossref","unstructured":"Ciresan D, Meier U, Schmidhuber J. Multi-column deep neural networks for image classification. In: 2012 IEEE conference on computer vision and pattern recognition. IEEE; 2012. pp. 3642\u20139.","DOI":"10.1109\/CVPR.2012.6248110"},{"key":"735_CR115","unstructured":"Simonyan K, Zisserman A. Two-stream convolutional networks for action recognition in videos. In: Advances in neural information processing systems; 2014. pp. 568\u201376."},{"key":"735_CR116","doi-asserted-by":"crossref","unstructured":"Ciresan D, Meier U. Multi-column deep neural networks for offline handwritten Chinese character classification. In: 2015 international joint conference on neural networks (IJCNN). IEEE; 2015. pp. 1\u20136.","DOI":"10.1109\/IJCNN.2015.7280516"},{"issue":"10","key":"735_CR117","doi-asserted-by":"publisher","first-page":"1396","DOI":"10.1049\/iet-its.2018.5171","volume":"12","author":"S Natarajan","year":"2018","unstructured":"Natarajan S, Annamraju AK, Baradkar CS. Traffic sign recognition using weighted multi-convolutional neural network. IET Intel Transport Syst. 2018;12(10):1396\u2013405.","journal-title":"IET Intel Transport Syst"},{"key":"735_CR118","first-page":"593","volume-title":"Multi-column spatial transformer convolution neural network for traffic sign recognition. In: International Symposium on Neural Networks","author":"J Zhang","year":"2018","unstructured":"Zhang J, Duan S, Wang L, Zou X. Multi-column spatial transformer convolution neural network for traffic sign recognition. In: International Symposium on Neural Networks. Springer; 2018. p. 593\u2013600."},{"key":"735_CR119","doi-asserted-by":"publisher","first-page":"73357","DOI":"10.1109\/ACCESS.2018.2850965","volume":"6","author":"C Fan","year":"2018","unstructured":"Fan C, Li Y, Wang G, Li Y. Learning transformation- invariant representations for image recognition with drop transformation networks. IEEE Access. 2018;6:73357\u201369.","journal-title":"IEEE Access"},{"issue":"22","key":"735_CR120","doi-asserted-by":"publisher","first-page":"29407","DOI":"10.1007\/s11042-018-5691-4","volume":"77","author":"Y Liu","year":"2018","unstructured":"Liu Y, Guo Y, Georgiou T, Lew MS. Fusion that matters: convolutional fusion networks for visual recognition. Multimedia Tools Appl. 2018;77(22):29407\u201334.","journal-title":"Multimedia Tools Appl"},{"key":"735_CR121","doi-asserted-by":"crossref","unstructured":"Lu X, Lin Z, Shen X, Mech R, Wang JZ. Deep multi- patch aggregation network for image style, aesthetics, and quality estimation. In: Proceedings of the IEEE International Conference on Computer Vision; 2015. pp. 990\u20138.","DOI":"10.1109\/ICCV.2015.119"},{"issue":"5","key":"735_CR122","doi-asserted-by":"publisher","first-page":"597","DOI":"10.1007\/s12559-017-9472-6","volume":"9","author":"G Wen","year":"2017","unstructured":"Wen G, Hou Z, Li H, Li D, Jiang L, Xun E. Ensemble of deep neural networks with probability-based fusion for facial expression recognition. Cogn Comput. 2017;9(5):597\u2013610.","journal-title":"Cogn Comput"},{"key":"735_CR123","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.inffus.2020.04.002","volume":"62","author":"S Tabik","year":"2020","unstructured":"Tabik S, Alvear-Sandoval RF, Ruiz MM, Sancho-G\u00f3mez JL, Figueiras-Vidal AR, Herrera F. MNIST- NET10: a heterogeneous deep networks fusion based on the degree of certainty to reach 0.1% error rate. Ensembles overview and proposal. Inf Fus. 2020;62:73\u201380.","journal-title":"Inf Fus"},{"key":"735_CR124","doi-asserted-by":"crossref","unstructured":"Hong X, Xiong P, Ji R, Fan H. Deep fusion network for image completion. In: Proceedings of the 27th ACM International Conference on Multimedia; 2019. pp. 2033\u201342.","DOI":"10.1145\/3343031.3351002"},{"key":"735_CR125","doi-asserted-by":"crossref","unstructured":"Gallo I, Calefati A, Nawaz S. Multimodal classification fusion in real-world scenarios. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR). vol. 5. IEEE; 2017. pp. 36\u201341.","DOI":"10.1109\/ICDAR.2017.326"},{"key":"735_CR126","unstructured":"Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition. arXiv preprint. 2014. arXiv:1409.1556."},{"key":"735_CR127","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, et al. Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2015. pp. 1\u20139.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"735_CR128","unstructured":"Xu Y, Xiao T, Zhang J, Yang K, Zhang Z. Scale- invariant convolutional neural networks. arXiv preprint. 2014. arXiv:1411.6369."},{"key":"735_CR129","unstructured":"Liao Z, Carneiro G. Competitive multi-scale convolution. arXiv preprint. 2015. arXiv:1511.05635."},{"issue":"3","key":"735_CR130","doi-asserted-by":"publisher","first-page":"789","DOI":"10.3390\/s18030789","volume":"18","author":"X Du","year":"2018","unstructured":"Du X, Qu X, He Y, Guo D. Single image super- resolution based on multi-scale competitive convolutional neural network. Sensors. 2018;18(3):789.","journal-title":"Sensors"},{"key":"735_CR131","doi-asserted-by":"crossref","unstructured":"Chen X, Bin Y, Sang N, Gao C. Scale pyramid network for crowd counting. In: 2019 IEEE Winter Conference on Applications of Computer Vision (WACV). IEEE; 2019. pp. 1941\u201350.","DOI":"10.1109\/WACV.2019.00211"},{"key":"735_CR132","unstructured":"Szegedy C, Toshev A, Erhan D. Deep neural networks for object detection. In: Advances in neural information processing systems; 2013. pp. 2553\u201361."},{"key":"735_CR133","unstructured":"Iandola F, Moskewicz M, Karayev S, Girshick R, Darrell T, Keutzer K. Densenet: implementing efficient convnet descriptor pyramids. arXiv preprint. 2014. arXiv:1404.1869."},{"key":"735_CR134","unstructured":"Sermanet P, Eigen D, Zhang X, Mathieu M, Fergus R, LeCun Y. Overfeat: Integrated recognition, localization and detection using convolutional networks. arXiv preprint. 2013. arXiv:1312.6229."},{"key":"735_CR135","unstructured":"Wu R, Yan S, Shan Y, Dang Q, Sun G. Deep image: scaling up image recognition. arXiv preprint. 2015;7(8). arXiv:1501.02876."},{"key":"735_CR136","doi-asserted-by":"crossref","unstructured":"Kong T, Yao A, Chen Y, Sun F. Hypernet: Towards accurate region proposal generation and joint object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2016. pp. 845\u201353.","DOI":"10.1109\/CVPR.2016.98"},{"key":"735_CR137","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T. Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2015. pp. 3431\u201340.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"735_CR138","doi-asserted-by":"crossref","unstructured":"Bell S, Lawrence Zitnick C, Bala K, Girshick R. Inside-outside net: Detecting objects in context with skip pooling and recurrent neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2016. pp. 2874\u201383.","DOI":"10.1109\/CVPR.2016.314"},{"key":"735_CR139","first-page":"354","volume-title":"A unified multi-scale deep convolutional neural network for fast object detection. In: European conference on computer vision","author":"Z Cai","year":"2016","unstructured":"Cai Z, Fan Q, Feris RS, Vasconcelos N. A unified multi-scale deep convolutional neural network for fast object detection. In: European conference on computer vision. Springer; 2016. p. 354\u201370."},{"key":"735_CR140","doi-asserted-by":"crossref","unstructured":"Li Y, Chen Y, Wang N, Zhang Z. Scale-aware trident networks for object detection. In: Proceedings of the IEEE international conference on computer vision; 2019. pp. 6054\u201363.","DOI":"10.1109\/ICCV.2019.00615"},{"key":"735_CR141","doi-asserted-by":"crossref","unstructured":"Zhang Y, Zhou D, Chen S, Gao S, Ma Y. Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2016. pp. 589\u201397.","DOI":"10.1109\/CVPR.2016.70"},{"key":"735_CR142","doi-asserted-by":"crossref","unstructured":"Cui J, Chen P, Li R, Liu S, Shen X, Jia J. Fast and practical neural architecture search. In: Proceedings of the IEEE International Conference on Computer Vision; 2019. pp. 6509\u201318.","DOI":"10.1109\/ICCV.2019.00661"},{"key":"735_CR143","unstructured":"Cai H, Zhu L, Han S. Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint. 2018. arXiv:1812.00332."},{"issue":"1","key":"735_CR144","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1109\/TIP.2018.2867198","volume":"28","author":"G Cheng","year":"2018","unstructured":"Cheng G, Han J, Zhou P, Xu D. Learning rotation- invariant and fisher discriminative convolutional neural networks for object detection. IEEE Trans Image Process. 2018;28(1):265\u201378.","journal-title":"IEEE Trans Image Process"},{"key":"735_CR145","unstructured":"Wu F, Hu P, Kong D. Flip-rotate-pooling convolution and split dropout on convolution neural networks for image classification. arXiv preprint. 2015. arXiv:1507.08754."},{"key":"735_CR146","doi-asserted-by":"crossref","unstructured":"Jiang R, Mei S. Polar coordinate convolutional neural network: from rotation-invariance to translation-invariance. In: 2019 IEEE International Conference on Image Processing (ICIP). IEEE; 2019. pp. 355\u201359.","DOI":"10.1109\/ICIP.2019.8802940"},{"issue":"2","key":"735_CR147","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TMM.2018.2856121","volume":"21","author":"J Chen","year":"2018","unstructured":"Chen J, Luo Z, Zhang Z, Huang F, Ye Z, Takiguchi T, et al. Polar transformation on image features for orientation-invariant representations. IEEE Trans Multimedia. 2018;21(2):300\u201313.","journal-title":"IEEE Trans Multimedia"},{"key":"735_CR148","unstructured":"Kim J, Jung W, Kim H, Lee J. CyCNN: a rotation invariant CNN using polar mapping and cylindrical convolution layers. arXiv preprint. 2020. arXiv:2007.10588."},{"key":"735_CR149","unstructured":"Esteves C, Allen-Blanchette C, Zhou X, Daniilidis K. Polar transformer networks. arXiv preprint. 2017. arXiv:1709.01889."},{"key":"735_CR150","unstructured":"Henriques JF, Vedaldi A. Warped convolutions: efficient invariance to spatial transformations. In: International Conference on Machine Learning. PMLR; 2017. pp. 1461\u20139."},{"key":"735_CR151","doi-asserted-by":"crossref","unstructured":"Schmidt U, Roth S. Learning rotation-aware features: from invariant priors to equivariant descriptors. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition. IEEE; 2012. pp. 2050\u20137.","DOI":"10.1109\/CVPR.2012.6247909"},{"key":"735_CR152","doi-asserted-by":"crossref","unstructured":"Amorim M, Bortoloti F, Ciarelli PM, de Oliveira E, de Souza AF. Analysing rotation-invariance of a log-polar transformation in convolutional neural networks. In: 2018 International Joint Conference on Neural Networks (IJCNN). IEEE; 2018. pp. 1\u20136.","DOI":"10.1109\/IJCNN.2018.8489295"},{"key":"735_CR153","doi-asserted-by":"crossref","unstructured":"Remmelzwaal LA, Mishra AK, Ellis GF. Human eye inspired log-polar pre-processing for neural networks. In: 2020 International SAUPEC\/RobMech\/PRASA Conference. IEEE; 2020. pp. 1\u20136.","DOI":"10.1109\/SAUPEC\/RobMech\/PRASA48453.2020.9041103"},{"issue":"9","key":"735_CR154","doi-asserted-by":"publisher","first-page":"891","DOI":"10.1109\/34.93808","volume":"13","author":"WT Freeman","year":"1991","unstructured":"Freeman WT, Adelson EH, et al. The design and use of steerable filters. IEEE Trans Pattern Anal Mach Intell. 1991;13(9):891\u2013906.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"735_CR155","unstructured":"Cohen TS, Welling M. Steerable CNNs. arXiv preprint. 2016. arXiv:1612.08498."},{"key":"735_CR156","doi-asserted-by":"crossref","unstructured":"Jacobsen JH, De Brabandere B, Smeulders AW. Dynamic steerable blocks in deep residual networks. arXiv preprint. 2017. arXiv:1706.00598.","DOI":"10.5244\/C.31.145"},{"key":"735_CR157","doi-asserted-by":"crossref","unstructured":"Weiler M, Hamprecht FA, Storath M. Learning steerable filters for rotation equivariant CNNs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. pp. 849\u201358.","DOI":"10.1109\/CVPR.2018.00095"},{"issue":"9","key":"735_CR158","doi-asserted-by":"publisher","first-page":"4357","DOI":"10.1109\/TIP.2018.2835143","volume":"27","author":"S Luan","year":"2018","unstructured":"Luan S, Chen C, Zhang B, Han J, Liu J. Gabor convolutional networks. IEEE Trans Image Process. 2018;27(9):4357\u201366.","journal-title":"IEEE Trans Image Process"},{"key":"735_CR159","doi-asserted-by":"crossref","unstructured":"Su YC, Grauman K. Making 360 video watchable in 2d: learning videography for click free viewing. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE; 2017. pp. 1368\u201376.","DOI":"10.1109\/CVPR.2017.150"},{"key":"735_CR160","first-page":"26","volume":"69","author":"R Monroy","year":"2018","unstructured":"Monroy R, Lutz S, Chalasani T, Smolic A. Salnet360: saliency maps for omni-directional images with CNN. Signal Process. 2018;69:26\u201334.","journal-title":"Signal Process"},{"key":"735_CR161","unstructured":"Khasanova R, Frossard P. Graph-based isometry invariant representation learning. arXiv preprint. 2017. arXiv:1703.00356."},{"key":"735_CR162","doi-asserted-by":"crossref","unstructured":"Khasanova R, Frossard P. Graph-based classification of omnidirectional images. In: Proceedings of the IEEE International Conference on Computer Vision Workshops; 2017. pp. 869\u201378.","DOI":"10.1109\/ICCVW.2017.106"},{"key":"735_CR163","unstructured":"Cohen TS, Geiger M, K\u00f6hler J, Welling M. Spherical CNNs. arXiv preprint. 2018. arXiv:1801.10130."},{"key":"735_CR164","doi-asserted-by":"crossref","unstructured":"Zhao Q, Zhu C, Dai F, Ma Y, Jin G, Zhang Y. Distortion-aware CNNs for Spherical Images. In: IJCAI; 2018. pp. 1198\u2013204.","DOI":"10.24963\/ijcai.2018\/167"},{"key":"735_CR165","doi-asserted-by":"crossref","unstructured":"Zhang Z, Xu Y, Yu J, Gao S. Saliency detection in 360 videos. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 488\u2013503.","DOI":"10.1007\/978-3-030-01234-2_30"},{"key":"735_CR166","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.ascom.2019.03.004","volume":"27","author":"N Perraudin","year":"2019","unstructured":"Perraudin N, Defferrard M, Kacprzak T, Sgier R. DeepSphere: efficient spherical convolutional neural network with HEALPix sampling for cosmological applications. Astronomy Comput. 2019;27:130\u201346.","journal-title":"Astronomy Comput"},{"key":"735_CR167","unstructured":"Boomsma W, Frellsen J. Spherical convolutions and their application in molecular modelling. In: Advances in Neural Information Processing Systems; 2017. pp. 3433\u201343."},{"key":"735_CR168","doi-asserted-by":"crossref","unstructured":"Coors B, Paul Condurache A, Geiger A. Spherenet: learning spherical representations for detection and classification in omnidirectional images. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 518\u201333.","DOI":"10.1007\/978-3-030-01240-3_32"},{"key":"735_CR169","unstructured":"Su YC, Grauman K. Learning spherical convolution for fast features from 360 imagery. In: Advances in Neural Information Processing Systems; 2017. pp. 529\u201339."},{"key":"735_CR170","doi-asserted-by":"crossref","unstructured":"Esteves C, Allen-Blanchette C, Makadia A, Daniilidis K. Learning so (3) equivariant representations with spherical CNNs. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 52\u201368.","DOI":"10.1007\/978-3-030-01261-8_4"},{"key":"735_CR171","doi-asserted-by":"crossref","unstructured":"Su YC, Grauman K. Kernel transformer networks for compact spherical convolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 9442\u201351.","DOI":"10.1109\/CVPR.2019.00967"},{"key":"735_CR172","volume-title":"Augmented reality: principles and practice","author":"D Schmalstieg","year":"2016","unstructured":"Schmalstieg D, Hollerer T. Augmented reality: principles and practice. Addison-Wesley Professional; 2016."},{"key":"735_CR173","first-page":"301027","volume":"35","author":"M Hirabayashi","year":"2020","unstructured":"Hirabayashi M, Kurosawa K, Yokota R, Imoto D, Hawai Y, Akiba N, et al. Flying object detection system using an omnidirectional camera. Forensic Sci Int. 2020;35:301027.","journal-title":"Forensic Sci Int"},{"key":"735_CR174","unstructured":"Cohen TS, Geiger M, Weiler M. A general theory of equivariant cnns on homogeneous spaces. In: Advances in Neural Information Processing Systems; 2019. pp. 9145\u201356."},{"key":"735_CR175","unstructured":"Weiler M, Cesa G. General e (2)-equivariant steerable CNNs. In: Advances in Neural Information Processing Systems; 2019. pp. 14334\u201345."},{"key":"735_CR176","unstructured":"Kondor R, Trivedi S. On the generalization of equivariance and convolution in neural networks to the action of compact groups. arXiv preprint. 2018. arXiv:1802.03690."},{"key":"735_CR177","doi-asserted-by":"publisher","DOI":"10.1201\/b19172","volume-title":"A course in abstract harmonic analysis","author":"GB Folland","year":"2016","unstructured":"Folland GB. A course in abstract harmonic analysis, vol. 29. CRC Press; 2016."},{"key":"735_CR178","unstructured":"Tai KS, Bailis P, Valiant G. Equivariant transformer networks. arXiv preprint. 2019. arXiv:1901.11399."},{"key":"735_CR179","unstructured":"Lenssen JE, Fey M, Libuschewski P. Group equivariant capsule networks. In: Advances in Neural Information Processing Systems; 2018. pp. 8844\u201353."},{"key":"735_CR180","unstructured":"Romero DW, Bekkers EJ, Tomczak JM, Hoogendoorn M. Attentive group equivariant convolutional networks. arXiv preprint. 2020. arXiv:2002.03830."},{"key":"735_CR181","unstructured":"Worrall D, Welling M. Deep scale-spaces: equivariance over scale. In: Advances in Neural Information Processing Systems; 2019. pp. 7366\u201378."},{"key":"735_CR182","unstructured":"Marcos D, Kellenberger B, Lobry S, Tuia D. Scale equivariance in CNNs with vector fields. arXiv preprint. 2018. arXiv:1807.11783."},{"key":"735_CR183","unstructured":"Sosnovik I, Szmaja M, Smeulders A. Scale-equivariant steerable networks. arXiv preprint. 2019. arXiv:1910.11093."},{"key":"735_CR184","unstructured":"Romero DW, Bekkers EJ, Tomczak JM, Hoogendoorn M. Wavelet networks: scale equivariant learning from raw waveforms. arXiv preprint. 2020. arXiv:2006.05259."},{"key":"735_CR185","unstructured":"Cheng X, Qiu Q, Calderbank R, Sapiro G. RotDCF: decomposition of convolutional filters for rotation-equivariant deep networks. arXiv preprint. 2018. arXiv:1805.06846."},{"issue":"2","key":"735_CR186","doi-asserted-by":"publisher","first-page":"1441","DOI":"10.1093\/mnras\/stv632","volume":"450","author":"S Dieleman","year":"2015","unstructured":"Dieleman S, Willett KW, Dambre J. Rotation- invariant convolutional neural networks for galaxy morphology prediction. Mon Not R Astron Soc. 2015;450(2):1441\u201359.","journal-title":"Mon Not R Astron Soc"},{"key":"735_CR187","unstructured":"Cohen TS, weiler M, Kicanaoglu B, Welling M. Gauge equivariant convolutional networks and the icosahedral CNN. In: Proceedings of the 36th International Conference on Machine Learning, 2019:97:1321\u201330."},{"key":"735_CR188","doi-asserted-by":"crossref","unstructured":"Worrall D, Brostow G. Cubenet: equivariance to 3D rotation and translation. In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. pp. 567\u201384.","DOI":"10.1007\/978-3-030-01228-1_35"},{"key":"735_CR189","unstructured":"Cohen TS, Welling M. Transformation properties of learned visual representations. arXiv preprint. 2014. arXiv:1412.7659."},{"key":"735_CR190","unstructured":"Smets B, Portegies J, Bekkers E, Duits R. PDE-based group equivariant convolutional neural networks. arXiv preprint. 2020. arXiv:2001.09046."},{"key":"735_CR191","unstructured":"Romero DW, Hoogendoorn M. Co-attentive equivariant neural networks: Focusing equivariance on transformations co-occurring in data. arXiv preprint. 2019. arXiv:1911.07849."},{"key":"735_CR192","unstructured":"Romero DW, Cordonnier JB. Group equivariant stand-alone self-attention for vision. arXiv preprint. 2020. arXiv:2010.00977."},{"key":"735_CR193","unstructured":"Finzi M, Stanton S, Izmailov P, Wilson AG. Generalizing convolutional neural networks for equivariance to lie groups on arbitrary continuous data. arXiv preprint. 2020. arXiv:2002.12880."},{"issue":"8","key":"735_CR194","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1109\/TPAMI.2012.230","volume":"35","author":"J Bruna","year":"2013","unstructured":"Bruna J, Mallat S. Invariant scattering convolution networks. IEEE Trans Pattern Anal Mach Intell. 2013;35(8):1872\u201386.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"735_CR195","unstructured":"Bekkers EJ. B-spline CNNs on lie groups. arXiv preprint. 2019. arXiv:1909.12057."},{"key":"735_CR196","doi-asserted-by":"crossref","unstructured":"Fey M, Eric Lenssen J, Weichert F, Mu\u00a8ller H. Splinecnn: fast geometric deep learning with continuous b-spline kernels. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. pp. 869\u201377.","DOI":"10.1109\/CVPR.2018.00097"},{"issue":"3","key":"735_CR197","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1038\/35058500","volume":"2","author":"L Itti","year":"2001","unstructured":"Itti L, Koch C. Computational modelling of visual attention. Nat Rev Neurosci. 2001;2(3):194\u2013203.","journal-title":"Nat Rev Neurosci"},{"key":"735_CR198","unstructured":"Dey N, Chen A, Ghafurian S. Group equivariant generative adversarial networks. arXiv preprint. 2020. arXiv:2005.01683."},{"key":"735_CR199","doi-asserted-by":"crossref","unstructured":"Shen C, Wang X, Song J, Sun L, Song M. Amalgamating knowledge towards comprehensive classification. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol. 33; 2019. pp. 3068\u201375.","DOI":"10.1609\/aaai.v33i01.33013068"},{"key":"735_CR200","doi-asserted-by":"crossref","unstructured":"Carlucci FM, D\u2019Innocente A, Bucci S, Caputo B, Tommasi T. Domain generalization by solving jigsaw puzzles. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 2229\u201338.","DOI":"10.1109\/CVPR.2019.00233"},{"key":"735_CR201","unstructured":"Finn C, Abbeel P, Levine S. Model-agnostic meta- learning for fast adaptation of deep networks. arXiv preprint. 2017. arXiv:1703.03400."},{"key":"735_CR202","doi-asserted-by":"crossref","unstructured":"Jarvers C, Neumann H. Incorporating feedback in convolutional neural networks. In: Proceedings of the Cognitive Computational Neuroscience Conference; 2019. pp. 395\u20138.","DOI":"10.32470\/CCN.2019.1191-0"},{"key":"735_CR203","doi-asserted-by":"publisher","first-page":"94","DOI":"10.3389\/fncom.2016.00094","volume":"10","author":"AH Marblestone","year":"2016","unstructured":"Marblestone AH, Wayne G, Kording KP. Toward an integration of deep learning and neuroscience. Front Comput Neurosci. 2016;10:94.","journal-title":"Front Comput Neurosci"},{"key":"735_CR204","doi-asserted-by":"crossref","unstructured":"Hu T, Yang P, Zhang C, Yu G, Mu Y, Snoek CG. Attention-based multi-context guiding for few-shot semantic segmentation. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol. 33; 2019. pp 8441\u20138.","DOI":"10.1609\/aaai.v33i01.33018441"},{"key":"735_CR205","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05318-5","volume-title":"Automated machine learning: methods, systems, challenges","author":"F Hutter","year":"2019","unstructured":"Hutter F, Kotthoff L, Vanschoren J. Automated machine learning: methods, systems, challenges. Springer Nature; 2019."},{"key":"735_CR206","unstructured":"He X, Zhao K, Chu X. AutoML: a survey of the state-of-the-art. arXiv preprint. 2019. arXiv:1908.00709."},{"key":"735_CR207","unstructured":"Zoph B, Le QV. Neural architecture search with reinforcement learning. arXiv preprint. 2016. arXiv:1611.01578."},{"key":"735_CR208","unstructured":"Peng J, Sun M, ZHANG ZX, Tan T, Yan J. Efficient neural architecture transformation search in channel- level for object detection. In: Advances in Neural Information Processing Systems; 2019. pp. 14313\u201322."},{"key":"735_CR209","doi-asserted-by":"crossref","unstructured":"Nekrasov V, Chen H, Shen C, Reid I. Fast neural architecture search of compact semantic segmentation models via auxiliary cells. In: Proceedings of the IEEE Conference on computer vision and pattern recognition; 2019. pp. 9126\u201335.","DOI":"10.1109\/CVPR.2019.00934"},{"key":"735_CR210","doi-asserted-by":"crossref","unstructured":"Zhang Y, Qiu Z, Liu J, Yao T, Liu D, Mei T. Customizable architecture search for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2019. pp. 11641\u201350.","DOI":"10.1109\/CVPR.2019.01191"},{"key":"735_CR211","doi-asserted-by":"crossref","unstructured":"Liu C, Chen LC, Schroff F, Adam H, Hua W, Yuille AL, et al. Auto-deeplab: Hierarchical neural architecture search for semantic image segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2019. pp. 82\u201392.","DOI":"10.1109\/CVPR.2019.00017"},{"key":"735_CR212","doi-asserted-by":"crossref","unstructured":"Elsken T, Staffler B, Metzen JH, Hutter F. Meta-learning of neural architectures for few-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2020. pp. 12365\u201375.","DOI":"10.1109\/CVPR42600.2020.01238"},{"key":"735_CR213","unstructured":"Biedenkapp A, Bozkurt HF, Eimer T, Hutter F, Lindauer M. Dynamic algorithm configuration: foundation of a new meta-algorithmic framework. In: Proceedings of the Twenty-fourth European Conference on Artificial Intelligence (ECAI\u201920) (Jun 2020); 2020."},{"key":"735_CR214","unstructured":"Elsken T, Metzen JH, Hutter F. Simple and efficient architecture search for convolutional neural networks. arXiv preprint. 2017. arXiv:1711.04528."},{"key":"735_CR215","doi-asserted-by":"crossref","unstructured":"Veniat T, Denoyer L. Learning time\/memory-efficient deep architectures with budgeted super networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. pp. 3492\u2013500.","DOI":"10.1109\/CVPR.2018.00368"},{"key":"735_CR216","doi-asserted-by":"crossref","unstructured":"Jin H, Song Q, Hu X. Auto-keras: An efficient neural architecture search system. In: Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining; 2019. pp. 1946\u201356.","DOI":"10.1145\/3292500.3330648"},{"key":"735_CR217","first-page":"113","volume-title":"Auto-sklearn: efficient and robust automated machine learning. In: Automated machine learning","author":"M Feurer","year":"2019","unstructured":"Feurer M, Klein A, Eggensperger K, Springenberg JT, Blum M, Hutter F. Auto-sklearn: efficient and robust automated machine learning. In: Automated machine learning. Cham: Springer; 2019. p. 113\u201334."}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-021-00735-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-021-00735-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-021-00735-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T22:10:41Z","timestamp":1725228641000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-021-00735-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,16]]},"references-count":217,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,9]]}},"alternative-id":["735"],"URL":"https:\/\/doi.org\/10.1007\/s42979-021-00735-0","relation":{},"ISSN":["2662-995X","2661-8907"],"issn-type":[{"value":"2662-995X","type":"print"},{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,16]]},"assertion":[{"value":"22 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 June 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 June 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There are no competing interests or conflict of interests associated with this work, and there has not been any support or external involvement of any sort in this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Not applicable. No person\u2019s data have been used in this work. Consequently, consent is not needed for its publication.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Participate"}},{"value":"Both authors agree to be fully responsible for all aspects of the work, including the content and all ethical and legal issues.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Final Comments"}}],"article-number":"340"}}