{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T10:01:33Z","timestamp":1756461693019,"version":"3.28.0"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T00:00:00Z","timestamp":1661126400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T00:00:00Z","timestamp":1661126400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,8,22]]},"DOI":"10.1109\/mlsp55214.2022.9943334","type":"proceedings-article","created":{"date-parts":[[2022,11,17]],"date-time":"2022-11-17T15:39:35Z","timestamp":1668699575000},"page":"01-06","source":"Crossref","is-referenced-by-count":1,"title":["SVIT: Hybrid Vision Transformer Models with Scattering Transform"],"prefix":"10.1109","author":[{"given":"Tianming","family":"Qiu","sequence":"first","affiliation":[{"name":"fortiss GmbH,Munich,Germany"}]},{"given":"Ming","family":"Gui","sequence":"additional","affiliation":[{"name":"Technical University of Munich,Munich,Germany"}]},{"given":"Cheng","family":"Yan","sequence":"additional","affiliation":[{"name":"Technical University of Munich,Munich,Germany"}]},{"given":"Ziqing","family":"Zhao","sequence":"additional","affiliation":[{"name":"Technical University of Munich,Munich,Germany"}]},{"given":"Hao","family":"Shen","sequence":"additional","affiliation":[{"name":"fortiss GmbH,Munich,Germany"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2855738"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00062"},{"key":"ref12","article-title":"Localvit: Bringing locality to vision transformers","author":"li","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Coatnet: Marrying convolution and attention for all data sizes","volume":"34","author":"dai","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"ref15","article-title":"Escaping the big data paradigm with compact transformers","author":"hassani","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1002\/cpa.21413"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-22482-4_59"},{"key":"ref19","first-page":"215","article-title":"An analysis of single-layer networks in unsupervised feature learning","author":"coates","year":"0","journal-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics JMLR Workshop and Conference Proceedings"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref3","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref6","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref5","article-title":"Multi-scale context ag-gregation by dilated convolutions","author":"yu","year":"0","journal-title":"International Con-ference on Learning Representations (ICLR)"},{"key":"ref8","article-title":"An image is worth 16x16 words: Trans-formers for image recognition at scale","author":"dosovitskiy","year":"0","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref7","article-title":"On the relationship between self-attention and convolutional layers","author":"cordonnier","year":"0","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"ref1","first-page":"1150","article-title":"Object recognition from local scaleinvariant features","volume":"2","author":"lowe","year":"0","journal-title":"Proceedings of the Seventh IEEE International Conference on Computer Vision"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.230"},{"key":"ref20","article-title":"Au-tomated flower classification over a large number of classes","author":"nilsback","year":"0","journal-title":"Indian Conference on Computer Vision Graphics and Image Processing"},{"key":"ref22","first-page":"2865","article-title":"Deep rototranslation scattering for object classification","author":"oyallon","year":"0","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2918242"}],"event":{"name":"2022 IEEE 32nd International Workshop on Machine Learning for Signal Processing (MLSP)","start":{"date-parts":[[2022,8,22]]},"location":"Xi'an, China","end":{"date-parts":[[2022,8,25]]}},"container-title":["2022 IEEE 32nd International Workshop on Machine Learning for Signal Processing (MLSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9943282\/9943306\/09943334.pdf?arnumber=9943334","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T15:00:01Z","timestamp":1670857201000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9943334\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,22]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/mlsp55214.2022.9943334","relation":{},"subject":[],"published":{"date-parts":[[2022,8,22]]}}}