{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T20:55:10Z","timestamp":1774990510524,"version":"3.50.1"},"reference-count":94,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2020YFB2103402"],"award-info":[{"award-number":["2020YFB2103402"]}]},{"name":"Beijing Academy of Artificial Intelligence"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,7,1]]},"DOI":"10.1109\/tpami.2023.3236725","type":"journal-article","created":{"date-parts":[[2023,1,13]],"date-time":"2023-01-13T22:04:01Z","timestamp":1673647441000},"page":"8176-8192","source":"Crossref","is-referenced-by-count":10,"title":["Convolution-Enhanced Evolving Attention Networks"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7940-5216","authenticated-orcid":false,"given":"Yujing","family":"Wang","sequence":"first","affiliation":[{"name":"Key Laboratory of Machine Perception, MOE, School of Intelligence Science and Technology, Peking University, Beijing, China"}]},{"given":"Yaming","family":"Yang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception, MOE, School of Intelligence Science and Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4005-8181","authenticated-orcid":false,"given":"Zhuo","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Engineering Research Center of Microprocessor and System, Peking University, Beijing, China"}]},{"given":"Jiangang","family":"Bai","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception, MOE, School of Intelligence Science and Technology, Peking University, Beijing, China"}]},{"given":"Mingliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception, MOE, School of Intelligence Science and Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0550-8247","authenticated-orcid":false,"given":"Xiangtai","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception, MOE, School of Intelligence Science and Technology, Peking University, Beijing, China"}]},{"given":"Jing","family":"Yu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Ce","family":"Zhang","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich, Z&#x00FC;rich, 
Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7251-0988","authenticated-orcid":false,"given":"Gao","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8735-2516","authenticated-orcid":false,"given":"Yunhai","family":"Tong","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception, MOE, School of Intelligence Science and Technology, Peking University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref13","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"raffel","year":"2020","journal-title":"J Mach Learn Res"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_42"},{"key":"ref12","article-title":"Graph attention networks","author":"veli?kovi?","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_34"},{"key":"ref15","first-page":"68","article-title":"Stand-alone self-attention in vision models","author":"parmar","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref59","first-page":"213","article-title":"End-to-end object detection with transformers","author":"carion","year":"2020","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref14","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_31"},{"key":"ref53","article-title":"EATFormer: Improving vision transformer inspired by evolutionary algorithm","author":"zhang","year":"2022"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"ref11","first-page":"10 971","article-title":"Evolving attention with residual convolutions","author":"wang","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01828"},{"key":"ref10","article-title":"Lite transformer with long-short range attention","author":"wu","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00475"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467401"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5431"},{"key":"ref18","article-title":"A survey on visual transformer","author":"han","year":"2020"},{"key":"ref93","first-page":"4690","article-title":"Monarch: Expressive structured matrices for efficient and accurate training","author":"dao","year":"2022","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref92","first-page":"9438","article-title":"Sparse sinkhorn attention","author":"tay","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref50","first-page":"10 347","article-title":"Training data-efficient image transformers & distillation through attention","author":"touvron","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357895"},{"key":"ref91","first-page":"3340","article-title":"A regularized framework for sparse and structured 
neural attention","author":"niculae","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939778"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0229-5"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00484"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.408"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01009"},{"key":"ref47","article-title":"LambdaNetworks: Modeling long-range interactions without attention","author":"bello","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00378"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.686"},{"key":"ref41","first-page":"213","article-title":"End-to-end object detection with transformers","author":"carion","year":"2020","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01132"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref43","first-page":"7354","article-title":"Self-attention generative adversarial networks","author":"zhang","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref87","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01214"},{"key":"ref8","first-page":"3543","article-title":"Attention is not explanation","author":"jain","year":"2019","journal-title":"Proc North Amer Chapter Assoc Comput Linguistics Hum Lang Technol"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1458"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00338"},{"key":"ref4","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref3","first-page":"5243","article-title":"Enhancing the locality and breaking the memory bottleneck of transformer on time series forecasting","author":"li","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref6","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"liu","year":"2019"},{"key":"ref5","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.81"},{"key":"ref81","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3099369"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3302"},{"key":"ref83","first-page":"2","article-title":"Report on the 11th IWSLT evaluation campaign, IWSLT 2014","author":"cettolo","year":"2014","journal-title":"Proc Int Workshop Spoken Lang 
Transl"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/324"},{"key":"ref79","article-title":"The UEA multivariate time series classification archive, 2018","author":"bagnall","year":"2018"},{"key":"ref34","article-title":"Unsupervised representation learning for time series with temporal neighborhood coding","author":"tonekaboni","year":"2021","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref78","article-title":"Monash university, UEA, UCR time series extrinsic regression archive","author":"tan","year":"2020"},{"key":"ref37","first-page":"3","article-title":"CBAM: Convolutional block attention module","author":"woo","year":"2018","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20881"},{"key":"ref31","article-title":"Improving clinical predictions through unsupervised time series representation learning","author":"lyu","year":"2018"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/476"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.14778\/3342263.3342648"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11635"},{"key":"ref76","first-page":"8026","article-title":"PyTorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref2","first-page":"4055","article-title":"Image transformer","author":"parmar","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref1","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics Hum Lang Technol"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00326"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"ref71","first-page":"3844","article-title":"Convolutional neural networks on graphs with fast localized spectral filtering","author":"defferrard","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313562"},{"key":"ref73","article-title":"On the variance of the adaptive learning rate and beyond","author":"liu","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref72","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"2017","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-55320-6"},{"key":"ref68","first-page":"125","article-title":"WaveNet: A generative model for raw audio","author":"van den oord","year":"2016","journal-title":"Proc 9th ISCA Speech Synth Workshop"},{"key":"ref23","article-title":"LSTM-based encoder-decoder for multi-sensor anomaly detection","author":"malhotra","year":"2016"},{"key":"ref67","first-page":"5877","article-title":"The evolved transformer","author":"so","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref26","first-page":"4650","article-title":"Unsupervised scalable 
representation learning for multivariate time series","author":"franceschi","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref25","article-title":"Multi-scale context aggregation by dilated convolutions","author":"yu","year":"2016","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2010.5537907"},{"key":"ref20","article-title":"Synthesizer: Rethinking self-attention in transformer models","author":"tay","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref64","first-page":"12 437","article-title":"Poolingformer: Long document modeling with pooling attention","author":"zhang","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref63","article-title":"Reformer: The efficient transformer","author":"kitaev","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref22","first-page":"97","article-title":"Multidimensional time series anomaly detection: A GRU-based gaussian mixture variational autoencoder approach","author":"guo","year":"2018","journal-title":"Proc Asian Conf Mach Learn"},{"key":"ref66","first-page":"12 081","article-title":"Novel positional encodings to enable tree-based transformers","author":"shiv","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1002"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1032"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/366"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7966039"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05815-0"},{"key":"ref60","first-page":"1877","article-title":"Language models are few-shot learners","author":"brown","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-2074"},{"key":"ref61","first-page":"13063","article-title":"Unified language model pre-training for natural language understanding and generation","author":"dong","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10144448\/10016752.pdf?arnumber=10016752","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T17:48:34Z","timestamp":1722361714000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10016752\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,1]]},"references-count":94,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3236725","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,1]]}}}