{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T03:03:47Z","timestamp":1772507027879,"version":"3.50.1"},"reference-count":58,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Analytical center under the RF Government","award":["000000D730321P5Q0002"],"award-info":[{"award-number":["000000D730321P5Q0002"]}]},{"name":"Analytical center under the RF Government","award":["70-2021-00145 02.11.2021"],"award-info":[{"award-number":["70-2021-00145 02.11.2021"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3418779","type":"journal-article","created":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T20:32:27Z","timestamp":1719347547000},"page":"97833-97850","source":"Crossref","is-referenced-by-count":3,"title":["Robust Representation Learning via Sparse Attention Mechanism for Similarity Models"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1392-6908","authenticated-orcid":false,"given":"Alina","family":"Ermilova","sequence":"first","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikita","family":"Baramiia","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Valerii","family":"Kornilov","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergey","family":"Petrakov","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexey","family":"Zaytsev","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3082557"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1016\/j.petrol.2022.110690","article-title":"Similarity learning for wells based on logging data","volume":"215","author":"Romanenkova","year":"2022","journal-title":"J. Petroleum Sci. Eng."},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref4","first-page":"1","article-title":"Input-cell attention reduces vanishing saliency of recurrent neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Ismail"},{"key":"ref5","first-page":"1","article-title":"Dont pay attention to the noise: Learning self-supervised representations of light curves with a denoising time series transformer","volume-title":"Proc. Workshop AI Earth Sci.","author":"Morvan"},{"key":"ref6","article-title":"Noisy text data: Achilles heel of popular transformer based NLP models","author":"Bagla","year":"2021","journal-title":"arXiv:2110.03353"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3530811"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17325"},{"key":"ref9","first-page":"1","article-title":"Rethinking attention with performers","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Choromanski"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3140693"},{"key":"ref11","first-page":"2422","article-title":"Mitigating transformer overconfidence via Lipschitz regularization","volume-title":"Proc. Uncertainty Artif. Intell.","author":"Ye"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26317"},{"key":"ref13","first-page":"1","article-title":"A time series is worth 64 words: Long-term forecasting with transformers","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Nie"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.2523\/IPTC-22067-MS"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.2118\/15295-PA"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.2118\/90471-MS"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.petrol.2021.108602"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2014.2317498"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.2118\/208109-MS"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4286293"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2023.3277214"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.2118\/206537-MS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.flowmeasinst.2021.102047"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.petrol.2021.108936"},{"key":"ref25","first-page":"318","article-title":"Learning internal representations by error propagation, in parallel distributed processing","volume":"1","author":"Rumelhart","year":"1986","journal-title":"Explor. Microstructure Cognition"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1406.1078"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref28","first-page":"88","article-title":"Acceptability judgements via examining the topology of attention maps","volume-title":"Proc. Findings Assoc. Comput. Linguistics","author":"Cherniavskii"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i14.17533"},{"key":"ref30","first-page":"1662","volume-title":"Statistical Visions in Time: A History of Time Series Analysis","author":"Klein","year":"1997"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20881"},{"key":"ref32","first-page":"17283","article-title":"Big bird: Transformers for longer sequences","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Zaheer"},{"key":"ref33","article-title":"Linformer: Self-attention with linear complexity","author":"Wang","year":"2020","journal-title":"arXiv:2006.04768"},{"key":"ref34","article-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020","journal-title":"arXiv:2004.05150"},{"key":"ref35","article-title":"Reformer: The efficient transformer","author":"Kitaev","year":"2020","journal-title":"arXiv:2001.04451"},{"key":"ref36","first-page":"1","article-title":"Long range arena: A benchmark for efficient transformers","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Tay"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210006"},{"key":"ref38","volume-title":"The New Zealand Petroleum & Minerals Online Exploration Database","year":"2015"},{"key":"ref39","volume-title":"Petlab: New Zealands National Rock, Mineral and Geoanalytical Database","author":"Science","year":"2004"},{"key":"ref40","volume-title":"Crimes in Boston","author":"Boston","year":"2018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s10708-021-10485-4"},{"key":"ref42","doi-asserted-by":"crossref","DOI":"10.20944\/preprints202002.0108.v1","volume-title":"Machine learning algorithms for visualization and prediction modeling of Boston crime data","author":"Yin","year":"2020"},{"key":"ref43","article-title":"A classification approach to predict severity of crime on Boston city crime data","volume-title":"Data Science and SDGs: Challenges, Opportunities and Realities","author":"Tasnim","year":"2019"},{"key":"ref44","volume-title":"Using Copernicus Atmosphere Monitoring Service Information","year":"2019"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2991094"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1002\/qj.3803"},{"key":"ref47","volume-title":"Djia 30 Stock Time Series","author":"Li","year":"2018"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.5220\/0006922101420153"},{"key":"ref49","first-page":"4344","article-title":"Transformer dissection: An unified understanding for transformers attention via the lens of kernel","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process. 9th Int. Joint Conf. Natural Lang. Process. (EMNLP-IJCNLP)","author":"Tsai"},{"issue":"56","key":"ref50","first-page":"1929","article-title":"Dropout: A simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"ref51","first-page":"1","article-title":"Random feature attention","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Peng"},{"key":"ref52","first-page":"1","article-title":"Quadrature-based features for kernel approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Munkhoeva"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref54","first-page":"2931","article-title":"Is attention interpretable?","volume-title":"Proc. 57th Annu. Meeting Assoc. Comput. Linguistics","author":"Serrano"},{"key":"ref55","first-page":"1","article-title":"Attention is not explanation","volume-title":"Proc. North Amer. Chapter Assoc. Comput. Linguistics","author":"Jain"},{"issue":"11","key":"ref56","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref57","first-page":"1","article-title":"ScaleFace: Uncertainty-aware deep metric learning","volume-title":"Proc. IEEE 10th Int. Conf. Data Sci. Adv. Analytics (DSAA)","author":"Kail"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1002"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10570432.pdf?arnumber=10570432","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,25]],"date-time":"2024-07-25T17:35:13Z","timestamp":1721928913000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10570432\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":58,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3418779","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}