{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T03:50:18Z","timestamp":1768794618241,"version":"3.49.0"},"reference-count":69,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100002341","name":"Research Council of Finland","doi-asserted-by":"publisher","award":["286607"],"award-info":[{"award-number":["286607"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Research Council of Finland","doi-asserted-by":"publisher","award":["319323"],"award-info":[{"award-number":["319323"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Research Council of Finland","doi-asserted-by":"publisher","award":["336033"],"award-info":[{"award-number":["336033"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Research Council of Finland","doi-asserted-by":"publisher","award":["315896"],"award-info":[{"award-number":["315896"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"name":"BusinessFinland","award":["884\/31\/2018"],"award-info":[{"award-number":["884\/31\/2018"]}]},{"name":"EU H2020","award":["101016775"],"award-info":[{"award-number":["101016775"]}]},{"name":"Research Council of Finland Flagship Program via Finnish Center for Artificial Intelligence"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1109\/tai.2024.3353164","type":"journal-article","created":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T14:04:42Z","timestamp":1705068282000},"page":"3926-3938","source":"Crossref","is-referenced-by-count":9,"title":["Self-Supervised Forecasting in Electronic Health Records With Attention-Free Models"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7961-8596","authenticated-orcid":false,"given":"Yogesh","family":"Kumar","sequence":"first","affiliation":[{"name":"Department of Computer Science, Aalto University, Aalto, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6419-3006","authenticated-orcid":false,"given":"Alexander","family":"Ilin","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Aalto University, Aalto, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6374-9444","authenticated-orcid":false,"given":"Henri","family":"Salo","sequence":"additional","affiliation":[{"name":"Information Services Department, Finnish Institute for Health and Welfare, Helsinki, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4542-8460","authenticated-orcid":false,"given":"Sangita","family":"Kulathinal","sequence":"additional","affiliation":[{"name":"Department of Mathematics and Statistics, University of Helsinki, Helsinki, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7631-4749","authenticated-orcid":false,"given":"Maarit K.","family":"Leinonen","sequence":"additional","affiliation":[{"name":"Information Services Department, Finnish Institute for Health and Welfare, Helsinki, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7078-7927","authenticated-orcid":false,"given":"Pekka","family":"Marttinen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Aalto University, Aalto, Finland"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11429"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330701"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2019.103256"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-018-0677-8"},{"key":"ref5","article-title":"Layer normalization","author":"Ba","year":"2016"},{"key":"ref6","article-title":"Evaluating progress on machine learning for longitudinal electronic healthcare data","author":"Bellamy","year":"2020"},{"key":"ref7","first-page":"17","article-title":"Deep learning of representations for unsupervised and transfer learning","volume-title":"Proc. ICML Workshop Unsupervised Transfer Learn.","volume":"27","author":"Bengio","year":"2012"},{"key":"ref8","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"ref9","first-page":"1691","article-title":"Generative pretraining from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chen","year":"2020"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref11","first-page":"3512","article-title":"RETAIN: An interpretable predictive model for healthcare using reverse time attention mechanism","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Choi","year":"2016"},{"key":"ref12","first-page":"301","article-title":"Doctor AI: Predicting clinical events via recurrent neural networks","volume-title":"Proc. Mach. Learn. Healthcare Conf.","author":"Choi","year":"2016"},{"key":"ref13","article-title":"Graph convolutional transformer: Learning the graphical structure of electronic health records","author":"Choi","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2019.06.001"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-811325-7.00003-8"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1161\/01.CTR.101.23.e215"},{"key":"ref20","volume-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/d41573-019-00180-y"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0103-9"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref24","article-title":"Gaussian error linear units (GELUs)","author":"Hendrycks","year":"2016"},{"key":"ref25","volume-title":"Need Adjustment for Financing Health and Social Services in Finland","author":"H\u00e4kkinen"},{"key":"ref26","article-title":"Axial attention in multidimensional transformers","author":"Ho","year":"2019"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref29","article-title":"Attention is not explanation","author":"Jain","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06160-y"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01899-x"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1186\/s12916-019-1426-2"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16099"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1038\/s41551-022-00914-1"},{"key":"ref35","first-page":"93","article-title":"Predicting utilization of healthcare services from individual disease trajectories using RNNs with multi-headed attention","volume-title":"Proc. Mach. Learn. Health Workshop","author":"Kumar","year":"2020"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-811325-7.00005-1"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.319"},{"key":"ref38","article-title":"THL-toimenpideluokitus","author":"Lehtonen","year":"2013"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-020-62922-y"},{"key":"ref40","article-title":"Learning to diagnose with LSTM recurrent neural networks","author":"Lipton","year":"2015"},{"key":"ref41","first-page":"9204","article-title":"Pay attention to MLPs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Liu","year":"2021"},{"key":"ref42","article-title":"On the variance of the adaptive learning rate and beyond","author":"Liu","year":"2019"},{"key":"ref43","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"ref44","article-title":"A unified approach to interpreting model predictions","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Lundberg","year":"2017"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-811325-7.00001-4"},{"key":"ref46","article-title":"Do you even need attention? A stack of feed-forward layers does surprisingly well on ImageNet","author":"Melas-Kyriazi","year":"2021"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2021.3063721"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1038\/srep26094"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2022.102430"},{"key":"ref50","first-page":"8024","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","volume-title":"Advances in Neural Information Processing Systems 32","author":"Paszke","year":"2019"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"issue":"8","key":"ref52","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"issue":"1","key":"ref53","first-page":"5485","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-021-00455-y"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1089\/big.2015.0020"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1111\/1475-6773.12464"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1177\/0272989X20985764"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/825"},{"key":"ref59","article-title":"GLU variants improve transformer","author":"Shazeer","year":"2020"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0225242"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1111\/1475-6773.12818"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2017.58"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-821777-1.00020-3"},{"key":"ref64","first-page":"24261","article-title":"MLP-mixer: An all-MLP architecture for vision","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Tolstikhin","year":"2021"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2022.3206148"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1002"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20204"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9078688\/10635096\/10398588.pdf?arnumber=10398588","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:09:07Z","timestamp":1755911347000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10398588\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8]]},"references-count":69,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tai.2024.3353164","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8]]}}}