{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T21:58:30Z","timestamp":1781042310686,"version":"3.54.1"},"reference-count":56,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100004052","name":"KAUST","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004052","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.neucom.2026.134040","type":"journal-article","created":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T06:50:11Z","timestamp":1779346211000},"page":"134040","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Sparse neural sampling mixers"],"prefix":"10.1016","volume":"695","author":[{"given":"Ahmed","family":"Elsheikh","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7139-3428","authenticated-orcid":false,"given":"Mohammed E.","family":"Fouda","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1849-083X","authenticated-orcid":false,"given":"Ahmed M.","family":"Eltawil","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.134040_bib0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109347","article-title":"A comprehensive survey of image augmentation techniques for deep learning","author":"Xu","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.134040_bib0010","doi-asserted-by":"crossref","first-page":"464","DOI":"10.1038\/s41928-023-00985-1","article-title":"Solving the big computing problems in the twenty-first century","volume":"6","author":"Conklin","year":"2023","journal-title":"Nat. Electron."},{"key":"10.1016\/j.neucom.2026.134040_bib0015","doi-asserted-by":"crossref","first-page":"1706","DOI":"10.1109\/JPROC.2021.3098483","article-title":"Hardware acceleration of sparse and irregular tensor computations of ML models: a survey and insights","volume":"109","author":"Dave","year":"2021","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.neucom.2026.134040_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.sysarc.2019.101635","article-title":"A survey of techniques for optimizing deep learning on GPUs","volume":"99","author":"Mittal","year":"2019","journal-title":"J. Syst. Archit."},{"key":"10.1016\/j.neucom.2026.134040_bib0025","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3527156","article-title":"Hardware approximate techniques for deep neural network accelerators: a survey","volume":"55","author":"Armeniakos","year":"2022","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.neucom.2026.134040_bib0030","doi-asserted-by":"crossref","first-page":"485","DOI":"10.1109\/JPROC.2020.2976475","article-title":"Model compression and hardware acceleration for neural networks: a comprehensive survey","volume":"108","author":"Deng","year":"2020","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.neucom.2026.134040_bib0035","author":"Dao"},{"key":"10.1016\/j.neucom.2026.134040_bib0040","first-page":"17413","article-title":"Scatterbrain: unifying sparse and low-rank attention","volume":"34","author":"Chen","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0045","author":"Achiam"},{"key":"10.1016\/j.neucom.2026.134040_bib0050","article-title":"The building blocks of a brain-inspired computer","volume":"7","author":"Kendall","year":"2020","journal-title":"Appl. Phys. Rev."},{"key":"10.1016\/j.neucom.2026.134040_bib0055","doi-asserted-by":"crossref","first-page":"2571","DOI":"10.1038\/s41467-022-30305-8","article-title":"Neural sampling machine with stochastic synapse allows brain-like learning and inference","volume":"13","author":"Dutta","year":"2022","journal-title":"Nat. Commun."},{"key":"10.1016\/j.neucom.2026.134040_bib0060","doi-asserted-by":"crossref","first-page":"693","DOI":"10.1038\/nnano.2016.70","article-title":"Stochastic phase-change neurons","volume":"11","author":"Tuma","year":"2016","journal-title":"Nat. Nanotechnol."},{"key":"10.1016\/j.neucom.2026.134040_bib0065","doi-asserted-by":"crossref","first-page":"241","DOI":"10.3389\/fnins.2016.00241","article-title":"Stochastic synapses enable efficient brain-inspired learning machines","volume":"10","author":"Neftci","year":"2016","journal-title":"Front. Neurosci."},{"key":"10.1016\/j.neucom.2026.134040_bib0070","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pcbi.1002211","article-title":"Neural dynamics as sampling: a model for stochastic computation in recurrent networks of spiking neurons","volume":"7","author":"Buesing","year":"2011","journal-title":"PLOS Comput. Biol."},{"key":"10.1016\/j.neucom.2026.134040_bib0075","article-title":"Inherent weight normalization in stochastic neural networks","volume":"32","author":"Detorakis","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0080","article-title":"Globally trained handwritten word recognizer using spatial representation, convolutional neural networks, and hidden Markov models","volume":"6","author":"Bengio","year":"1993","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"8","key":"10.1016\/j.neucom.2026.134040_bib0085","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput. MIT-Press"},{"key":"10.1016\/j.neucom.2026.134040_bib0090","first-page":"5998","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0095","first-page":"24261","article-title":"MLP-mixer: an all-mlp architecture for vision","volume":"34","author":"Tolstikhin","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0100","first-page":"9204","article-title":"Pay attention to MLPs","volume":"34","author":"Liu","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0105","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"297","article-title":"S2-MLP: spatial-shift MLP architecture for vision","author":"Yu","year":"2022"},{"key":"10.1016\/j.neucom.2026.134040_bib0110","doi-asserted-by":"crossref","first-page":"5314","DOI":"10.1109\/TPAMI.2022.3206148","article-title":"Resmlp: feedforward networks for image classification with data-efficient training","volume":"45","author":"Touvron","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.134040_bib0115","author":"Chen"},{"key":"10.1016\/j.neucom.2026.134040_bib0120","first-page":"10882","article-title":"Sparsity in deep learning: pruning and growth for efficient inference and training in neural networks","volume":"22","author":"Hoefler","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.134040_bib0125","doi-asserted-by":"crossref","first-page":"887","DOI":"10.1109\/TAI.2022.3170001","article-title":"The new generation brain-inspired sparse learning: a comprehensive survey","volume":"3","author":"Jiao","year":"2022","journal-title":"IEEE Trans. Artif. Intell."},{"key":"10.1016\/j.neucom.2026.134040_bib0130","series-title":"2018 IEEE\/WIC\/ACM International Conference on Web Intelligence (WI)","first-page":"647","article-title":"A survey of sparse-learning methods for deep neural networks","author":"Ma","year":"2018"},{"key":"10.1016\/j.neucom.2026.134040_bib0135","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1038\/s41586-022-04992-8","article-title":"A compute-in-memory chip based on resistive random-access memory","volume":"608","author":"Wan","year":"2022","journal-title":"Nature"},{"key":"10.1016\/j.neucom.2026.134040_bib0140","series-title":"International Conference on Machine Learning","first-page":"1058","article-title":"Regularization of neural networks using dropconnect","author":"Wan","year":"2013"},{"key":"10.1016\/j.neucom.2026.134040_bib0145","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/j.aiopen.2022.10.001","article-title":"A survey of transformers","volume":"3","author":"Lin","year":"2022","journal-title":"AI Open"},{"key":"10.1016\/j.neucom.2026.134040_bib0150","series-title":"CUDA Programming: A Developer\u2019s Guide to Parallel Computing with GPUs","author":"Cook","year":"2012"},{"key":"10.1016\/j.neucom.2026.134040_bib0155","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1023\/B:JMMA.0000015831.64190.1e","article-title":"A comparative study on dynamic and static sparsity patterns in parallel sparse approximate inverse preconditioning","volume":"2","author":"Wang","year":"2003","journal-title":"J. Math. Model. Algorithms"},{"key":"10.1016\/j.neucom.2026.134040_bib0160","doi-asserted-by":"crossref","first-page":"5095","DOI":"10.1109\/TNNLS.2021.3071762","article-title":"A survey of deep learning on CPUs: opportunities and co-optimizations","volume":"33","author":"Mittal","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0165","first-page":"10882","article-title":"Sparsity in deep learning: pruning and growth for efficient inference and training in neural networks","volume":"22","author":"Hoefler","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.134040_bib0170","series-title":"Random Butterfly Transformations with Applications in Computational Linear Algebra","author":"Parker","year":"1995"},{"key":"10.1016\/j.neucom.2026.134040_bib0175","doi-asserted-by":"crossref","first-page":"803","DOI":"10.1109\/TASSP.1984.1164399","article-title":"Fast algorithms for the discrete W transform and for the discrete Fourier transform","volume":"32","author":"Wang","year":"1984","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"10.1016\/j.neucom.2026.134040_bib0180","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1109\/T-C.1974.223784","article-title":"Discrete cosine transform","volume":"100","author":"Ahmed","year":"1974","journal-title":"IEEE Trans. Comput."},{"key":"10.1016\/j.neucom.2026.134040_bib0185","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1023\/A:1019191114493","article-title":"Faster than the fast legendre transform, the linear-time legendre transform","volume":"16","author":"Lucet","year":"1997","journal-title":"Numer. Algorithms"},{"key":"10.1016\/j.neucom.2026.134040_bib0190","doi-asserted-by":"crossref","DOI":"10.1016\/j.patter.2022.100520","article-title":"Are we ready for a new paradigm shift? A survey on visual deep MLP","volume":"3","author":"Liu","year":"2022","journal-title":"Patterns"},{"key":"10.1016\/j.neucom.2026.134040_bib0195","series-title":"International Conference on Machine Learning","first-page":"933","article-title":"Language modeling with gated convolutional networks","author":"Dauphin","year":"2017"},{"key":"10.1016\/j.neucom.2026.134040_bib0200","doi-asserted-by":"crossref","first-page":"500","DOI":"10.1109\/JPROC.2023.3268092","article-title":"Resistive neural hardware accelerators","volume":"111","author":"Smagulova","year":"2023","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.neucom.2026.134040_bib0205","series-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"10.1016\/j.neucom.2026.134040_bib0210","doi-asserted-by":"crossref","DOI":"10.1016\/j.array.2021.100116","article-title":"Modeling and simulating in-memory memristive deep learning systems: an overview of current efforts","volume":"13","author":"Lammie","year":"2022","journal-title":"Array"},{"key":"10.1016\/j.neucom.2026.134040_bib0215","author":"Watanabe"},{"key":"10.1016\/j.neucom.2026.134040_bib0220","series-title":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining","first-page":"2623","article-title":"Optuna: a next-generation hyperparameter optimization framework","author":"Akiba","year":"2019"},{"key":"10.1016\/j.neucom.2026.134040_bib0225","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1016\/j.inffus.2021.11.005","article-title":"A comprehensive survey on regularization strategies in machine learning","volume":"80","author":"Tian","year":"2022","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.neucom.2026.134040_bib0230","article-title":"Theory of gating in recurrent neural networks","volume":"12","author":"Krishnamurthy","year":"2022","journal-title":"Phys. Rev. X"},{"key":"10.1016\/j.neucom.2026.134040_bib0235","author":"Authors"},{"key":"10.1016\/j.neucom.2026.134040_bib0240","author":"Langenberg"},{"key":"10.1016\/j.neucom.2026.134040_bib0245","series-title":"2025 International Conference on Machine Intelligence and Smart Innovation (ICMISI)","first-page":"207","article-title":"Adversarial robustness and transferability on stochastic resistive accelerators","author":"Smagulova","year":"2025"},{"key":"10.1016\/j.neucom.2026.134040_bib0250","doi-asserted-by":"crossref","first-page":"260","DOI":"10.1109\/JPROC.2018.2790840","article-title":"Neuro-inspired computing with emerging nonvolatile memorys","volume":"106","author":"Yu","year":"2018","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.neucom.2026.134040_bib0255","author":"Xiaoju"},{"key":"10.1016\/j.neucom.2026.134040_bib0260","series-title":"2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC)","first-page":"10","article-title":"1.1 computing\u2019s energy problem (and what we can do about it)","author":"Horowitz","year":"2014"},{"key":"10.1016\/j.neucom.2026.134040_bib0265","doi-asserted-by":"crossref","first-page":"2295","DOI":"10.1109\/JPROC.2017.2761740","article-title":"Efficient processing of deep neural networks: a tutorial and survey","volume":"105","author":"Sze","year":"2017","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.neucom.2026.134040_bib0270","author":"Loshchilov"},{"key":"10.1016\/j.neucom.2026.134040_bib0275","article-title":"When does label smoothing help?","volume":"32","author":"M\u00fcller","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.134040_bib0280","author":"Cubuk"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226014384?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226014384?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T21:27:45Z","timestamp":1781040465000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226014384"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":56,"alternative-id":["S0925231226014384"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.134040","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Sparse neural sampling mixers","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.134040","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"134040"}}