{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,30]],"date-time":"2026-07-30T22:03:10Z","timestamp":1785448990435,"version":"3.56.0"},"reference-count":176,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9891914","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":264,"title":["Compute Trends Across Three Eras of Machine Learning"],"prefix":"10.1109","author":[{"given":"Jaime","family":"Sevilla","sequence":"first","affiliation":[{"name":"Epoch"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lennart","family":"Heim","sequence":"additional","affiliation":[{"name":"Epoch"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anson","family":"Ho","sequence":"additional","affiliation":[{"name":"Epoch"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tamay","family":"Besiroglu","sequence":"additional","affiliation":[{"name":"Epoch"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Marius","family":"Hobbhahn","sequence":"additional","affiliation":[{"name":"Epoch"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pablo","family":"Villalobos","sequence":"additional","affiliation":[{"name":"Epoch"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref170","author":"ba","year":"2016","journal-title":"Layer Normal-ization"},{"key":"ref172","author":"dai","year":"2016","journal-title":"R-FCN Object Detection via Region-based Fully Convolutional Networks[J]"},{"key":"ref171","author":"schuster","year":"2019","journal-title":"Cross-Lingual Alignment of Contex-tual Word Embeddings with Applications to Zero-shot Dependency Parsing"},{"key":"ref174","article-title":"Translating Embeddings for Modeling Multi-relational Data","volume":"26","author":"bordes","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref173","author":"wang","year":"2019","journal-title":"KEPLER A Unified Model for Knowl-edge Embedding and Pre-trained Language Representation"},{"key":"ref176","author":"leahy","year":"2022","journal-title":"Announcing GPT-NeoX-20B"},{"key":"ref175","author":"thoppilan","year":"2022","journal-title":"LaMDA Language Models for Dialog Applications"},{"key":"ref168","author":"jia","year":"2021","journal-title":"Scaling up visual and vision-language representation learning with noisy text supervision"},{"key":"ref169","author":"lan","year":"2019","journal-title":"Albert A lite bert for self-supervised learning of language representations"},{"key":"ref39","author":"sevilla","year":"2021","journal-title":"emphParame-ter Counts in Machine Learning"},{"key":"ref38","author":"li","year":"2020","journal-title":"OpenAI's GPT-3 language model A technical overview"},{"key":"ref33","author":"deng","year":"2009","journal-title":"Deep Learning for Speech Recognition and Related Applications"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390170"},{"key":"ref31","article-title":"Flexible, High Performance Con-volutional Neural Networks for Image Classification","author":"cire?an","year":"2011","journal-title":"Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence - Volume Volume Two"},{"key":"ref30","article-title":"High Perfor-mance Convolutional Neural Networks for Document Processing","author":"chellapilla","year":"2006","journal-title":"Tenth International Workshop on Fron-tiers in Handwriting Recognition"},{"key":"ref37","author":"wang","year":"2020","journal-title":"DeepMind achieved StarCraft II GrandMas-ter Level but at what cost?"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in StarCraft II using multi-agent reinforcement learning","volume":"575","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref35","author":"h","year":"2020","journal-title":"How Much Did AlphaGo Zero Cost"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref28","article-title":"Multi-column Deep Neural Networks for Image Classification","author":"cire?an","year":"2012","journal-title":"ArXiv"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"3207","DOI":"10.1162\/NECO_a_00052","article-title":"Deep, big, simple neural nets for handwritten digit recognition","volume":"22","author":"cire?an","year":"2010","journal-title":"Neural Computation"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2005.251"},{"key":"ref20","author":"desislavov","year":"2021","journal-title":"Compute and Energy Consumption Trends in Deep Learning Inference"},{"key":"ref22","author":"moore","year":"1965","journal-title":"emphThe Future of Integrated Electronics"},{"key":"ref21","author":"sevilla","year":"2022","journal-title":"Estimating training compute of Deep Learning models"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2001.990517"},{"key":"ref23","author":"alom","year":"2018","journal-title":"The history began from AlexNet A comprehensive survey on deep learning approaches"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-343"},{"key":"ref100","article-title":"Deep Residual Learning for Image Recognition","author":"he","year":"2015","journal-title":"ArXiv"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"873","DOI":"10.1145\/1553374.1553486","article-title":"Large-Scale Deep Unsupervised Learning Using Graphics Proces","author":"raina","year":"2009","journal-title":"Proceedings of the 26th Annual International Conference on Machine Learning Ser ICML '09"},{"key":"ref50","author":"lepikhin","year":"2020","journal-title":"Gshard Scaling giant models with conditional computation and automatic sharding"},{"key":"ref51","article-title":"Hardware- Und Nachrichten-Links Des 30.\/31. Okto-ber 2021","year":"0","journal-title":"3D Center"},{"key":"ref154","author":"komatsuzaki","year":"2021","journal-title":"GPT-J-6B 6B JAX-Based Trans-former"},{"key":"ref153","author":"zeng","year":"2021","journal-title":"PanGu Large-scale Autore-gressive Pretrained Chinese Language Models with Auto-parallel Computation"},{"key":"ref156","author":"ding","year":"2021","journal-title":"Cogview Mastering text-to-image generation via transformers"},{"key":"ref155","article-title":"Naver Corporation","year":"0","journal-title":"NVE Corporation"},{"key":"ref150","author":"nad","year":"0","journal-title":"DALL E Creating Images from Text"},{"key":"ref152","article-title":"GPT-Neo","year":"0","journal-title":"Eleuther AI"},{"key":"ref151","author":"fedus","year":"2021","journal-title":"Switch Transform-ers Scaling to Trillion Parameter Models with Simple and Efficient Sparsity"},{"key":"ref146","author":"pham","year":"2020","journal-title":"Meta pseudo labels"},{"key":"ref147","author":"mudigere","year":"2021","journal-title":"Software-Hardware Co-design for Fast and Scalable Training of Deep Learning Recom-mendation Models"},{"key":"ref148","author":"so","year":"2021","journal-title":"Primer Searching for efficient transformers for language modeling"},{"key":"ref149","author":"radford","year":"2021","journal-title":"Learning Transferable Visual Models From Natural Language Supervision"},{"key":"ref59","author":"barrett","year":"2021","journal-title":"Taiwan's drought is exposing just how much water chipmakers like TSMC use (and reuse)"},{"key":"ref58","first-page":"843","article-title":"Revisiting unreasonable effectiveness of data in deep learning era","author":"sun","year":"2017","journal-title":"Proceedings of the IEEE International Conference on Computer Vision"},{"key":"ref57","author":"orme","year":"2022","journal-title":"Report Microsoft Handed OpenAI $500m in Azure Credits"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2001.990517"},{"key":"ref55","article-title":"OpenAI API Pricing","year":"2021","journal-title":"OpenAir"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2012.02.023"},{"key":"ref53","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1145\/3065386","article-title":"Ima-geN et Classification with Deep Convolutional Neural Networks","volume":"60","author":"krizhevsky","year":"2017","journal-title":"Commun ACM"},{"key":"ref52","article-title":"ImageN et Classification with Deep Convolutional Neural Networks","volume":"25","author":"krizhevsky","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref40","author":"kohs","year":"2017","journal-title":"AlphaGo Moxie Pictures Reel As Dirt"},{"key":"ref167","author":"zhang","year":"2021","journal-title":"Aggregating nested transformers"},{"key":"ref166","author":"rae","year":"0","journal-title":"Language modelling at scale"},{"key":"ref165","author":"wu","year":"2021","journal-title":"Yuan 1 0 Large-Scale Pre-trained language Model in Zero-Shot and Few-Shot Learning"},{"key":"ref164","author":"alvi","year":"2021","journal-title":"Using DeepSpeed and Megatron to Train Megatron- Turing NLG 530B the World's Largest and Most Powerful Generative language Model"},{"key":"ref163","author":"lin","year":"2021","journal-title":"M6&#x2013;10 T A Sharing-Delinking Paradigm for Efficient Multi-Trillion Parameter Pretraining"},{"key":"ref162","author":"lieber","year":"0","journal-title":"Announcing AI21 Studio and Jurassic-1 Language Models"},{"key":"ref161","author":"goyal","year":"2021","journal-title":"Self-supervised pretraining of visual features in the wild"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref4","author":"amodei","year":"2018","journal-title":"AI and Compute"},{"key":"ref3","author":"kaplan","year":"2020","journal-title":"Scaling laws for neural language models"},{"key":"ref6","author":"lyzhov","year":"2021","journal-title":"AI and Compute Trend Isn't Predictive of What Is Happening"},{"key":"ref5","author":"sastry","year":"2019","journal-title":"AI and Compute Addendum Compute Used in Older Headline Results"},{"key":"ref159","article-title":"Baidu Research","year":"0","journal-title":"Baidu Res"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref7","author":"brown","year":"2020","journal-title":"Language models are few-shot learners"},{"key":"ref157","author":"zhai","year":"2021","journal-title":"Scaling vision transformers"},{"key":"ref158","author":"ho","year":"2020","journal-title":"Denoising diffusion probabilistic models"},{"key":"ref9","first-page":"12","article-title":"The bitter lesson","volume":"13","author":"sutton","year":"2019","journal-title":"Incomplete Ideas (blog)"},{"key":"ref46","author":"wiggers","year":"2021","journal-title":"Google Trained a Trillion-Parameter AI Language Model"},{"key":"ref45","author":"ajmera","year":"2021","journal-title":"Ford to Shut Some N American Plants for Few Weeks on Chip Shortage"},{"key":"ref48","author":"shilov","year":"2020","journal-title":"GPU Shortages Hit Nvidia's Data Center Business Not Enough $15 000+ GPUs"},{"key":"ref47","first-page":"103","article-title":"GPipe: Efficient Training of Gi-ant Neural Networks Using Pipeline Parallelism","author":"huang","year":"2019","journal-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1971\/1\/012100"},{"key":"ref41","year":"2019","journal-title":"AlphaStar Grandmaster level in StarCraft II using multiagent reinforcement learning"},{"key":"ref44","article-title":"Deep, Big, Simple Neural Nets for Handwritten Digit Recognition","volume":"22","author":"cire?an","year":"2010","journal-title":"Neural Computation"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"ref127","author":"naumov","year":"2019","journal-title":"Deep learning recommendation model for personalization and recommendation systems"},{"key":"ref126","year":"0"},{"key":"ref125","author":"cai","year":"2018","journal-title":"Proxylessnas Direct neural architecture search on target task and hardware"},{"key":"ref124","author":"radford","year":"2019","journal-title":"Better Language Models and Their Implications"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"ref72","author":"sejnowski","year":"0","journal-title":"Parallel Networks that Learn to Pronounce English Text"},{"key":"ref129","author":"baker","year":"2019","journal-title":"Emergent Tool Use from Multi-Agent Interaction"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1038\/323533a0"},{"key":"ref128","article-title":"ObjectNet: A large-scale bias-controlled dataset for pushing the limits of object recognition models","volume":"32","author":"barbu","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/BF00344251"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1996.517075"},{"key":"ref130","author":"shoeybi","year":"2019","journal-title":"Megatron-LM Training Multi-Billion Parameter Language Models Using Model Par-allelism"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref74","article-title":"ALVINN: An Autonomous Land Vehicle in a Neural Network","volume":"1","author":"pomerleau","year":"1988","journal-title":"Advances in neural information processing systems"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992697"},{"key":"ref133","author":"raffel","year":"2019","journal-title":"Exploring the limits of transfer learning with a unified text-to-text transformer"},{"key":"ref134","year":"2019","journal-title":"Dota 2 with large scale deep reinforcement learning"},{"key":"ref131","author":"wang","year":"2019","journal-title":"Alphax exploring neural architectures with deep neural networks and monte carlo tree search"},{"key":"ref78","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1109\/89.668817","article-title":"An RNN-based prosodic information synthesizer for Mandarin text-to-speech","volume":"6","author":"chen","year":"0","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"ref132","year":"0","journal-title":"Solving rubik's cube with a robot hand"},{"key":"ref79","doi-asserted-by":"crossref","first-page":"2278","DOI":"10.1109\/5.726791","article-title":"Gradient-based learning applied to doc-ument recognition","volume":"86","author":"lecun","year":"1998","journal-title":"Proceedings of the IEEE"},{"key":"ref136","author":"adiwardana","year":"2020","journal-title":"Towards a Conversational Agent that Can Chat About Anything"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1923-7"},{"key":"ref138","article-title":"ProGen: Language Modeling for Protein Generation","author":"madani","year":"2020","journal-title":"Technical report bioRxiv"},{"key":"ref137","author":"rosset","year":"2020","journal-title":"Turing-NLG A 17-Billion-Parameter Language Model by Microsoft"},{"key":"ref60","author":"woodie","year":"2021","journal-title":"The Chip Shortage Seems to Be Impacting AI Workloads in the Cloud"},{"key":"ref139","author":"cai","year":"2019","journal-title":"Once for all Train one network and specialize it for efficient deployment"},{"key":"ref62","author":"attinasi","year":"2021","journal-title":"The Semiconductor Shortage and Its Implication for Euro Area Trade Production and Prices"},{"key":"ref61","author":"sharir","year":"2020","journal-title":"The cost of training NLP models A concise overview"},{"key":"ref63","author":"athlur","year":"2021","journal-title":"Varuna Scalable Low-cost Training of Massive Deep Learning Models"},{"key":"ref64","author":"patel","year":"2021","journal-title":"Why the Global Chip Shortage is Making It So Hard to Buy a PS5"},{"key":"ref140","author":"chen","year":"2020","journal-title":"Image GPT"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1037\/h0042519"},{"key":"ref141","year":"0"},{"key":"ref66","author":"klein","year":"0","journal-title":"Mighty mouse"},{"key":"ref142","author":"baevski","year":"2020","journal-title":"wav2vec 2 0 A framework for self-supervised learning of speech representations"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-8716-9_14"},{"key":"ref143","author":"zhang","year":"2020","journal-title":"CPM A Large-scale Generative Chi-nese Pre-trained Language Model"},{"key":"ref68","author":"selfridge","year":"0","journal-title":"Pandemonium A Paradigm for Learning |AITopics"},{"key":"ref144","author":"antoun","year":"2020","journal-title":"AraGPT2 Pre-Trained Transformer for Arabic Language Generation"},{"key":"ref2","author":"hoffmann","year":"2022","journal-title":"Training compute-optimal large language models"},{"key":"ref69","first-page":"123","article-title":"Adaptive switching circuits","author":"widrow","year":"1988","journal-title":"Neurocomputing Foundations of Research"},{"key":"ref145","author":"dosovitskiy","year":"2020","journal-title":"An image is worth 16&#x00D7;16 words Transformers for image recognition at scale"},{"key":"ref1","author":"chowdhery","year":"2022","journal-title":"Palm Scaling language modeling with pathways"},{"key":"ref109","article-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer","author":"shazeer","year":"2017","journal-title":"ArXiv"},{"key":"ref95","article-title":"Adam: A Method for Stochas-tic Optimization","author":"kingma","year":"2017","journal-title":"ArXiv"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1126\/science.aam6960"},{"key":"ref94","article-title":"Se-quence to Sequence Learning with Neural Networks","author":"sutskever","year":"2014","journal-title":"ArXiv"},{"key":"ref107","year":"0"},{"key":"ref93","article-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition","author":"simonyan","year":"2015","journal-title":"ArXiv"},{"key":"ref106","first-page":"5226","author":"brown","year":"2017","journal-title":"Libratus The Superhuman AI for No-Limit Poker"},{"key":"ref92","article-title":"Neural Machine Translation by Jointly Learning to Align and Translate","author":"bahdanau","year":"2016","journal-title":"ArXiv"},{"key":"ref105","article-title":"Neural Architecture Search with Reinforcement Learning","author":"zoph","year":"2017","journal-title":"ArXiv"},{"key":"ref91","article-title":"Generative Adversarial Networks","author":"goodfellow","year":"2014","journal-title":"ArXiv"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"ref90","first-page":"411","article-title":"Mitosis Detection in Breast Cancer Histology Images with Deep Neural Networks","author":"cire?an","year":"2013","journal-title":"Medical Image Computing and Computer-assisted Intervention - MICCAI 2013 Ser Lecture Notes in Computer Science"},{"key":"ref103","article-title":"Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation","author":"wu","year":"2016","journal-title":"CoRR"},{"key":"ref102","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of Go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"ref112","year":"0"},{"key":"ref110","article-title":"Attention Is All You Need","author":"vaswani","year":"2017","journal-title":"ArXiv"},{"key":"ref98","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2019","journal-title":"ArXiv"},{"key":"ref99","article-title":"Deep Speech 2: End-to-End Speech Recognition in English and Mandarin","author":"amodei","year":"2015","journal-title":"ArXiv"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref10","first-page":"1","article-title":"Train large, then compress: Rethinking model size for efficient training and inference of trans-formers","author":"li","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref11","first-page":"1","article-title":"Scaling Scaling Laws with Board Games","author":"jones","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref12","first-page":"1","article-title":"A constructive prediction of the generalization error across scales","author":"rosenfeld","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref13","first-page":"1","article-title":"Deep learning scaling is predictable, empirically","author":"hestness","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref14","author":"alayrac","year":"2022","journal-title":"Flamingo a visual language model for few-shot learning"},{"key":"ref15","author":"thompson","year":"2020","journal-title":"The computational limits of deep learning"},{"key":"ref118","article-title":"Large Scale GAN Training for High Fidelity Natural Image Synthe-sis","author":"brock","year":"2019","journal-title":"ArXiv"},{"key":"ref16","article-title":"How Much Longer Can Com-puting Power Drive Artificial Intelligence Progress?","author":"lohn","year":"2022","journal-title":"Center for Security and Technology Tech Rep"},{"key":"ref82","year":"0"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref17","article-title":"The Akronomicon - LightOn AI Research","year":"2022","journal-title":"Akro-nomicon"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref18","article-title":"Computer Progress","year":"2022","journal-title":"Comput Program"},{"key":"ref84","article-title":"Improving neural networks by preventing co-adaptation of feature detectors","author":"hinton","year":"2012","journal-title":"ArXiv"},{"key":"ref119","author":"devlin","year":"2018","journal-title":"BERT Pre-training of deep bidirectional transformers for language understanding"},{"key":"ref19","article-title":"AI Tracker","year":"2022","journal-title":"AI Tracker"},{"key":"ref83","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"Pro-ceedings of the thirteenth international conference on artificial intelligence and statistics"},{"key":"ref114","article-title":"IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Ar-chitectures","author":"espeholt","year":"2018","journal-title":"ArXiv"},{"key":"ref113","year":"0"},{"key":"ref116","author":"radford","year":"2018","journal-title":"Improving language understanding with unsupervised learning"},{"key":"ref80","first-page":"1137","article-title":"A neural probabilistic language model","volume":"3","author":"bengio","year":"2003","journal-title":"The Journal of Machine Learning Research"},{"key":"ref115","article-title":"YOLOv3: An Incremental Improvement","author":"redmon","year":"2018","journal-title":"ArXiv"},{"key":"ref120","author":"liu","year":"2017","journal-title":"Progressive neural architecture search"},{"key":"ref89","article-title":"Auto-Encoding Variational Bayes","author":"kingma","year":"2014","journal-title":"ArXiv"},{"key":"ref121","author":"real","year":"2018","journal-title":"Regularized evolution for image classifier architecture search"},{"key":"ref122","author":"loshchilov","year":"2017","journal-title":"Decoupled weight decay regularization"},{"key":"ref123","author":"bard","year":"2019","journal-title":"The hanabi challenge A new frontier for ai research"},{"key":"ref85","year":"0"},{"key":"ref86","article-title":"Playing Atari with Deep Reinforcement Learning","author":"mnih","year":"2013","journal-title":"ArXiv"},{"key":"ref87","article-title":"Distributed Representations of Words and Phrases and their Compositionality","author":"mikolov","year":"2013","journal-title":"NIPS"},{"key":"ref88","article-title":"Visualizing and Un-derstanding Convolutional Networks","author":"zeiler","year":"2013","journal-title":"ArXiv"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","location":"Padua, Italy","start":{"date-parts":[[2022,7,18]]},"end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09891914.pdf?arnumber=9891914","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,28]],"date-time":"2023-11-28T07:38:07Z","timestamp":1701157087000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9891914\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":176,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9891914","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}