{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T08:09:09Z","timestamp":1769501349207,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Department of Energy","award":["DE-SC0022268"],"award-info":[{"award-number":["DE-SC0022268"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2107257"],"award-info":[{"award-number":["2107257"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,18]]},"DOI":"10.1145\/3725843.3756070","type":"proceedings-article","created":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T17:19:56Z","timestamp":1760721596000},"page":"691-704","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["SHADOW: Simultaneous Multi-Threading Architecture with Asymmetric Threads"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5955-7747","authenticated-orcid":false,"given":"Ishita","family":"Chaturvedi","sequence":"first","affiliation":[{"name":"Princeton University, Princeton, New Jersey, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2739-0538","authenticated-orcid":false,"given":"Bhargav Reddy","family":"Godala","sequence":"additional","affiliation":[{"name":"AheadComputing, Portland, Oregon, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2356-0413","authenticated-orcid":false,"given":"Abiram","family":"Gangavaram","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, New Jersey, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1916-2842","authenticated-orcid":false,"given":"Daniel","family":"Flyer","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, New Jersey, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1646-7935","authenticated-orcid":false,"given":"Tyler","family":"Sorensen","sequence":"additional","affiliation":[{"name":"Microsoft, Seattle, Washington, USA and University of California Santa Cruz, Santa Cruz, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1161-692X","authenticated-orcid":false,"given":"Tor M.","family":"Aamodt","sequence":"additional","affiliation":[{"name":"University of British Columbia, Vancouver, British Columbia, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3327-1803","authenticated-orcid":false,"given":"David I.","family":"August","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, New Jersey, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","unstructured":"Sarita\u00a0V. Adve and Kourosh Gharachorloo. 1996. Shared Memory Consistency Models: A Tutorial. Computer 29 12 (1996) 66\u201376. 10.1109\/2.546611","DOI":"10.1109\/2.546611"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/223982.223985"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","unstructured":"A. Agarwal J. Kubiatowicz D. Kranz B.H. Lim D. Yeung G. D\u2019Souza and M. Parkin. 1993. Sparcle: an evolutionary processor design for large-scale multiprocessors. IEEE Micro (1993). 10.1109\/40.216748","DOI":"10.1109\/40.216748"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","unstructured":"Andreas Agne Markus Happe Ariane Keller Enno L\u00fcbbers Bernhard Plattner Marco Platzner and Christian Plessl. 2013. ReconOS: An operating system approach for reconfigurable computing. IEEE Micro (2013). 10.1109\/MM.2013.110","DOI":"10.1109\/MM.2013.110"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.11"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8715034"},{"key":"e_1_3_3_1_8_2","volume-title":"Cortex-A9 Technical Reference Manual: Register Renaming","author":"Ltd. Arm","year":"2010","unstructured":"Arm Ltd.2010. Cortex-A9 Technical Reference Manual: Register Renaming. https:\/\/developer.arm.com\/documentation\/ddi0388\/h\/Functional-Description\/About-the-functions\/Register-renaming Accessed: 2025-02-20."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Nathan Binkert Bradford Beckmann Gabriel Black Steven\u00a0K Reinhardt Ali Saidi Arkaprava Basu Joel Hestness Derek\u00a0R Hower Tushar Krishna Somayeh Sardashti et\u00a0al. 2011. The gem5 simulator. ACM SIGARCH computer architecture news 39 2 (2011) 1\u20137.","DOI":"10.1145\/2024716.2024718"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/209936.209958"},{"key":"e_1_3_3_1_11_2","volume-title":"The Berkeley Out-of-Order Machine (BOOM): An Industry-Competitive, Synthesizable, Parameterized RISC-V Processor","author":"Celio Christopher","year":"2015","unstructured":"Christopher Celio, David\u00a0A. Patterson, and Krste Asanovi\u0107. 2015. The Berkeley Out-of-Order Machine (BOOM): An Industry-Competitive, Synthesizable, Parameterized RISC-V Processor. Technical Report UCB\/EECS-2015-167. EECS Department, University of California, Berkeley. http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2015\/EECS-2015-167.html"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/300979.300995"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_3_1_14_2","unstructured":"Wikipedia contributors. 2025. ARM Neoverse. https:\/\/en.wikipedia.org\/wiki\/ARM_Neoverse Accessed: 2025-02-21."},{"key":"e_1_3_3_1_15_2","unstructured":"Brett\u00a0W. Coon John\u00a0Erik Lindholm Gary Tarolli Svetoslav\u00a0D. Tzvetkov John\u00a0R. Nickolls and Ming\u00a0Y. Siu. 2009. Register File Allocation. https:\/\/patents.google.com\/patent\/US7634621B1\/en"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","unstructured":"Steven Dalton Luke Olson and Nathan Bell. 2015. Optimizing sparse matrix\u2014matrix multiplication for the GPU. ACM Transactions on Mathematical Software (TOMS) (2015). 10.1145\/2699470","DOI":"10.1145\/2699470"},{"key":"e_1_3_3_1_17_2","unstructured":"darchr. 2025. Grace Out-of-Order CPU Implementation. https:\/\/github.com\/darchr\/novoverse\/blob\/main\/components\/processors\/grace\/gracecore\/grace_o3_cpu.py Accessed: 2025-02-21."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Kaushik Datta Mark Murphy Vasily Volkov Samuel Williams Jonathan Carter Leonid Oliker David Patterson John Shalf and Katherine Yelick. 2008. Stencil Computation Optimization and Auto-tuning on Modern Microprocessors. Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing (SC) (2008). 10.1109\/SC.2008.5222004","DOI":"10.1109\/SC.2008.5222004"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2002.1105971"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","unstructured":"Susan\u00a0J Eggers Joel\u00a0S Emer Henry\u00a0M Levy Jack\u00a0L Lo Rebecca\u00a0L Stamm and Dean\u00a0M Tullsen. 1997. Simultaneous multithreading: A platform for next-generation processors. IEEE Micro (1997). 10.1109\/40.621209","DOI":"10.1109\/40.621209"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00023"},{"key":"e_1_3_3_1_22_2","unstructured":"Nigel Griffiths. 2019. POWER CPU Memory Affinity 3 - Scheduling Processes to SMT and Virtual Processors. https:\/\/www.ibm.com\/support\/pages\/power-cpu-memory-affinity-3-scheduling-processes-smt-and-virtual-processors. Accessed: 2025-02-19."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.30"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","unstructured":"Fred\u00a0G Gustavson. 1978. Two Fast Algorithms for Sparse Matrices: Multiplication and Permuted Transposition. ACM Transactions on Mathematical Software (TOMS) (1978). 10.1145\/355791.355796","DOI":"10.1145\/355791.355796"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC55726.2022.00012"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.1999.744331"},{"key":"e_1_3_3_1_27_2","unstructured":"Glenn Hinton. 2001. The microarchitecture of the Pentium 4 processor. Intel technology journal (2001)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/139669.139710"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/1250662.1250686"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","unstructured":"Yasuo Ishii Mary Inaba and Kei Hiraki. 2011. Access map pattern matching for data cache prefetch. Proceedings of the 23rd International Conference on Supercomputing (SC) (2011). 10.1145\/1542275.1542349","DOI":"10.1145\/1542275.1542349"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","unstructured":"Xin Jin and Ningmei Yu. 2021. A defense mechanism against transient execution attacks on SMT processors. IEICE Electronics Express (2021). 10.1587\/elex.18.20210041","DOI":"10.1587\/elex.18.20210041"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.41"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3243176.3243185"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","unstructured":"Poonacha Kongetira Kathirgamar Aingaran and Kunle Olukotun. 2005. Niagara: a 32-way multithreaded Sparc processor. IEEE Micro (2005). 10.1109\/MM.2005.35","DOI":"10.1109\/MM.2005.35"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2003.1253185"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","unstructured":"Hung\u00a0Q Le William\u00a0J Starke J\u00a0Stephen Fields Francis\u00a0P O\u2019Connell Dung\u00a0Q Nguyen Bruce\u00a0J Ronchetti Wolfram\u00a0M Sauer Eric\u00a0M Schwarz and Michael\u00a0T Vaden. 2007. IBM POWER6 microarchitecture. IBM Journal of Research and Development (2007). 10.1147\/rd.516.0639","DOI":"10.1147\/rd.516.0639"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.5555\/2787930"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669172"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","unstructured":"Jack\u00a0L. Lo Susan\u00a0J. Eggers Joel\u00a0S. Emer Henry\u00a0M. Levy Rebecca\u00a0L. Stamm and Dean\u00a0M. Tullsen. 1997. Converting thread-level parallelism to instruction-level parallelism via simultaneous multithreading. ACM Transactions on Computer Systems (TOCS) 15 3 (1997) 322\u2013354. 10.1145\/263326.263382","DOI":"10.1145\/263326.263382"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2008.24"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","unstructured":"Jason Lowe-Power Abdul\u00a0Mutaal Ahmad Ayaz Akram Mohammad Alian Rico Amslinger Matteo Andreozzi Adri\u00e0 Armejach Nils Asmussen Brad Beckmann Srikant Bharadwaj et\u00a0al. 2020. The gem5 simulator: Version 20.0+. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2007.03152 (2020). 10.48550\/arXiv.2007.03152","DOI":"10.48550\/arXiv.2007.03152"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3240302.3240320"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00024"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00037"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/871506.871585"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun\u00a0Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson\u00a0G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2020. Deep Learning Recommendation Model for Personalization and Recommendation Systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2008.07678 (2020). 10.48550\/arXiv.1906.00091","DOI":"10.48550\/arXiv.1906.00091"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.5555\/560733"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2011.7477491"},{"key":"e_1_3_3_1_51_2","volume-title":"Intel Threading Building Blocks: Outfitting C++ for Multi-core Processor Parallelism","author":"Reinders James","year":"2007","unstructured":"James Reinders. 2007. Intel Threading Building Blocks: Outfitting C++ for Multi-core Processor Parallelism. O\u2019Reilly Media. https:\/\/dl.acm.org\/doi\/10.5555\/1352079.1352134"},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","DOI":"10.5555\/829576"},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/859618.859667"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","unstructured":"Faissal\u00a0M Sleiman and Thomas\u00a0F Wenisch. 2016. Efficiently scaling out-of-order cores for simultaneous multithreading. (2016). 10.1145\/3007787.3001183","DOI":"10.1145\/3007787.3001183"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/378993.379244"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2000.898058"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.36"},{"key":"e_1_3_3_1_58_2","volume-title":"31st USENIX Security Symposium","author":"Taram Mohammadkazem","year":"2022","unstructured":"Mohammadkazem Taram, Xida Ren, Ashish Venkat, and Dean Tullsen. 2022. SecSMT: Securing SMT Processors against Contention-Based Covert Channels. In 31st USENIX Security Symposium."},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2019.00012"},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1145\/232973.232993"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/223982.224449"},{"key":"e_1_3_3_1_62_2","unstructured":"Jim Turley. 2014. VISC Processor Secrets Revealed. EE Journal. https:\/\/www.eejournal.com\/article\/20141203-softmachines2\/ Online; accessed 2025-06-13."},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","unstructured":"Yasuko Watanabe John\u00a0D Davis and David\u00a0A Wood. 2010. WiDGET: Wisconsin decoupled grid execution tiles. Proceedings of the 37th Annual International Symposium on Computer Architecture (ISCA) (2010). 10.1145\/1815961.1815965","DOI":"10.1145\/1815961.1815965"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1109\/HICSS.1994.323172"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00072"}],"event":{"name":"MICRO 2025: 58th IEEE\/ACM International Symposium on Microarchitecture","location":"Seoul Korea","acronym":"MICRO 2025","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["Proceedings of the 58th IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3725843.3756070","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3725843.3756070","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T21:43:04Z","timestamp":1769463784000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3725843.3756070"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"references-count":65,"alternative-id":["10.1145\/3725843.3756070","10.1145\/3725843"],"URL":"https:\/\/doi.org\/10.1145\/3725843.3756070","relation":{},"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"2025-10-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}