{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T17:17:35Z","timestamp":1767892655585,"version":"3.49.0"},"reference-count":23,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.1109\/pmbs.2018.8641578","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:44:06Z","timestamp":1550187846000},"page":"121-131","source":"Crossref","is-referenced-by-count":34,"title":["Automated Instruction Stream Throughput Prediction for Intel and AMD Microarchitectures"],"prefix":"10.1109","author":[{"given":"Jan","family":"Laukemann","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Julian","family":"Hammer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Johannes","family":"Hofmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Georg","family":"Hager","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gerhard","family":"Wellein","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2011.5762713"},{"key":"ref11","year":"2018","journal-title":"Instruction Tables"},{"key":"ref12","year":"2017","journal-title":"Software Optimization Guide for AMD Family 17h Processors"},{"key":"ref13","author":"andric","year":"0","journal-title":"[RFC] llvm-mca a static performance analysis tool"},{"key":"ref14","year":"0","journal-title":"llvm-exegesis _ LLVM Machine Instruction Benchmark"},{"key":"ref15","article-title":"Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks","author":"mendis","year":"2018","journal-title":"ArXiv e-prints"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2024716.2024718"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485963"},{"key":"ref18","first-page":"29","article-title":"Marss-x86: A qemu-based micro-architectural and systems simulator for x86 multicore processors","author":"patel","year":"2011","journal-title":"Proceedings of the 1st International QEMU Users' Forum"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2014.7116904"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-56702-0_1"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1007\/978-3-319-17248-4_7","article-title":"Roofline Model Toolkit: A Practical Tool for Architectural and Program Analysis","volume":"8966","author":"lo","year":"0","journal-title":"High Performance Computing Systems Performance Modeling Benchmarking and Simulation ser Lecture Notes in Computer Science"},{"key":"ref6","author":"laukemann","year":"2017","journal-title":"OSACA - Open Source Architecture Code Analyzer"},{"key":"ref5","year":"2017","journal-title":"Intel Architecture Code Analyzer"},{"key":"ref8","year":"0","journal-title":"Intel 64 and IA-32 Architectures Optimization Reference Manual"},{"key":"ref7","year":"0","journal-title":"Artifact description Automated instruction stream throughput prediction for intel and amd microarchitectures"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751240"},{"key":"ref1","first-page":"49","article-title":"Memory Requirements for Balanced Computer Architectures","author":"kung","year":"1986","journal-title":"Proceedings of the 13th Annual International Symposium on Computer Architecture ser ISCA &#x2018;86"},{"key":"ref9","author":"clark","year":"0","journal-title":"A New X86 Core Architecture for the Next Generation of Computing"},{"key":"ref20","author":"laukemann","year":"2017","journal-title":"Design and Implemention of a Framework for Predicting Instruction Throughput"},{"key":"ref22","author":"schonauer","year":"2000","journal-title":"Scientific Supercomputing Architecture and Use of Shared and Distributed Memory Parallel Computers"},{"key":"ref21","author":"hofmann","year":"2018","journal-title":"ibench _ Measure Instruction Latency and Throughput"},{"key":"ref23","article-title":"OoO Instruction Benchmarking Framework on the Back of Dragons","author":"hammer","year":"0","journal-title":"SC18 SRC Poster (in review)"}],"event":{"name":"2018 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","location":"Dallas, TX, USA","start":{"date-parts":[[2018,11,12]]},"end":{"date-parts":[[2018,11,12]]}},"container-title":["2018 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8630816\/8641548\/08641578.pdf?arnumber=8641578","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T22:54:19Z","timestamp":1643237659000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8641578\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/pmbs.2018.8641578","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}