{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:29:15Z","timestamp":1750220955220,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,6,26]],"date-time":"2019-06-26T00:00:00Z","timestamp":1561507200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1657336, 1717532, 1750667"],"award-info":[{"award-number":["1657336, 1717532, 1750667"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,6,26]]},"DOI":"10.1145\/3330345.3330362","type":"proceedings-article","created":{"date-parts":[[2019,6,18]],"date-time":"2019-06-18T12:14:30Z","timestamp":1560860070000},"page":"184-194","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Address-stride assisted approximate load value prediction in GPUs"],"prefix":"10.1145","author":[{"given":"Haonan","family":"Wang","sequence":"first","affiliation":[{"name":"College of William &amp; Mary"}]},{"given":"Mohamed","family":"Ibrahim","sequence":"additional","affiliation":[{"name":"College of William &amp; Mary"}]},{"given":"Sparsh","family":"Mittal","sequence":"additional","affiliation":[{"name":"IIT Hyderabad"}]},{"given":"Adwait","family":"Jog","sequence":"additional","affiliation":[{"name":"College of William &amp; Mary"}]}],"member":"320","published-online":{"date-parts":[[2019,6,26]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Analyzing CUDA Workloads Using a Detailed GPU Simulator,\" in ISPASS","author":"Bakhoda A.","year":"2009","unstructured":"A. Bakhoda , G. Yuan , W. Fung , H. Wong , and T. Aamodt , \" Analyzing CUDA Workloads Using a Detailed GPU Simulator,\" in ISPASS , 2009 . A. Bakhoda, G. Yuan, W. Fung, H. Wong, and T. Aamodt, \"Analyzing CUDA Workloads Using a Detailed GPU Simulator,\" in ISPASS, 2009."},{"issue":"10","key":"e_1_3_2_1_2_1","first-page":"33","volume":"48","author":"Carbin M.","year":"2013","unstructured":"M. Carbin , S. Misailovic , and M. C. Rinard , \"Verifying Quantitative Reliability for Programs That Execute on Unreliable Hardware,\" ACM SIGPLAN Notices , vol. 48 , no. 10 , pp. 33 -- 52 , 2013 . M. Carbin, S. Misailovic, and M. C. Rinard, \"Verifying Quantitative Reliability for Programs That Execute on Unreliable Hardware,\" ACM SIGPLAN Notices, vol. 48, no. 10, pp. 33--52, 2013.","journal-title":"\"Verifying Quantitative Reliability for Programs That Execute on Unreliable Hardware,\" ACM SIGPLAN Notices"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.374.0547"},{"key":"e_1_3_2_1_4_1","first-page":"1080","author":"Gabbay F.","year":"1996","unstructured":"F. Gabbay , \" Speculative Execution Based on Value Prediction ,\" Technion - Israel Institute of Technology , Tech. Rep. 1080 , 1996 . F. Gabbay, \"Speculative Execution Based on Value Prediction,\" Technion - Israel Institute of Technology, Tech. Rep. 1080, 1996.","journal-title":"Tech. Rep."},{"key":"e_1_3_2_1_5_1","volume-title":"GTX 480 Configuration. {Online}. Available: https:\/\/dev.ece.ubc.ca\/projects\/gpgpu-sim\/browser\/v3.x\/configs\/GTX480","author":"Sim","year":"2014","unstructured":"GPGPU- Sim v3.2.1. ( 2014 ) GTX 480 Configuration. {Online}. Available: https:\/\/dev.ece.ubc.ca\/projects\/gpgpu-sim\/browser\/v3.x\/configs\/GTX480 GPGPU-Sim v3.2.1. (2014) GTX 480 Configuration. {Online}. Available: https:\/\/dev.ece.ubc.ca\/projects\/gpgpu-sim\/browser\/v3.x\/configs\/GTX480"},{"volume-title":"Hynix GDDR5 SGRAM Part H5GQ1H24AFR Revision 1.0. {Online}. Available: http:\/\/0x04.net\/~mwk\/ram\/H5GQ1H24AFR%28Rev1.0%29.pdf","year":"2009","key":"e_1_3_2_1_6_1","unstructured":"Hynix. ( 2009 ) Hynix GDDR5 SGRAM Part H5GQ1H24AFR Revision 1.0. {Online}. Available: http:\/\/0x04.net\/~mwk\/ram\/H5GQ1H24AFR%28Rev1.0%29.pdf Hynix. (2009) Hynix GDDR5 SGRAM Part H5GQ1H24AFR Revision 1.0. {Online}. Available: http:\/\/0x04.net\/~mwk\/ram\/H5GQ1H24AFR%28Rev1.0%29.pdf"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2588768.2576780"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485951"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499368.2451158"},{"key":"e_1_3_2_1_10_1","volume-title":"RCoal: Mitigating GPU Timing Attack via Subwarp-based Randomized Coalescing Techniques,\" in HPCA","author":"Kadam G.","year":"2018","unstructured":"G. Kadam , D. Zhang , and A. Jog , \" RCoal: Mitigating GPU Timing Attack via Subwarp-based Randomized Coalescing Techniques,\" in HPCA , 2018 . G. Kadam, D. Zhang, and A. Jog, \"RCoal: Mitigating GPU Timing Attack via Subwarp-based Randomized Coalescing Techniques,\" in HPCA, 2018."},{"key":"e_1_3_2_1_11_1","volume-title":"Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs,\" in PACT","author":"Kayiran O.","year":"2013","unstructured":"O. Kayiran , A. Jog , M. T. Kandemir , and C. R. Das , \" Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs,\" in PACT , 2013 . O. Kayiran, A. Jog, M. T. Kandemir, and C. R. Das, \"Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs,\" in PACT, 2013."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_13_1","volume-title":"Rumba: An Online Quality Management System for Approximate Computing,\" in ISCA","author":"Khudia D. S.","year":"2015","unstructured":"D. S. Khudia , B. Zamirai , M. Samadi , and S. Mahlke , \" Rumba: An Online Quality Management System for Approximate Computing,\" in ISCA , 2015 . D. S. Khudia, B. Zamirai, M. Samadi, and S. Mahlke, \"Rumba: An Online Quality Management System for Approximate Computing,\" in ISCA, 2015."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.14"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508148.2485964"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926255"},{"key":"e_1_3_2_1_17_1","volume-title":"Exceeding the Dataflow Limit via Value Prediction,\" in MICRO","author":"Lipasti M. H.","year":"1996","unstructured":"M. H. Lipasti and J. P. Shen , \" Exceeding the Dataflow Limit via Value Prediction,\" in MICRO , 1996 . M. H. Lipasti and J. P. Shen, \"Exceeding the Dataflow Limit via Value Prediction,\" in MICRO, 1996."},{"key":"e_1_3_2_1_18_1","volume-title":"Axilog: Abstractions for Approximate Hardware Design and Reuse,\" in MICRO","author":"Mahajan D.","year":"2015","unstructured":"D. Mahajan , K. Ramkrishnan , R. Jariwala , A. Yazdanbakhsh , J. Park , B. Thwaites , A. Nagendrakumar , A. Rahimi , H. Esmaeilzadeh , and K. Bazargan , \" Axilog: Abstractions for Approximate Hardware Design and Reuse,\" in MICRO , 2015 . D. Mahajan, K. Ramkrishnan, R. Jariwala, A. Yazdanbakhsh, J. Park, B. Thwaites, A. Nagendrakumar, A. Rahimi, H. Esmaeilzadeh, and K. Bazargan, \"Axilog: Abstractions for Approximate Hardware Design and Reuse,\" in MICRO, 2015."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541963"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.22"},{"key":"e_1_3_2_1_21_1","volume-title":"Global Context-Based Value Prediction,\" in HPCA","author":"Nakra T.","year":"1999","unstructured":"T. Nakra , R. Gupta , and M. L. Soffa , \" Global Context-Based Value Prediction,\" in HPCA , 1999 . T. Nakra, R. Gupta, and M. L. Soffa, \"Global Context-Based Value Prediction,\" in HPCA, 1999."},{"key":"e_1_3_2_1_22_1","volume-title":"Kepler GK110","author":"A's Next NVIDIA","year":"2012","unstructured":"NVIDIA , \"NVIDI A's Next Generation CUDA Compute Architecture : Kepler GK110 ,\" 2012 . NVIDIA, \"NVIDIA's Next Generation CUDA Compute Architecture: Kepler GK110,\" 2012."},{"key":"e_1_3_2_1_23_1","volume-title":"Improving GPGPU Concurrency with Elastic Kernels,\" in ASPLOS","author":"Pai S.","year":"2013","unstructured":"S. Pai , M. J. Thazhuthaveetil , and R. Govindarajan , \" Improving GPGPU Concurrency with Elastic Kernels,\" in ASPLOS , 2013 . S. Pai, M. J. Thazhuthaveetil, and R. Govindarajan, \"Improving GPGPU Concurrency with Elastic Kernels,\" in ASPLOS, 2013."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694346"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2786805.2786807"},{"key":"e_1_3_2_1_26_1","volume-title":"Practical Data Value Speculation for Future High-End Processors,\" in HPCA","author":"Perais A.","year":"2014","unstructured":"A. Perais and A. Seznec , \" Practical Data Value Speculation for Future High-End Processors,\" in HPCA , 2014 . A. Perais and A. Seznec, \"Practical Data Value Speculation for Future High-End Processors,\" in HPCA, 2014."},{"key":"e_1_3_2_1_27_1","volume-title":"Paving the Way for an Effective Implementation of Value Prediction,\" in ISCA","author":"Seznec Arthur","year":"2014","unstructured":"Perais, Arthur and Seznec , Andr\u00e9, \"EOLE : Paving the Way for an Effective Implementation of Value Prediction,\" in ISCA , 2014 . Perais, Arthur and Seznec, Andr\u00e9, \"EOLE: Paving the Way for an Effective Implementation of Value Prediction,\" in ISCA, 2014."},{"key":"e_1_3_2_1_28_1","volume-title":"A Cost Effective Predictor Infrastructure for Superscalar Value Prediction,\" in HPCA","author":"Seznec Arthur","year":"2015","unstructured":"Perais, Arthur and Seznec , Andr\u00e9, \"BeBoP : A Cost Effective Predictor Infrastructure for Superscalar Value Prediction,\" in HPCA , 2015 . Perais, Arthur and Seznec, Andr\u00e9, \"BeBoP: A Cost Effective Predictor Infrastructure for Superscalar Value Prediction,\" in HPCA, 2015."},{"key":"e_1_3_2_1_29_1","volume-title":"the Polyhedral Benchmark Suite,\" in URL: http:\/\/www.cs.ucla.edu\/~pouchet\/software\/polybench\/","author":"Pouchet L.-N.","year":"2012","unstructured":"L.-N. Pouchet , \"Polybench : the Polyhedral Benchmark Suite,\" in URL: http:\/\/www.cs.ucla.edu\/~pouchet\/software\/polybench\/ , 2012 . L.-N. Pouchet, \"Polybench: the Polyhedral Benchmark Suite,\" in URL: http:\/\/www.cs.ucla.edu\/~pouchet\/software\/polybench\/, 2012."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2006.5"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2006.49"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540711"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/1993316.1993518"},{"key":"e_1_3_2_1_35_1","volume-title":"The Bunker Cache for Spatio-Value Approximation,\" in MICRO","author":"Miguel J. San","year":"2016","unstructured":"J. San Miguel , J. Albericio , N. Enright Jerger , and A. Jaleel , \" The Bunker Cache for Spatio-Value Approximation,\" in MICRO , 2016 . J. San Miguel, J. Albericio, N. Enright Jerger, and A. Jaleel, \"The Bunker Cache for Spatio-Value Approximation,\" in MICRO, 2016."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830790"},{"key":"e_1_3_2_1_38_1","unstructured":"Sazeides Yiannakis and Smith James E \"The Predictability of Data Values \" in MICRO 1997.   Sazeides Yiannakis and Smith James E \"The Predictability of Data Values \" in MICRO 1997."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/279358.279371"},{"key":"e_1_3_2_1_40_1","volume-title":"Using Dataflow Based Context for Accurate Value Prediction,\" in PACT","author":"Thomas R.","year":"2001","unstructured":"R. Thomas and M. Franklin , \" Using Dataflow Based Context for Accurate Value Prediction,\" in PACT , 2001 . R. Thomas and M. Franklin, \"Using Dataflow Based Context for Accurate Value Prediction,\" in PACT, 2001."},{"key":"e_1_3_2_1_41_1","volume-title":"Towards a Systematic Framework for Instruction-Level Approximate Computing and Its Application to Hardware Resiliency,\" in MICRO","author":"Venkatagiri R.","year":"2016","unstructured":"R. Venkatagiri , A. Mahmoud , S. K. S. Hari , and S.V. Adve , \"Approxilyzer : Towards a Systematic Framework for Instruction-Level Approximate Computing and Its Application to Hardware Resiliency,\" in MICRO , 2016 . R. Venkatagiri, A. Mahmoud, S. K. S. Hari, and S.V.Adve, \"Approxilyzer: Towards a Systematic Framework for Instruction-Level Approximate Computing and Its Application to Hardware Resiliency,\" in MICRO, 2016."},{"key":"e_1_3_2_1_42_1","volume-title":"Enabling Efficient Data Compression in GPUs,\" in ISCA","author":"Vijaykumar N.","year":"2015","unstructured":"N. Vijaykumar , G. Pekhimenko , A. Jog , A. Bhowmick , O. Mutlu , C. Das , M. T. Kandemir , T. Mowry , and R. Ausavarungnirun , \" Enabling Efficient Data Compression in GPUs,\" in ISCA , 2015 . N. Vijaykumar, G. Pekhimenko, A. Jog, A. Bhowmick, O. Mutlu, C. Das, M. T. Kandemir, T. Mowry, and R. Ausavarungnirun, \"Enabling Efficient Data Compression in GPUs,\" in ISCA, 2015."},{"key":"e_1_3_2_1_43_1","volume-title":"Efficient and Fair Multiprogramming in GPUs via Effective Bandwidth Management,\" in HPCA","author":"Wang H.","year":"2018","unstructured":"H. Wang , F. Luo , M. Ibrahim , O. Kayiran , and A. Jog , \" Efficient and Fair Multiprogramming in GPUs via Effective Bandwidth Management,\" in HPCA , 2018 . H. Wang, F. Luo, M. Ibrahim, O. Kayiran, and A. Jog, \"Efficient and Fair Multiprogramming in GPUs via Effective Bandwidth Management,\" in HPCA, 2018."},{"key":"e_1_3_2_1_44_1","volume-title":"Approximating Warps with Intra-Warp Operand Value Similarity,\" in HPCA","author":"Wong D.","year":"2016","unstructured":"D. Wong , N. S. Kim , and M. Annavaram , \" Approximating Warps with Intra-Warp Operand Value Similarity,\" in HPCA , 2016 . D. Wong, N. S. Kim, and M. Annavaram, \"Approximating Warps with Intra-Warp Operand Value Similarity,\" in HPCA, 2016."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2836168"}],"event":{"name":"ICS '19: 2019 International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Phoenix Arizona","acronym":"ICS '19"},"container-title":["Proceedings of the ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3330345.3330362","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3330345.3330362","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3330345.3330362","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:54:05Z","timestamp":1750204445000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3330345.3330362"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,26]]},"references-count":44,"alternative-id":["10.1145\/3330345.3330362","10.1145\/3330345"],"URL":"https:\/\/doi.org\/10.1145\/3330345.3330362","relation":{},"subject":[],"published":{"date-parts":[[2019,6,26]]},"assertion":[{"value":"2019-06-26","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}