{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:20:58Z","timestamp":1755926458970,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,16]],"date-time":"2020-06-16T00:00:00Z","timestamp":1592265600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,16]]},"DOI":"10.1145\/3372799.3394359","type":"proceedings-article","created":{"date-parts":[[2020,5,29]],"date-time":"2020-05-29T15:04:12Z","timestamp":1590764652000},"page":"3-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["PAQSIM"],"prefix":"10.1145","author":[{"given":"Xiang","family":"Gong","sequence":"first","affiliation":[{"name":"Qualcomm, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunling","family":"Hu","sequence":"additional","affiliation":[{"name":"Qualcomm, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chu-Cheow","family":"Lim","sequence":"additional","affiliation":[{"name":"Qualcomm, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,6,16]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2012.6189201"},{"key":"e_1_3_2_2_2_1","volume-title":"European Conference on Parallel Processing","author":"Jooya A.","year":"2012","unstructured":"Jooya , A. , Baniasadi , A. and Dimopoulos , N.J . 2012. Efficient design space exploration of GPGPU architectures . European Conference on Parallel Processing ( 2012 ), 518--527. Jooya, A., Baniasadi, A. and Dimopoulos, N.J. 2012. Efficient design space exploration of GPGPU architectures. European Conference on Parallel Processing (2012), 518--527."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2018.00009"},{"key":"e_1_3_2_2_4_1","volume-title":"International Workshop on Languages and Compilers for Parallel Computing","author":"Bertolli C.","year":"2012","unstructured":"Bertolli , C. , Betts , A. , Loriant , N. , Mudalige , G.R. , Radford , D. , Ham , D.A. , Giles , M.B. and Kelly , P.H.J. 2012. Compiler optimizations for industrial unstructured mesh cfd applications on gpus . International Workshop on Languages and Compilers for Parallel Computing ( 2012 ), 112--126. Bertolli, C., Betts, A., Loriant, N., Mudalige, G.R., Radford, D., Ham, D.A., Giles, M.B. and Kelly, P.H.J. 2012. Compiler optimizations for industrial unstructured mesh cfd applications on gpus. International Workshop on Languages and Compilers for Parallel Computing (2012), 112--126."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2017.7863727"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2736287"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2145816.2145819"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.59"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555775"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.1999.810662"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322230"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2005.1430560"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2005.1430562"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Wunderlich R.E. Wenisch T.F. Falsafi B. and Hoe J.C. 2006. Statistical sampling of microarchitecture simulation. ACM Transactions on Modeling and Computer Simulation. (2006). DOI:https:\/\/doi.org\/10.1145\/1147224.1147225.  Wunderlich R.E. Wenisch T.F. Falsafi B. and Hoe J.C. 2006. Statistical sampling of microarchitecture simulation. ACM Transactions on Modeling and Computer Simulation. (2006). DOI:https:\/\/doi.org\/10.1145\/1147224.1147225.","DOI":"10.1145\/1147224.1147225"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.36"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2008.4510733"},{"key":"e_1_3_2_2_17_1","unstructured":"Chanjuan W. Jiawei O. and Jinyuan J. 2010. GPGPU-based Smoothed Particle Hydrodynamic Fluid Simulation [J]. Journal of Computer-Aided Design & Computer Graphics. 3 (2010).  Chanjuan W. Jiawei O. and Jinyuan J. 2010. GPGPU-based Smoothed Particle Hydrodynamic Fluid Simulation [J]. Journal of Computer-Aided Design & Computer Graphics. 3 (2010)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Vigueras G. Roy I. Cookson A. Lee J. Smith N. and Nordsletten D. 2014. Toward GPGPU accelerated human electromechanical cardiac simulations. International journal for numerical methods in biomedical engineering. 30 1 (2014) 117--134.  Vigueras G. Roy I. Cookson A. Lee J. Smith N. and Nordsletten D. 2014. Toward GPGPU accelerated human electromechanical cardiac simulations. International journal for numerical methods in biomedical engineering. 30 1 (2014) 117--134.","DOI":"10.1002\/cnm.2593"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1021\/ct3004645"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the Annual International Symposium on Microarchitecture, MICRO","author":"Narasiman V.","year":"2011","unstructured":"Narasiman , V. , Shebanow , M. , Lee , C.J. , Miftakhutdinov , R. , Mutlu , O. and Patt , Y.N . 2011. Improving GPU performance via large warps and two-level warp scheduling . Proceedings of the Annual International Symposium on Microarchitecture, MICRO ( 2011 ). Narasiman, V., Shebanow, M., Lee, C.J., Miftakhutdinov, R., Mutlu, O. and Patt, Y.N. 2011. Improving GPU performance via large warps and two-level warp scheduling. Proceedings of the Annual International Symposium on Microarchitecture, MICRO (2011)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"Suh J.W. and Kim Y. 2013. Accelerating MATLAB with GPU Computing: A Primer with Examples.  Suh J.W. and Kim Y. 2013. Accelerating MATLAB with GPU Computing: A Primer with Examples.","DOI":"10.1016\/B978-0-12-408080-5.00001-8"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Okada S. Murakami K. Amako K. Sasaki T. Incerti S. Karamitros M. Henderson N. Gerritsen M. Asai M. and Dotti A. 2016. GPU acceleration of monte carlo simulation at the cellular and DNA levels. Smart Innovation Systems and Technologies (2016).  Okada S. Murakami K. Amako K. Sasaki T. Incerti S. Karamitros M. Henderson N. Gerritsen M. Asai M. and Dotti A. 2016. GPU acceleration of monte carlo simulation at the cellular and DNA levels. Smart Innovation Systems and Technologies (2016).","DOI":"10.1007\/978-3-319-23024-5_29"},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings - Symposium on Computer Architecture and High Performance Computing","author":"Coutinho B.","year":"2009","unstructured":"Coutinho , B. , Teodoro , G. , Sachetto , R. , Guedes , D. and Ferreira , R . 2009. Profiling general purpose GPU applications . Proceedings - Symposium on Computer Architecture and High Performance Computing ( 2009 ). Coutinho, B., Teodoro, G., Sachetto, R., Guedes, D. and Ferreira, R. 2009. Profiling general purpose GPU applications. Proceedings - Symposium on Computer Architecture and High Performance Computing (2009)."},{"key":"e_1_3_2_2_25_1","volume-title":"ACM International Conference Proceeding Series","author":"Mistry P.","year":"2019","unstructured":"Mistry , P. and Purnomo , B . 2019. Profiling OpenCL kernels using wavefront occupancy with radeon GPU profiler . ACM International Conference Proceeding Series ( 2019 ). Mistry, P. and Purnomo, B. 2019. Profiling OpenCL kernels using wavefront occupancy with radeon GPU profiler. ACM International Conference Proceeding Series (2019)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2014.127"},{"key":"e_1_3_2_2_27_1","volume-title":"2012 21st International Conference on Parallel Architectures and Compilation Techniques (PACT)","author":"Ubal R.","year":"2012","unstructured":"Ubal , R. , Jang , B. , Mistry , P. , Schaa , D. and Kaeli , D . 2012. Multi2Sim: a simulation framework for CPU-GPU computing . 2012 21st International Conference on Parallel Architectures and Compilation Techniques (PACT) ( 2012 ), 335--344. Ubal, R., Jang, B., Mistry, P., Schaa, D. and Kaeli, D. 2012. Multi2Sim: a simulation framework for CPU-GPU computing. 2012 21st International Conference on Parallel Architectures and Compilation Techniques (PACT) (2012), 335--344."},{"key":"e_1_3_2_2_28_1","volume-title":"2014 21st International Conference on High Performance Computing, HiPC 2014","author":"Malhotra G.","year":"2014","unstructured":"Malhotra , G. , Goel , S. and Sarangi , S.R . 2014. GpuTejas: A parallel simulator for GPU architectures . 2014 21st International Conference on High Performance Computing, HiPC 2014 ( 2014 ). Malhotra, G., Goel, S. and Sarangi, S.R. 2014. GpuTejas: A parallel simulator for GPU architectures. 2014 21st International Conference on High Performance Computing, HiPC 2014 (2014)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"crossref","unstructured":"Hill M.D. and Marty M.R. 2008. Amdahl's law in the multicore era. Computer. (2008). DOI:https:\/\/doi.org\/10.1109\/MC.2008.209.  Hill M.D. and Marty M.R. 2008. Amdahl's law in the multicore era. Computer. (2008). DOI:https:\/\/doi.org\/10.1109\/MC.2008.209.","DOI":"10.1109\/HPCA.2008.4658638"},{"key":"e_1_3_2_2_30_1","volume-title":"Proceedings of the 15th ACM SIGPLAN symposium on Principles and practice of parallel programming","author":"Baghsorkhi S.S.","year":"2010","unstructured":"Baghsorkhi , S.S. , Delahaye , M. , Patel , S.J. , Gropp , W.D. and Hwu , W.W . 2010. An adaptive performance modeling tool for GPU architectures . Proceedings of the 15th ACM SIGPLAN symposium on Principles and practice of parallel programming ( 2010 ), 105--114. Baghsorkhi, S.S., Delahaye, M., Patel, S.J., Gropp, W.D. and Hwu, W.W. 2010. An adaptive performance modeling tool for GPU architectures. Proceedings of the 15th ACM SIGPLAN symposium on Principles and practice of parallel programming (2010), 105--114."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749745"},{"key":"e_1_3_2_2_32_1","volume-title":"Proceedings of the 2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops, IPDPSW 2012","author":"Parakh A.K.","year":"2012","unstructured":"Parakh , A.K. , Balakrishnan , M. and Paul , K . 2012. Performance estimation of GPUs with cache . Proceedings of the 2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops, IPDPSW 2012 ( 2012 ). Parakh, A.K., Balakrishnan, M. and Paul, K. 2012. Performance estimation of GPUs with cache. Proceedings of the 2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops, IPDPSW 2012 (2012)."},{"key":"e_1_3_2_2_33_1","volume-title":"ACM International Conference Proceeding Series","author":"Lai J.","year":"2012","unstructured":"Lai , J. and Seznec , A . 2012. Break down GPU execution time with an analytical method . ACM International Conference Proceeding Series ( 2012 ). Lai, J. and Seznec, A. 2012. Break down GPU execution time with an analytical method. ACM International Conference Proceeding Series (2012)."},{"key":"e_1_3_2_2_34_1","volume-title":"Conference Proceedings - Annual International Symposium on Computer Architecture, ISCA","author":"Karkhanis T.S.","year":"2004","unstructured":"Karkhanis , T.S. and Smith , J.E . 2004. A first-order superscalar processor model . Conference Proceedings - Annual International Symposium on Computer Architecture, ISCA ( 2004 ). Karkhanis, T.S. and Smith, J.E. 2004. A first-order superscalar processor model. Conference Proceedings - Annual International Symposium on Computer Architecture, ISCA (2004)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"crossref","unstructured":"Eyerman S. Eeckhout L. Karkhanis T. and Smith J.E. 2009. A mechanistic performance model for superscalar out-of-order processors. ACM Transactions on Computer Systems. (2009). DOI:https:\/\/doi.org\/10.1145\/1534909.1534910.  Eyerman S. Eeckhout L. Karkhanis T. and Smith J.E. 2009. A mechanistic performance model for superscalar out-of-order processors. ACM Transactions on Computer Systems. (2009). DOI:https:\/\/doi.org\/10.1145\/1534909.1534910.","DOI":"10.1145\/1534909.1534910"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2019.2923618"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_2_38_1","volume-title":"Roofline-aware DVFS for GPUs. ACM International Conference Proceeding Series","author":"Nugteren C.","year":"2014","unstructured":"Nugteren , C. , Van Den Braak , G.J. and Corp oraal, H . 2014 . Roofline-aware DVFS for GPUs. ACM International Conference Proceeding Series ( 2014 ). Nugteren, C., Van Den Braak, G.J. and Corporaal, H. 2014. Roofline-aware DVFS for GPUs. ACM International Conference Proceeding Series (2014)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Doerfler D. Deslippe J. Williams S. Oliker L. Cook B. Kurth T. Lobet M. Malas T. Vay J.L. and Vincenti H. 2016. Applying the roofline performance model to the intel xeon phi knights landing processor. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) (2016).  Doerfler D. Deslippe J. Williams S. Oliker L. Cook B. Kurth T. Lobet M. Malas T. Vay J.L. and Vincenti H. 2016. Applying the roofline performance model to the intel xeon phi knights landing processor. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) (2016).","DOI":"10.1007\/978-3-319-46079-6_24"},{"key":"e_1_3_2_2_40_1","volume-title":"Proceedings - 25th IEEE International Symposium on High Performance Computer Architecture, HPCA 2019","author":"Hill M.","year":"2019","unstructured":"Hill , M. and Janapa Reddi, V. 2019. Gables: A roofline model for mobile SoCs . Proceedings - 25th IEEE International Symposium on High Performance Computer Architecture, HPCA 2019 ( 2019 ). Hill, M. and Janapa Reddi, V. 2019. Gables: A roofline model for mobile SoCs. Proceedings - 25th IEEE International Symposium on High Performance Computer Architecture, HPCA 2019 (2019)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2014.2299539"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00058"},{"key":"e_1_3_2_2_44_1","volume-title":"2013 IEEE Hot Chips 25 Symposium, HCS 2013","author":"Sander B.","year":"2016","unstructured":"Sander , B. 2016 . HSAIL: Portable compiler IR for HSA . 2013 IEEE Hot Chips 25 Symposium, HCS 2013 (2016). Sander, B. 2016. HSAIL: Portable compiler IR for HSA. 2013 IEEE Hot Chips 25 Symposium, HCS 2013 (2016)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2018.00027"}],"event":{"name":"LCTES '20: 21st ACM SIGPLAN\/SIGBED Conference on Languages, Compilers, and Tools for Embedded Systems","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"London United Kingdom","acronym":"LCTES '20"},"container-title":["The 21st ACM SIGPLAN\/SIGBED Conference on Languages, Compilers, and Tools for Embedded Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3372799.3394359","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3372799.3394359","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:41:08Z","timestamp":1750200068000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3372799.3394359"}},"subtitle":["Fast Performance Model for Graphics Workload on Mobile GPUs"],"short-title":[],"issued":{"date-parts":[[2020,6,16]]},"references-count":45,"alternative-id":["10.1145\/3372799.3394359","10.1145\/3372799"],"URL":"https:\/\/doi.org\/10.1145\/3372799.3394359","relation":{},"subject":[],"published":{"date-parts":[[2020,6,16]]},"assertion":[{"value":"2020-06-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}