{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:24:50Z","timestamp":1771698290237,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,1,23]],"date-time":"2018-01-23T00:00:00Z","timestamp":1516665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NORTE 2020","award":["NORTE-01-0145-FEDER-000020"],"award-info":[{"award-number":["NORTE-01-0145-FEDER-000020"]}]},{"name":"EU H2020","award":["687698"],"award-info":[{"award-number":["687698"]}]},{"name":"FCT","award":["PD\/BD\/105804\/2014, SFRH\/BPD\/118211\/2016"],"award-info":[{"award-number":["PD\/BD\/105804\/2014, SFRH\/BPD\/118211\/2016"]}]},{"name":"H2020-FET","award":["671623"],"award-info":[{"award-number":["671623"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,1,23]]},"DOI":"10.1145\/3183767.3183776","type":"proceedings-article","created":{"date-parts":[[2018,3,19]],"date-time":"2018-03-19T12:53:23Z","timestamp":1521464003000},"page":"26-31","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Aspect-Driven Mixed-Precision Tuning Targeting GPUs"],"prefix":"10.1145","author":[{"given":"Ricardo","family":"Nobre","sequence":"first","affiliation":[{"name":"University of Porto, Portugal and INESC-TEC, Portugal"}]},{"given":"Lu\u00eds","family":"Reis","sequence":"additional","affiliation":[{"name":"University of Porto, Portugal and INESC-TEC, Portugal"}]},{"given":"Jo\u00e3o","family":"Bispo","sequence":"additional","affiliation":[{"name":"University of Porto, Portugal and INESC-TEC, Portugal"}]},{"given":"Tiago","family":"Carvalho","sequence":"additional","affiliation":[{"name":"University of Porto, Portugal and INESC-TEC, Portugal"}]},{"given":"Jo\u00e3o M.P.","family":"Cardoso","sequence":"additional","affiliation":[{"name":"University of Porto, Portugal and INESC-TEC, Portugal"}]},{"given":"Stefano","family":"Cherubin","sequence":"additional","affiliation":[{"name":"Politecnico di Milano, Italy"}]},{"given":"Giovanni","family":"Agosta","sequence":"additional","affiliation":[{"name":"Politecnico di Milano, Italy"}]}],"member":"320","published-online":{"date-parts":[[2018,1,23]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2017. ROCm a New Era in Open GPU Computing. (2017). https:\/\/rocm.github.io\/  2017. ROCm a New Era in Open GPU Computing. (2017). https:\/\/rocm.github.io\/"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. 2017. Radeon's next-generation Vega architecture. (2017). https:\/\/radeon.com\/_downloads\/vega-whitepaper-11.6.17.pdf  AMD. 2017. Radeon's next-generation Vega architecture. (2017). https:\/\/radeon.com\/_downloads\/vega-whitepaper-11.6.17.pdf"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/RSP.2005.15"},{"key":"e_1_3_2_1_4_1","unstructured":"Krishnaraj Bhat. 2017. clpeak: A tool which profiles OpenCL devices to find their peak capacities. (2017). https:\/\/github.com\/krrishnarraj\/clpeak  Krishnaraj Bhat. 2017. clpeak: A tool which profiles OpenCL devices to find their peak capacities. (2017). https:\/\/github.com\/krrishnarraj\/clpeak"},{"key":"e_1_3_2_1_5_1","volume-title":"ACACES 2017 Poster Abstracts (ACACES","author":"Christos","year":"2017","unstructured":"Christos Sakalis et al. 2017. A Software Framework for Investigating Software and Hardware Approximate Computing . In ACACES 2017 Poster Abstracts (ACACES 2017 ). HiPEAC, 225--228. Christos Sakalis et al. 2017. A Software Framework for Investigating Software and Hardware Approximate Computing. In ACACES 2017 Poster Abstracts (ACACES 2017). HiPEAC, 225--228."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503296"},{"key":"e_1_3_2_1_7_1","volume-title":"Low precision arithmetic for deep learning. CoRR abs\/1412.7024","author":"Courbariaux Matthieu","year":"2014","unstructured":"Matthieu Courbariaux , Yoshua Bengio , and Jean-Pierre David . 2014. Low precision arithmetic for deep learning. CoRR abs\/1412.7024 ( 2014 ). http:\/\/arxiv.org\/abs\/1412.7024 Matthieu Courbariaux, Yoshua Bengio, and Jean-Pierre David. 2014. Low precision arithmetic for deep learning. CoRR abs\/1412.7024 (2014). http:\/\/arxiv.org\/abs\/1412.7024"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2535838.2535874"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2903150.2903470"},{"key":"e_1_3_2_1_10_1","volume-title":"Automation Test in Europe Conference Exhibition (DATE). 708--713","author":"Cristina","unstructured":"Cristina Silvano et al. 2016. Autotuning and adaptivity approach for energy efficient Exascale HPC systems: The ANTAREX approach. In 2016 Design , Automation Test in Europe Conference Exhibition (DATE). 708--713 . Cristina Silvano et al. 2016. Autotuning and adaptivity approach for energy efficient Exascale HPC systems: The ANTAREX approach. In 2016 Design, Automation Test in Europe Conference Exhibition (DATE). 708--713."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2884781.2884850"},{"key":"e_1_3_2_1_12_1","volume-title":"HPC: Performance, Energy and Error. In to appear in Proceedings of the 2017 International Conference on Parallel Computing","author":"Stefano Cherubin","year":"2017","unstructured":"Stefano Cherubin et al. 2017 . Implications of Reduced-Precision Computations in HPC: Performance, Energy and Error. In to appear in Proceedings of the 2017 International Conference on Parallel Computing ( ParCo 2017). Stefano Cherubin et al. 2017. Implications of Reduced-Precision Computations in HPC: Performance, Energy and Error. In to appear in Proceedings of the 2017 International Conference on Parallel Computing (ParCo 2017)."},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Computer-Aided Design (ICCAD)","author":"Ga\u00ebl","unstructured":"Ga\u00ebl Deest et al. 2014. Toward Scalable Source Level Accuracy Analysis for Floating-point to Fixed-point Conversion . In International Conference on Computer-Aided Design (ICCAD) . San Jose, United States, 726--733. Ga\u00ebl Deest et al. 2014. Toward Scalable Source Level Accuracy Analysis for Floating-point to Fixed-point Conversion. In International Conference on Computer-Aided Design (ICCAD). San Jose, United States, 726--733."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2012.09.022"},{"key":"e_1_3_2_1_15_1","volume-title":"2013 18th IEEE European Test Symposium (ETS). 1--6.","author":"Han J.","unstructured":"J. Han and M. Orshansky . 2013. Approximate computing: An emerging paradigm for energy-efficient design . In 2013 18th IEEE European Test Symposium (ETS). 1--6. J. Han and M. Orshansky. 2013. Approximate computing: An emerging paradigm for energy-efficient design. In 2013 18th IEEE European Test Symposium (ETS). 1--6."},{"key":"e_1_3_2_1_16_1","volume-title":"Accessed: November 11th","author":"Harris Mark","year":"2016","unstructured":"Mark Harris . 2016 . Mixed-Precision Programming with CUDA 8 | Parallel Forall. https:\/\/devblogs.nvidia.com\/parallelforall\/mixed-precision-programming-cuda-8\/. (Dec. 2016) . Accessed: November 11th , 2017. Mark Harris. 2016. Mixed-Precision Programming with CUDA 8 | Parallel Forall. https:\/\/devblogs.nvidia.com\/parallelforall\/mixed-precision-programming-cuda-8\/. (Dec. 2016). Accessed: November 11th, 2017."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2162049.2162071"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-36812-7_29"},{"key":"e_1_3_2_1_19_1","first-page":"9","article-title":"AUTOSCALER for C: an optimizing floating-point to integer C program converter for fixed-point digital signal processors","volume":"47","author":"Kum Ki-Il","year":"2000","unstructured":"Ki-Il Kum , Jiyang Kang , and Wonyong Sung . 2000 . AUTOSCALER for C: an optimizing floating-point to integer C program converter for fixed-point digital signal processors . IEEE Transactions on Circuits and Systems II: Analog and Digital Signal Processing 47 , 9 (Sep 2000), 840--848. Ki-Il Kum, Jiyang Kang, and Wonyong Sung. 2000. AUTOSCALER for C: an optimizing floating-point to integer C program converter for fixed-point digital signal processors. IEEE Transactions on Circuits and Systems II: Analog and Digital Signal Processing 47, 9 (Sep 2000), 840--848.","journal-title":"IEEE Transactions on Circuits and Systems II: Analog and Digital Signal Processing"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2013.6645508"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342016652462"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2010.05.002"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Martin Golasowski et al. 2017. Expressing and Applying C++ Code Transformations for the HDF5 API Through a DSL. Springer International Publishing Cham 303-- 314.  Martin Golasowski et al. 2017. Expressing and Applying C++ Code Transformations for the HDF5 API Through a DSL. Springer International Publishing Cham 303-- 314.","DOI":"10.1007\/978-3-319-59105-6_26"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1155\/ASP\/2006\/96421"},{"key":"e_1_3_2_1_25_1","unstructured":"Christian Rau. 2017. half: IEEE 754-based half-precision floating point library. (2017). http:\/\/half.sourceforge.net\/  Christian Rau. 2017. half: IEEE 754-based half-precision floating point library. (2017). http:\/\/half.sourceforge.net\/"},{"key":"e_1_3_2_1_26_1","unstructured":"Louis-Noel Pouchet Scott Grauer-Gray. 2012. PolyBench\/GPU: Implementation of PolyBench codes for GPU processing. (2012). http:\/\/web.cs.ucla.edu\/~pouchet\/software\/polybench\/GPU\/index.html  Louis-Noel Pouchet Scott Grauer-Gray. 2012. PolyBench\/GPU: Implementation of PolyBench codes for GPU processing. (2012). http:\/\/web.cs.ucla.edu\/~pouchet\/software\/polybench\/GPU\/index.html"},{"key":"e_1_3_2_1_27_1","volume-title":"University Booth of the Conference on Design, Automation and Test in Europe (DATE)","volume":"38","author":"Simon N.","unstructured":"N. Simon , D. Menard , and O. Sentieys . 2011. ID.Fix-infrastructure for the design of fixed-point systems . In University Booth of the Conference on Design, Automation and Test in Europe (DATE) , Vol. 38 . http:\/\/idfix.gforge.inria.fr N. Simon, D. Menard, and O. Sentieys. 2011. ID.Fix-infrastructure for the design of fixed-point systems. In University Booth of the Conference on Design, Automation and Test in Europe (DATE), Vol. 38. http:\/\/idfix.gforge.inria.fr"},{"key":"e_1_3_2_1_28_1","unstructured":"SiSoftware. 2017. FP16 GPGPU Image Processing Performance & Quality. (2017). http:\/\/www.sisoftware.eu\/2017\/04\/14\/fp16-gpgpu-image-processing-performance-quality\/  SiSoftware. 2017. FP16 GPGPU Image Processing Performance & Quality. (2017). http:\/\/www.sisoftware.eu\/2017\/04\/14\/fp16-gpgpu-image-processing-performance-quality\/"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 32Nd International Conference on International Conference on Machine Learning -","volume":"37","author":"Suyog","unstructured":"Suyog Gupta et al. 2015. Deep Learning with Limited Numerical Precision . In Proceedings of the 32Nd International Conference on International Conference on Machine Learning - Volume 37 (ICML'15). JMLR.org, 1737--1746. http:\/\/dl.acm.org\/citation.cfm?id=3045118.3045303 Suyog Gupta et al. 2015. Deep Learning with Limited Numerical Precision. In Proceedings of the 32Nd International Conference on International Conference on Machine Learning - Volume 37 (ICML'15). JMLR.org, 1737--1746. http:\/\/dl.acm.org\/citation.cfm?id=3045118.3045303"},{"key":"e_1_3_2_1_30_1","volume-title":"Accelerating Deep Convolutional Networks using low-precision and sparsity. CoRR abs\/1610.00324","author":"Venkatesh Ganesh","year":"2016","unstructured":"Ganesh Venkatesh , Eriko Nurvitadhi , and Debbie Marr . 2016. Accelerating Deep Convolutional Networks using low-precision and sparsity. CoRR abs\/1610.00324 ( 2016 ). http:\/\/arxiv.org\/abs\/1610.00324 Ganesh Venkatesh, Eriko Nurvitadhi, and Debbie Marr. 2016. Accelerating Deep Convolutional Networks using low-precision and sparsity. CoRR abs\/1610.00324 (2016). http:\/\/arxiv.org\/abs\/1610.00324"}],"event":{"name":"PARMA-DITAM '18: 9th Workshop on Parallel Programming and RunTime Management Techniques for Manycore Architectures and 7th Workshop on Design Tools and Architectures for Multicore Embedded Computing Platforms","location":"Manchester United Kingdom","acronym":"PARMA-DITAM '18","sponsor":["HiPEAC HiPEAC Network of Excellence"]},"container-title":["Proceedings of the 9th Workshop and 7th Workshop on Parallel Programming and RunTime Management Techniques for Manycore Architectures and Design Tools and Architectures for Multicore Embedded Computing Platforms"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3183767.3183776","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3183767.3183776","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:08:29Z","timestamp":1750208909000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3183767.3183776"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,1,23]]},"references-count":30,"alternative-id":["10.1145\/3183767.3183776","10.1145\/3183767"],"URL":"https:\/\/doi.org\/10.1145\/3183767.3183776","relation":{},"subject":[],"published":{"date-parts":[[2018,1,23]]},"assertion":[{"value":"2018-01-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}