{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T11:35:42Z","timestamp":1730201742372,"version":"3.28.0"},"reference-count":23,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,9]]},"DOI":"10.1109\/cahpc.2018.8645923","type":"proceedings-article","created":{"date-parts":[[2019,2,21]],"date-time":"2019-02-21T23:19:26Z","timestamp":1550791166000},"page":"356-363","source":"Crossref","is-referenced-by-count":1,"title":["A Case Study on Optimizing Accurate Half Precision Average"],"prefix":"10.1109","author":[{"given":"Kenny","family":"Peou","sequence":"first","affiliation":[]},{"given":"Alan","family":"Kelly","sequence":"additional","affiliation":[]},{"given":"Joel","family":"Falcou","sequence":"additional","affiliation":[]},{"given":"Cecile","family":"Germain","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"199","article-title":"Instruction tables","volume":"4","author":"fog","year":"2017","journal-title":"Software Optimization Resources"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1975.1055330"},{"key":"ref12","first-page":"2861","article-title":"Accelerating deep convolutional networks using low-precision and sparsity","author":"marr","year":"2017","journal-title":"Acoustics Speech and Signal Processing (ICASSP) 2017 IEEE International Conference on"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/103162.103163"},{"key":"ref14","first-page":"1737","article-title":"Deep Learning with Limited Numerical Precision","author":"gupta","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML-15)"},{"journal-title":"New Features in CUDA 7 5","year":"0","author":"harris","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/363707.363723"},{"key":"ref17","first-page":"281","article-title":"Some Methods for classification and Analysis of Multivariate Observations","volume":"1","author":"macqueen -kmeans","year":"1967","journal-title":"Proceedings of 5th Berkeley Symposium on Mathematical Statistics and Probability 1967"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.68"},{"journal-title":"Handbook of Floating-Point Arithmetic","year":"2009","author":"muller","key":"ref19"},{"journal-title":"An efficient K-means clustering algorithm","year":"1997","author":"alsabti","key":"ref4"},{"journal-title":"Auto-vectorization in Gcc","year":"0","key":"ref3"},{"journal-title":"2017 Revision ID081717","article-title":"ARM. ARM Architecture Reference Manual Supplement: The Scalable Vector Extension(SVE), for ARMv8-A","year":"2017","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2015.44"},{"journal-title":"Compiler auto-vectorization techniques and challenges","year":"2016","author":"emerson","key":"ref8"},{"key":"ref7","article-title":"Quantifying the interference caused by subnormal floating-point values","author":"dooley","year":"2006","journal-title":"Proceedings of the Workshop on Operating System Interference in High Performance Applications"},{"journal-title":"Caffe2 adds 16 bit floating point training support on the nvidia volta platform","year":"0","key":"ref2"},{"journal-title":"bsimd Introduction","year":"0","key":"ref1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2004.1327964"},{"journal-title":"half Half-precision floating point library","year":"0","author":"rau","key":"ref20"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1145\/512274.512287","article-title":"Reducing Truncation Errors by Programming","volume":"7","author":"wolfe","year":"1963","journal-title":"Communications of the ACM"},{"journal-title":"Data alignment Straighten up and fly right","year":"2005","author":"rentzsh","key":"ref21"},{"journal-title":"Fast Half Float Conversions","year":"2010","author":"van der zijp","key":"ref23"}],"event":{"name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","start":{"date-parts":[[2018,9,24]]},"location":"Lyon, France","end":{"date-parts":[[2018,9,27]]}},"container-title":["2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8638685\/8645847\/08645923.pdf?arnumber=8645923","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T06:04:59Z","timestamp":1643263499000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8645923\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/cahpc.2018.8645923","relation":{},"subject":[],"published":{"date-parts":[[2018,9]]}}}