{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:15:20Z","timestamp":1763468120314},"reference-count":20,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,2]]},"DOI":"10.1109\/cgo.2013.6494986","type":"proceedings-article","created":{"date-parts":[[2013,4,11]],"date-time":"2013-04-11T17:38:10Z","timestamp":1365701890000},"page":"1-10","source":"Crossref","is-referenced-by-count":36,"title":["Performance upper bound analysis and optimization of SGEMM on Fermi and Kepler GPUs"],"prefix":"10.1109","author":[{"family":"Junjie Lai","sequence":"first","affiliation":[]},{"given":"A.","family":"Seznec","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1145\/2145816.2145819"},{"key":"18","first-page":"351","article-title":"Fast implementation of dgemm on fermi gpu","author":"tan","year":"2011","journal-title":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis on - SC '11"},{"journal-title":"NVIDIA Tesla K20\/K20X GPU Accelerators Application Performance Technical Brief","year":"2012","key":"15"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1145\/1356058.1356084"},{"journal-title":"Fermi Whitepaper","year":"2009","key":"13"},{"journal-title":"GTX680 Whitepaper","year":"2012","key":"14"},{"journal-title":"An Improved MAGMA GEMM for Fermi GPUs","year":"2010","author":"nath","key":"11"},{"journal-title":"Nvidia Cuda C Programming Guide 4 2","year":"0","key":"12"},{"journal-title":"Visual Profiler","year":"0","key":"3"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749745"},{"year":"0","key":"2"},{"journal-title":"Asfermi","year":"0","key":"1"},{"key":"10","doi-asserted-by":"crossref","DOI":"10.1145\/2063384.2063402","article-title":"Grophecy: Gpu performance projection from cpu code skeletons","author":"meng","year":"2011","journal-title":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis on - SC '11"},{"key":"7","first-page":"1","article-title":"Autotuning gemm kernels for the fermi gpu","author":"kurzak","year":"2012","journal-title":"Parallel and Distributed Systems IEEE Transactions on"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555775"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1145\/77726.255132"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1145\/362875.362879"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1145\/106973.106981"}],"event":{"name":"2013 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","start":{"date-parts":[[2013,2,23]]},"location":"Shenzhen","end":{"date-parts":[[2013,2,27]]}},"container-title":["Proceedings of the 2013 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6489844\/6494968\/06494986.pdf?arnumber=6494986","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T07:14:51Z","timestamp":1498029291000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6494986\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/cgo.2013.6494986","relation":{},"subject":[],"published":{"date-parts":[[2013,2]]}}}