{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T21:03:40Z","timestamp":1777928620199,"version":"3.51.4"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Research Grants Council of Hong Kong, SAR","award":["CUHK24209017"],"award-info":[{"award-number":["CUHK24209017"]}]},{"DOI":"10.13039\/501100010428","name":"Innovation and Technology Fund","doi-asserted-by":"publisher","award":["PRP\/065\/20FX"],"award-info":[{"award-number":["PRP\/065\/20FX"]}],"id":[{"id":"10.13039\/501100010428","id-type":"DOI","asserted-by":"publisher"}]},{"name":"SmartMore"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1109\/tcad.2023.3241110","type":"journal-article","created":{"date-parts":[[2023,2,8]],"date-time":"2023-02-08T18:41:50Z","timestamp":1675881710000},"page":"3210-3223","source":"Crossref","is-referenced-by-count":10,"title":["A High-Performance Accelerator for Super-Resolution Processing on Embedded GPU"],"prefix":"10.1109","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9501-9254","authenticated-orcid":false,"given":"Wenqian","family":"Zhao","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5337-1783","authenticated-orcid":false,"given":"Yang","family":"Bai","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5153-6698","authenticated-orcid":false,"given":"Qi","family":"Sun","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4604-778X","authenticated-orcid":false,"given":"Wenbo","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8555-4544","authenticated-orcid":false,"given":"Haisheng","family":"Zheng","sequence":"additional","affiliation":[{"name":"Heterogeneous Computing Center, SmartMore Corporation Limited, Hong Kong, China"}]},{"given":"Nianjuan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Heterogeneous Computing Center, SmartMore Corporation Limited, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0048-3140","authenticated-orcid":false,"given":"Jiangbo","family":"Lu","sequence":"additional","affiliation":[{"name":"Heterogeneous Computing Center, SmartMore Corporation Limited, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6406-4810","authenticated-orcid":false,"given":"Bei","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR"}]},{"given":"Martin D. F.","family":"Wong","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR"}]}],"member":"263","reference":[{"key":"ref13","first-page":"1072","article-title":"ACPNet: Anchor-Center based person network for human pose estimation and instance segmentation","author":"bai","year":"2019","journal-title":"Proc IEEE Int Conf Multimedia Expo (ICME)"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00024"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2001.937655"},{"key":"ref15","year":"2021","journal-title":"Nvidia tensorrt"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293898"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3448104"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_16"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062207"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.5244\/C.26.135"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/DATE51398.2021.9474100"},{"key":"ref54","year":"2020","journal-title":"Simple-SR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD45719.2019.8942086"},{"key":"ref16","year":"2021","journal-title":"Intel MKL-DNN"},{"key":"ref19","article-title":"Unfolding the alternating optimization for blind super resolution","volume":"33","author":"luo","year":"2020","journal-title":"Proc Conf Neural Inf Process Syst (NIPS)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01132"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.153"},{"key":"ref50","first-page":"529","article-title":"Collapsible linear blocks for super-efficient super resolution","volume":"4","author":"bhardwaj","year":"2022","journal-title":"Proc Mach Learn Syst"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00399"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.618"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00344"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9413080"},{"key":"ref42","article-title":"Learning sparse neural networks through L_0 regularization","author":"louizos","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref41","article-title":"BRECQ: Pushing the limit of post-training quantization by block reconstruction","author":"li","year":"2020","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.298"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10593-2_13"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_6"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218732"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317829"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317874"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6789"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"ref6","first-page":"20343","article-title":"LAPAR: Linearly-assembled pixel-adaptive regression network for single image super-resolution and beyond","author":"li","year":"2020","journal-title":"Proc Conf Neural Inf Process Syst (NIPS)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_20"},{"key":"ref40","article-title":"Gaussian process optimization in the bandit setting: No regret and experimental design","author":"srinivas","year":"2009","journal-title":"arXiv 0912 3995"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_25"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"ref37","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","author":"chen","year":"2018","journal-title":"Proc of the 2nd USENIX Symp on Operating Systems Design and Implementation (OSDI)"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.182"},{"key":"ref31","article-title":"Ternary neural networks with fine-grained quantization","author":"mellempudi","year":"2017","journal-title":"arXiv 1705 01462"},{"key":"ref30","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding","author":"han","year":"2015","journal-title":"arXiv 1510 00149 [cs]"},{"key":"ref33","first-page":"4107","article-title":"Binarized neural networks","volume":"29","author":"hubara","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref32","first-page":"3123","article-title":"BinaryConnect: Training deep neural networks with binary weights during propagations","volume":"28","author":"courbariaux","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2439281"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643472"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643487"},{"key":"ref38","first-page":"7587","article-title":"GPyTorch: Blackbox matrix-matrix Gaussian process inference with GPU acceleration","author":"gardner","year":"2018","journal-title":"Proc Conf Neural Inf Process Syst (NIPS)"},{"key":"ref24","author":"cheng","year":"2014","journal-title":"Professional CUDA C Programming"},{"key":"ref23","article-title":"Lightweight image super-resolution with adaptive weighted learning network","author":"wang","year":"2019","journal-title":"arXiv 1904 02358"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00141"},{"key":"ref25","article-title":"Integer quantization for deep learning inference: Principles and empirical evaluation","author":"wu","year":"2020","journal-title":"arXiv 2004 09602"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2012.2192127"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPHOT.2018.8368476"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCI.2016.2629284"},{"key":"ref28","first-page":"7948","article-title":"Post training 4-bit quantization of convolutional networks for rapid-deployment","volume":"32","author":"banner","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref27","first-page":"7197","article-title":"Up or down? Adaptive rounding for post-training quantization","author":"nagel","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref29","article-title":"Training deep neural networks with low precision multiplications","author":"courbariaux","year":"2014","journal-title":"Arxiv 1412 7024"}],"container-title":["IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/43\/10255327\/10041020.pdf?arnumber=10041020","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T19:01:21Z","timestamp":1696878081000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10041020\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10]]},"references-count":57,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tcad.2023.3241110","relation":{},"ISSN":["0278-0070","1937-4151"],"issn-type":[{"value":"0278-0070","type":"print"},{"value":"1937-4151","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10]]}}}