{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T15:34:44Z","timestamp":1772206484551,"version":"3.50.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2021,11,28]],"date-time":"2021-11-28T00:00:00Z","timestamp":1638057600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,11,28]],"date-time":"2021-11-28T00:00:00Z","timestamp":1638057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","award":["UIDB\/50021\/2020"],"award-info":[{"award-number":["UIDB\/50021\/2020"]}]},{"name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","award":["PTDC\/EEI-HAC\/30485\/2017"],"award-info":[{"award-number":["PTDC\/EEI-HAC\/30485\/2017"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s11265-021-01687-7","type":"journal-article","created":{"date-parts":[[2021,11,28]],"date-time":"2021-11-28T17:02:20Z","timestamp":1638118940000},"page":"1365-1385","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["A Reconfigurable Posit Tensor Unit with Variable-Precision Arithmetic and Automatic Data Streaming"],"prefix":"10.1007","volume":"93","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0628-2259","authenticated-orcid":false,"given":"Nuno","family":"Neves","sequence":"first","affiliation":[]},{"given":"Pedro","family":"Tom\u00e1s","sequence":"additional","affiliation":[]},{"given":"Nuno","family":"Roma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,28]]},"reference":[{"issue":"2","key":"1687_CR1","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/MM.2018.112130030","volume":"38","author":"J Dean","year":"2018","unstructured":"Dean, J., Patterson, D., & Young, C. (2018). A new golden age in computer architecture: Empowering the machine-learning revolution. IEEE Micro, 38(2), 21\u201329.","journal-title":"IEEE Micro"},{"issue":"2","key":"1687_CR2","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1145\/3282307","volume":"62","author":"JL Hennessy","year":"2019","unstructured":"Hennessy, J. L., & Patterson, D. A. (2019). A new golden age for computer architecture. Communications of the ACM, 62(2), 48\u201360.","journal-title":"Communications of the ACM"},{"issue":"2","key":"1687_CR3","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MM.2018.022071131","volume":"38","author":"E Chung","year":"2018","unstructured":"Chung, E., Fowers, J., Ovtcharov, K., Papamichael, M., Caulfield, A., Massengill, T., Liu, M., Lo, D., Alkalay, S., Haselman, M., et al. (2018). Serving dnns in real time at datacenter scale with project brainwave. IEEE Micro, 38(2), 8\u201320.","journal-title":"IEEE Micro"},{"key":"1687_CR4","doi-asserted-by":"crossref","unstructured":"Delaye, E., Sirasao, A., Dudha, C., & Das, S. (2017).\u00a0Deep learning challenges and solutions with xilinx fpgas. In 2017 IEEE\/ACM International Conference on Computer-Aided Design (ICCAD), IEEE, pp.\u00a0908\u2013913.","DOI":"10.1109\/ICCAD.2017.8203877"},{"key":"1687_CR5","doi-asserted-by":"crossref","unstructured":"Fowers, J., Ovtcharov, K., Papamichael, M., Massengill, T., Liu, M., Lo, D., Alkalay, S., Haselman, M., Adams, L., Ghandi, M., et\u00a0al. (2018). A configurable cloud-scale dnn processor for real-time ai. In 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), IEEE, pp.\u00a01\u201314.","DOI":"10.1109\/ISCA.2018.00012"},{"issue":"9","key":"1687_CR6","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1145\/3154484","volume":"61","author":"NP Jouppi","year":"2018","unstructured":"Jouppi, N. P., Young, C., Patil, N., & Patterson, D. (2018). A domain-specific architecture for deep neural networks. Communications of the ACM, 61(9), 50\u201359.","journal-title":"Communications of the ACM"},{"key":"1687_CR7","unstructured":"NVIDIA. (2017). Nvidia tesla v100 GPU architecture. White paper."},{"key":"1687_CR8","doi-asserted-by":"crossref","unstructured":"Reagen, B., Whatmough, P., Adolf, R., Rama, S., Lee, H., Lee, S.\u00a0K., Hern\u00e1ndez-Lobato, J.\u00a0M., Wei, G.-Y., & Brooks, D. (2016).\u00a0Minerva: Enabling low-power, highly-accurate deep neural network accelerators. In 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), IEEE, pp.\u00a0267\u2013278.","DOI":"10.1109\/ISCA.2016.32"},{"key":"1687_CR9","doi-asserted-by":"crossref","unstructured":"Jouppi, N.\u00a0P., Young, C., Patil, N., Patterson, D., Agrawal, G., Bajwa, R., Bates, S., Bhatia, S., Boden, N., Borchers, A., et\u00a0al. (2017).\u00a0In-datacenter performance analysis of a tensor processing unit. In Proceedings of the 44th Annual International Symposium on Computer Architecture, pp.\u00a01\u201312.","DOI":"10.1145\/3079856.3080246"},{"key":"1687_CR10","unstructured":"K\u00f6ster, U., Webb, T., Wang, X., Nassar, M., Bansal, A.\u00a0K., Constable, W., Elibol, O., Gray, S., Hall, S., Hornof, L., et\u00a0al. (2017). Flexpoint: An adaptive numerical format for efficient training of deep neural networks. In Advances in neural information processing systems, pp.\u00a01742\u20131752."},{"key":"1687_CR11","doi-asserted-by":"crossref","unstructured":"Markidis, S., Der\u00a0Chien, S.\u00a0W., Laure, E., Peng, I.\u00a0B., & Vetter, J.\u00a0S. (2018).\u00a0Nvidia tensor core programmability, performance & precision. In 2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), IEEE, pp.\u00a0522\u2013531.","DOI":"10.1109\/IPDPSW.2018.00091"},{"issue":"2","key":"1687_CR12","first-page":"71","volume":"4","author":"JL Gustafson","year":"2017","unstructured":"Gustafson, J. L., & Yonemoto, I. T. (2017). Beating floating point at its own game: Posit arithmetic. Supercomputing Frontiers and Innovations, 4(2), 71\u201386.","journal-title":"Supercomputing Frontiers and Innovations"},{"key":"1687_CR13","doi-asserted-by":"crossref","unstructured":"Carmichael, Z., Langroudi, H.\u00a0F., Khazanov, C., Lillie, J., Gustafson, J.\u00a0L., & Kudithipudi, D. (2019). Deep positron: A deep neural network using the posit number system. In 2019 Design, Automation & Test in Europe Conference & Exhibition (DATE), IEEE, pp.\u00a01421\u20131426.","DOI":"10.23919\/DATE.2019.8715262"},{"key":"1687_CR14","doi-asserted-by":"crossref","unstructured":"Chaurasiya, R., Gustafson, J., Shrestha, R., Neudorfer, J., Nambiar, S., Niyogi, K., Merchant, F., & Leupers, R. (2018).\u00a0Parameterized posit arithmetic hardware generator. In 2018 IEEE 36th International Conference on Computer Design (ICCD), IEEE, pp.\u00a0334\u2013341.","DOI":"10.1109\/ICCD.2018.00057"},{"key":"1687_CR15","unstructured":"P. W, & Group. (2018). Posit standard documentation. Release, 3, 2."},{"issue":"1","key":"1687_CR16","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1109\/JSSC.2016.2616357","volume":"52","author":"Y-H Chen","year":"2016","unstructured":"Chen, Y.-H., Krishna, T., Emer, J. S., & Sze, V. (2016). Eyeriss: An energy-efficient reconfigurable accelerator for deep convolutional neural networks. IEEE journal of solid-state circuits, 52(1), 127\u2013138.","journal-title":"IEEE journal of solid-state circuits"},{"key":"1687_CR17","doi-asserted-by":"crossref","unstructured":"Koeplinger, D., Feldman, M., Prabhakar, R., Zhang, Y., Hadjis, S., Fiszel, R., Zhao, T., Nardi, L., Pedram, A., Kozyrakis, C., et\u00a0al. (2018).\u00a0Spatial: A language and compiler for application accelerators. In Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation, pp.\u00a0296\u2013311.","DOI":"10.1145\/3192366.3192379"},{"key":"1687_CR18","doi-asserted-by":"crossref","unstructured":"Nowatzki, T., Gangadhar, V., Ardalani, N., & Sankaralingam, K. (2017).\u00a0Stream-dataflow acceleration. In 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA), IEEE, pp.\u00a0416\u2013429.","DOI":"10.1145\/3079856.3080255"},{"key":"1687_CR19","doi-asserted-by":"crossref","unstructured":"Prabhakar, R., Zhang, Y., Koeplinger, D., Feldman, M., Zhao, T., Hadjis, S., Pedram, A., Kozyrakis, C., & Olukotun, K. (2017).\u00a0Plasticine: A reconfigurable architecture for parallel patterns. In 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA), IEEE, pp.\u00a0389\u2013402.","DOI":"10.1145\/3079856.3080256"},{"key":"1687_CR20","doi-asserted-by":"crossref","unstructured":"Neves, N., Tom\u00e1s, P., & Roma, N. (2017).\u00a0Adaptive in-cache streaming for efficient data management. IEEE Transactions on Very Large Scale Integration (VLSI) Systems 25, 7, 2130\u20132143.","DOI":"10.1109\/TVLSI.2017.2671405"},{"key":"1687_CR21","doi-asserted-by":"crossref","unstructured":"Jaiswal, M.\u00a0K., and So, H. K. (2018).\u00a0Architecture generator for type-3 unum posit adder\/subtractor. In 2018 IEEE International Symposium on Circuits and Systems (ISCAS), IEEE, pp.\u00a01\u20135.","DOI":"10.1109\/ISCAS.2018.8351142"},{"key":"1687_CR22","unstructured":"Forget, L., Uguen, Y., & De Dinechin, F.\u00a0(2019). Hardware cost evaluation of the posit number system. In Compas\u20192019 - Conf\u00e9rence d\u2019informatique en Parall\u00e9lisme, Architecture et Syst\u00e8me, pp.\u00a01\u20137."},{"key":"1687_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, H., et\u00a0al. (2019)\u00a0Efficient posit multiply-accumulate unit generator for deep learning applications. In 2019 IEEE International Symposium on Circuits and Systems (ISCAS), IEEE, pp.\u00a01\u20135.","DOI":"10.1109\/ISCAS.2019.8702349"},{"key":"1687_CR24","doi-asserted-by":"crossref","unstructured":"Ghosh, S., Martonosi, M., & Malik, S. (1997).\u00a0Cache miss equations: An analytical representation of cache misses. In Proceedings of the 11th international conference on Supercomputing, pp.\u00a0317\u2013324.","DOI":"10.1145\/263580.263657"},{"key":"1687_CR25","doi-asserted-by":"crossref","unstructured":"Pai\u00e1gua, S., Pratas, F., Tom\u00e1s, P., Roma, N., & Chaves, R. (2013).\u00a0Hotstream: Efficient data streaming of complex patterns to multiple accelerating kernels. In 2013 25th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), IEEE, pp.\u00a017\u201324.","DOI":"10.1109\/SBAC-PAD.2013.17"},{"key":"1687_CR26","doi-asserted-by":"crossref","unstructured":"Hussain, T., Palomar, O., Unsal, O., Cristal, A., Ayguad\u00e9, E., & Valero, M. (2014).\u00a0Advanced Pattern based Memory Controller for FPGA based HPC applications. In 2014 International Conference on High Performance Computing & Simulation (HPCS), IEEE, pp.\u00a0287\u2013294.","DOI":"10.1109\/HPCSim.2014.6903697"},{"key":"1687_CR27","first-page":"1","volume":"2011","author":"T Grosser","year":"2011","unstructured":"Grosser, T., Zheng, H., Aloor, R., Simb\u00fcrger, A., Gr\u00f6\u00dflinger, A., & Pouchet, L.-N. (2011). Polly-polyhedral optimization in llvm. In Proceedings of the First International Workshop on Polyhedral Compilation Techniques (IMPACT), 2011, 1.","journal-title":"In Proceedings of the First International Workshop on Polyhedral Compilation Techniques (IMPACT)"},{"key":"1687_CR28","first-page":"1","volume":"2019","author":"F De Dinechin","year":"2019","unstructured":"De Dinechin, F., Forget, L., Muller, J.-M., & Uguen, Y. (2019). Posits: the good, the bad and the ugly. In Proceedings of the Conference for Next Generation Arithmetic, 2019, 1\u201310.","journal-title":"In Proceedings of the Conference for Next Generation Arithmetic"},{"key":"1687_CR29","doi-asserted-by":"crossref","unstructured":"Viitanen, T., J\u00e4\u00e4skel\u00e4inen, P., Esko, O., & Takala, J. (2013).\u00a0Simplified floating-point division and square root. In 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, IEEE, pp.\u00a02707\u20132711.","DOI":"10.1109\/ICASSP.2013.6638148"},{"key":"1687_CR30","doi-asserted-by":"crossref","unstructured":"Guthaus, M.\u00a0R., Stine, J.\u00a0E., Ataei, S., Chen, B., Wu, B., & Sarwar, M. (2016).\u00a0Openram: An open-source memory compiler. In 2016 IEEE\/ACM International Conference on Computer-Aided Design (ICCAD), IEEE, pp.\u00a01\u20136.","DOI":"10.1145\/2966986.2980098"},{"key":"1687_CR31","unstructured":"Svensson, B. J. (2016). Exploring opencl memory throughput on the zynq. Technical Report."}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-021-01687-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11265-021-01687-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-021-01687-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,14]],"date-time":"2021-12-14T00:06:06Z","timestamp":1639440366000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11265-021-01687-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11,28]]},"references-count":31,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["1687"],"URL":"https:\/\/doi.org\/10.1007\/s11265-021-01687-7","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"value":"1939-8018","type":"print"},{"value":"1939-8115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,11,28]]},"assertion":[{"value":"25 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}