{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:38:11Z","timestamp":1740148691391,"version":"3.37.3"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2020,2,28]],"date-time":"2020-02-28T00:00:00Z","timestamp":1582848000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,2,28]],"date-time":"2020-02-28T00:00:00Z","timestamp":1582848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s11265-019-01505-1","type":"journal-article","created":{"date-parts":[[2020,2,28]],"date-time":"2020-02-28T11:03:46Z","timestamp":1582887826000},"page":"659-675","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["MAHASIM: Machine-Learning Hardware Acceleration Using a Software-Defined Intelligent Memory System"],"prefix":"10.1007","volume":"93","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2305-9892","authenticated-orcid":false,"given":"Bahar","family":"Asgari","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Saibal","family":"Mukhopadhyay","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sudhakar","family":"Yalamanchili","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,2,28]]},"reference":[{"key":"1505_CR1","doi-asserted-by":"crossref","unstructured":"Asgari, B., Hadidi, R., Kim, H., Yalamanchili, S. (2019). Eridanus: Efficiently running inference of dnns using systolic arrays. IEEE Micro.","DOI":"10.1109\/MM.2019.2930057"},{"key":"1505_CR2","doi-asserted-by":"crossref","unstructured":"Asgari, B., Hadidi, R., Kim, H., Yalamanchili, S. (2019). Lodestar: Creating locally-dense cnns for efficient inference on systolic arrays. In Proceedings of the 56th Annual Design Automation Conference 2019 (pp. 233). ACM.","DOI":"10.1145\/3316781.3322472"},{"key":"1505_CR3","doi-asserted-by":"crossref","unstructured":"Asghari-Moghaddam, H., Son, Y. H., Ahn, J. H., Kim, N. S. (2016). Chameleon: Versatile and practical near-dram acceleration architecture for large memory systems. In 2016 49th annual IEEE\/ACM international symposium on Microarchitecture (MICRO) (pp 1\u201313). IEEE.","DOI":"10.1109\/MICRO.2016.7783753"},{"key":"1505_CR4","doi-asserted-by":"crossref","unstructured":"Azarkhish, E., Rossi, D., Loi, I., Benini, L. (2017). Neurostream: Scalable and energy efficient deep learning with smart memory cubes. arXiv:1701.06420.","DOI":"10.1109\/TPDS.2017.2752706"},{"key":"1505_CR5","doi-asserted-by":"crossref","unstructured":"Bae, Y., Hadidi, R., Asgari, B., Cao, J., Kim, H. (2019). Capella: Customizing perception for edge devices by efficiently allocating fpgas to dnns. In Proceedings of The International Conference on Field-Programmable Logic and Applications 2019.","DOI":"10.1109\/FPL.2019.00076"},{"key":"1505_CR6","unstructured":"Bahdanau, D., Cho, K., Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv:1409.0473."},{"key":"1505_CR7","doi-asserted-by":"crossref","unstructured":"Balfour, J., & Dally, W. J. (2014). Design tradeoffs for tiled cmp on-chip networks. In ACM International conference on supercomputing 25th anniversary volume (pp. 390\u2013401). ACM.","DOI":"10.1145\/2591635.2667187"},{"issue":"24","key":"1505_CR8","doi-asserted-by":"publisher","first-page":"3927","DOI":"10.1109\/JLT.2013.2283277","volume":"31","author":"S Borkar","year":"2013","unstructured":"Borkar, S. (2013). Role of interconnects in the future of computing. Journal of Lightwave Technology, 31(24), 3927\u20133933.","journal-title":"Journal of Lightwave Technology"},{"key":"1505_CR9","unstructured":"Canziani, A., Paszke, A., Culurciello, E. (2016). An analysis of deep neural network models for practical applications. arXiv:1605.07678."},{"key":"1505_CR10","unstructured":"Cao, J., Hadidi, R., Arulraj, J. S., Kim, H. (2019). Understanding the power consumption of executing deep neural networks on a distributed robot system. In Proceedings of the CODES+ISSS: International Conference on Hardware\/Software Codesign and System Synthesis 2019."},{"key":"1505_CR11","doi-asserted-by":"crossref","unstructured":"Chang, A. X. M., & Culurciello, E. (2017). Hardware accelerators for recurrent neural networks on fpga. In 2017 IEEE International symposium on circuits and systems (ISCAS) (pp. 1\u20134). IEEE.","DOI":"10.1109\/ISCAS.2017.8050816"},{"key":"1505_CR12","doi-asserted-by":"crossref","unstructured":"Chen, Y., Luo, T., Liu, S., Zhang, S., He, L., Wang, J., Li, L., Chen, T., Xu, Z., Sun, N., et al. (2014). Dadiannao: a machine-learning supercomputer. In Proceedings of the 47th Annual IEEE\/ACM International Symposium on Microarchitecture (pp. 609\u2013622). IEEE Computer Society.","DOI":"10.1109\/MICRO.2014.58"},{"issue":"1","key":"1505_CR13","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1109\/JSSC.2016.2616357","volume":"52","author":"YH Chen","year":"2017","unstructured":"Chen, Y. H., Krishna, T., Emer, J. S., Sze, V. (2017). Eyeriss: an energy-efficient reconfigurable accelerator for deep convolutional neural networks. IEEE Journal of Solid-State Circuits, 52(1), 127\u2013138.","journal-title":"IEEE Journal of Solid-State Circuits"},{"key":"1505_CR14","doi-asserted-by":"crossref","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Bahdanau, D., Bengio, Y. (2014). On the properties of neural machine translation: Encoder-decoder approaches. arXiv:1409.1259.","DOI":"10.3115\/v1\/W14-4012"},{"key":"1505_CR15","doi-asserted-by":"crossref","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., Bengio, Y. (2014). Learning phrase representations using rnn encoder-decoder for statistical machine translation. arXiv:1406.1078.","DOI":"10.3115\/v1\/D14-1179"},{"key":"1505_CR16","unstructured":"Consortium, H.M.C., & et al. (2013). Hybrid memory cube specification 1.0 Last Revision Jan."},{"key":"1505_CR17","doi-asserted-by":"crossref","unstructured":"Dally, W. J., Labonte, F., Das, A., Hanrahan, P., Ahn, J. H., Gummaraju, J., Erez, M., Jayasena, N., Buck, I., Knight, T. J., et al. (2003). Merrimac: Supercomputing with streams. In Proceedings of the 2003 ACM\/IEEE conference on Supercomputing (pp. 35). ACM.","DOI":"10.1145\/1048935.1050187"},{"key":"1505_CR18","unstructured":"Dean, J. (2017). Machine learning for systems and systems for machine learning."},{"key":"1505_CR19","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., Fei-Fei, L. (2009). Imagenet: a large-scale hierarchical image database. In 2009. CVPR 2009. IEEE conference on Computer vision and pattern recognition (pp. 248\u2013255). IEEE.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1505_CR20","doi-asserted-by":"crossref","unstructured":"Du, Z., Fasthuber, R., Chen, T., Ienne, P., Li, L., Luo, T., Feng, X., Chen, Y., Temam, O. (2015). Shidiannao: Shifting vision processing closer to the sensor. In ACM SIGARCH Computer architecture news (vol. 43, pp. 92\u2013104). ACM.","DOI":"10.1145\/2872887.2750389"},{"key":"1505_CR21","doi-asserted-by":"crossref","unstructured":"Farmahini-Farahani, A., Ahn, J. H., Morrow, K., Kim, N. S. (2015). Nda: Near-dram acceleration architecture leveraging commodity dram devices and standard memory modules. In 2015 IEEE 21st international symposium on High performance computer architecture (HPCA) (pp. 283\u2013295). IEEE.","DOI":"10.1109\/HPCA.2015.7056040"},{"key":"1505_CR22","doi-asserted-by":"crossref","unstructured":"Gao, C., Neil, D., Ceolini, E., Liu, S. C., Delbruck, T. (2018). Deltarnn: a power-efficient recurrent neural network accelerator. In Proceedings of the 2018 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (pp. 21\u201330). ACM.","DOI":"10.1145\/3174243.3174261"},{"key":"1505_CR23","doi-asserted-by":"crossref","unstructured":"Gao, M., Ayers, G., Kozyrakis, C. (2015). Practical near-data processing for in-memory analytics frameworks. In 2015 international conference on Parallel architecture and compilation (PACT) (pp. 113\u2013124). IEEE.","DOI":"10.1109\/PACT.2015.22"},{"key":"1505_CR24","doi-asserted-by":"crossref","unstructured":"Gao, M., Pu, J., Yang, X., Horowitz, M., Kozyrakis, C. (2017). Tetris: Scalable and efficient neural network acceleration with 3d memory. In Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (pp. 751\u2013764). ACM.","DOI":"10.1145\/3093315.3037702"},{"key":"1505_CR25","doi-asserted-by":"crossref","unstructured":"Gentleman, W. M., & Kung, H. (1982). Matrix triangularization by systolic arrays. In Real-time signal processing IV (vol. 298, pp. 19\u201327). International Society for Optics and Photonics.","DOI":"10.1117\/12.932507"},{"issue":"4","key":"1505_CR26","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/2.375174","volume":"28","author":"M Gokhale","year":"1995","unstructured":"Gokhale, M., Holmes, B., Iobst, K. (1995). Processing in memory: The terasys massively parallel pim array. Computer, 28(4), 23\u201331.","journal-title":"Computer"},{"key":"1505_CR27","doi-asserted-by":"crossref","unstructured":"Grot, B., Hestness, J., Keckler, S. W., Mutlu, O. (2009). Express cube topologies for on-chip interconnects. In 2009. HPCA 2009. IEEE 15th international symposium on High performance computer architecture (pp. 163\u2013174). IEEE.","DOI":"10.1109\/HPCA.2009.4798251"},{"key":"1505_CR28","doi-asserted-by":"crossref","unstructured":"Hadidi, R., Asgari, B., Mudassar, B. A., Mukhopadhyay, S., Yalamanchili, S., Kim, H. (2017). Demystifying the characteristics of 3d-stacked memories: a case study for hybrid memory cube. In 2017 IEEE international symposium on Workload characterization (IISWC) (pp. 66\u201375). IEEE.","DOI":"10.1109\/IISWC.2017.8167757"},{"key":"1505_CR29","doi-asserted-by":"crossref","unstructured":"Hadidi, R., Asgari, B., Young, J., Mudassar, B. A., Garg, K., Krishna, T., Kim, H. (2018). Performance implications of nocs on 3d-stacked memories: Insights from the hybrid memory cube. In 2018 IEEE international symposium on Performance analysis of systems and software (ISPASS) (pp. 99\u2013108). IEEE.","DOI":"10.1109\/ISPASS.2018.00018"},{"key":"1505_CR30","doi-asserted-by":"crossref","unstructured":"Hadidi, R., Cao, J., Ryoo, M. S., Kim, H. (2019). Robustly executing dnns in iot systems using coded distributed computing. In Proceedings of the 56th Annual Design Automation Conference 2019 (pp. 234). ACM.","DOI":"10.1145\/3316781.3322474"},{"issue":"4","key":"1505_CR31","doi-asserted-by":"publisher","first-page":"3709","DOI":"10.1109\/LRA.2018.2856261","volume":"3","author":"R Hadidi","year":"2018","unstructured":"Hadidi, R., Cao, J., Woodward, M., Ryoo, M. S., Kim, H. (2018). Distributed perception by collaborative robots. IEEE Robotics and Automation Letters, 3(4), 3709\u20133716.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"1505_CR32","doi-asserted-by":"crossref","unstructured":"Hadidi, R., Cao, J., Woodward, M., Ryoo, M. S., Kim, H. (2018). Real-time image recognition using collaborative iot devices. In Proceedings of the 1st on Reproducible Quality-Efficient Systems Tournament on Co-designing Pareto-efficient Deep Learning (pp. 4). ACM.","DOI":"10.1145\/3229762.3229765"},{"key":"1505_CR33","doi-asserted-by":"crossref","unstructured":"Hadidi, R., Cao, J., Xie, Y., Asgari, B., Krishna, T., Kim, H. (2019). Characterizing the deployment of deep neural networks on commercial edge devices. In Proceedings of the International Symposium on Workload Characterization 2019. IEEE.","DOI":"10.1109\/IISWC47752.2019.9041955"},{"key":"1505_CR34","doi-asserted-by":"crossref","unstructured":"Han, S., Kang, J., Mao, H., Hu, Y., Li, X., Li, Y., Xie, D., Luo, H., Yao, S., Wang, Y., et al. (2017). Ese: Efficient speech recognition engine with sparse lstm on fpga. In Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (pp. 75\u201384). ACM.","DOI":"10.1145\/3020078.3021745"},{"key":"1505_CR35","doi-asserted-by":"crossref","unstructured":"Hassan, S. M., Yalamanchili, S., Mukhopadhyay, S. (2015). Near data processing: Impact and optimization of 3d memory system architecture on the uncore. In Proceedings of the 2015 International Symposium on Memory Systems (pp. 11\u201321). ACM.","DOI":"10.1145\/2818950.2818952"},{"key":"1505_CR36","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"1505_CR37","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural computation, 9(8), 1735\u20131780.","journal-title":"Neural computation"},{"key":"1505_CR38","doi-asserted-by":"crossref","unstructured":"Jeddeloh, J., & Keeth, B. (2012). Hybrid memory cube new dram architecture increases density and performance. In 2012 symposium on VLSI Technology (VLSIT) (pp. 87\u201388). IEEE.","DOI":"10.1109\/VLSIT.2012.6242474"},{"key":"1505_CR39","doi-asserted-by":"crossref","unstructured":"Jouppi, N. P., Young, C., Patil, N., Patterson, D., Agrawal, G., Bajwa, R., Bates, S., Bhatia, S., Boden, N., Borchers, A., et al. (2017). In-datacenter performance analysis of a tensor processing unit. In Proceedings of the 44th Annual International Symposium on Computer Architecture (pp. 1\u201312). ACM.","DOI":"10.1145\/3079856.3080246"},{"issue":"3-4","key":"1505_CR40","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1080\/03772063.1978.11451597","volume":"24","author":"G Kanttaiah","year":"1978","unstructured":"Kanttaiah, G. (1978). Bit-slice microprocessors. IETE Journal of Research, 24(3-4), 124\u2013131.","journal-title":"IETE Journal of Research"},{"issue":"5","key":"1505_CR41","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/MM.2011.89","volume":"31","author":"SW Keckler","year":"2011","unstructured":"Keckler, S. W., Dally, W. J., Khailany, B., Garland, M., Glasco, D. (2011). Gpus and the future of parallel computing. IEEE Micro, 31(5), 7\u201317.","journal-title":"IEEE Micro"},{"key":"1505_CR42","doi-asserted-by":"crossref","unstructured":"Kim, D., Kung, J., Chai, S., Yalamanchili, S., Mukhopadhyay, S. (2016). Neurocube: a programmable digital neuromorphic architecture with high-density 3d memory. In 2016 ACM\/IEEE 43rd annual international symposium on Computer architecture (ISCA) (pp. 380\u2013392). IEEE.","DOI":"10.1109\/ISCA.2016.41"},{"key":"1505_CR43","unstructured":"Kim, G., Kim, J., Ahn, J. H., Kim, J. (2013). Memory-centric system interconnect design with hybrid memory cubes. In Proceedings of the 22nd international conference on Parallel architectures and compilation techniques (pp. 145\u2013156). IEEE Press."},{"key":"1505_CR44","doi-asserted-by":"crossref","unstructured":"Kim, J., Balfour, J., Dally, W. (2007). Flattened butterfly topology for on-chip networks. In Proceedings of the 40th Annual IEEE\/ACM International Symposium on Microarchitecture (pp. 172\u2013182). IEEE Computer Society.","DOI":"10.1109\/MICRO.2007.29"},{"issue":"2","key":"1505_CR45","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1145\/329.332","volume":"9","author":"W Kim","year":"1984","unstructured":"Kim, W., Gajski, D., Kuck, D. J. (1984). A parallel pipelined relational query processor. ACM Transactions on Database Systems (TODS), 9(2), 214\u2013235.","journal-title":"ACM Transactions on Database Systems (TODS)"},{"key":"1505_CR46","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097\u20131105)."},{"key":"1505_CR47","doi-asserted-by":"crossref","unstructured":"Kung, H., McDanel, B., Zhang, S. Q. (2019). Packing sparse convolutional neural networks for efficient systolic array implementations: Column combining under joint optimization. In Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems (pp. 821\u2013834). ACM.","DOI":"10.1145\/3297858.3304028"},{"issue":"1","key":"1505_CR48","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1109\/MC.1982.1653825","volume":"15","author":"HT Kung","year":"1982","unstructured":"Kung, H. T. (1982). Why systolic architectures? IEEE Computer, 15(1), 37\u201346.","journal-title":"IEEE Computer"},{"key":"1505_CR49","doi-asserted-by":"crossref","unstructured":"Li, S., Ahn, J. H., Strong, R. D., Brockman, J. B., Tullsen, D. M., Jouppi, N. P. (2009). Mcpat: an integrated power, area, and timing modeling framework for multicore and manycore architectures. In 2009. MICRO-42. 42nd annual IEEE\/ACM international symposium on Microarchitecture (pp. 469\u2013480). IEEE.","DOI":"10.1145\/1669112.1669172"},{"key":"1505_CR50","doi-asserted-by":"crossref","unstructured":"LiKamWa, R., Hou, Y., Gao, J., Polansky, M., Zhong, L. (2016). Redeye: Analog convnet image sensor architecture for continuous mobile vision. In ISCA\u201916 (pp. 255\u2013266). ACM.","DOI":"10.1145\/3007787.3001164"},{"issue":"2","key":"1505_CR51","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1145\/342001.339673","volume":"28","author":"K Mai","year":"2000","unstructured":"Mai, K., Paaske, T., Jayasena, N., Ho, R., Dally, W. J., Horowitz, M. (2000). Smart memories: a modular reconfigurable architecture. ACM SIGARCH Computer Architecture News, 28(2), 161\u2013171.","journal-title":"ACM SIGARCH Computer Architecture News"},{"key":"1505_CR52","doi-asserted-by":"crossref","unstructured":"Nai, L., Hadidi, R., Sim, J., Kim, H., Kumar, P., Kim, H. (2017). Graphpim: Enabling instruction-level pim offloading in graph computing frameworks. In 2017 IEEE international symposium on High performance computer architecture (HPCA) (pp. 457\u2013468). IEEE.","DOI":"10.1109\/HPCA.2017.54"},{"key":"1505_CR53","unstructured":"O\u2019Connor, M. (2014). Highlights of the high-bandwidth memory (hbm) standard. In Memory forum workshop."},{"key":"1505_CR54","doi-asserted-by":"crossref","unstructured":"O\u2019Connor, M., Chatterjee, N., Lee, D., Wilson, J., Agrawal, A., Keckler, S. W., Dally, W. J. (2017). Fine grained dram: energy efficient dram for extreme bandwidth systems. In Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture (pp. 41\u201354). ACM.","DOI":"10.1145\/3123939.3124545"},{"key":"1505_CR55","doi-asserted-by":"crossref","unstructured":"Pomerleau, D. A., Gusciora, G. L., Touretzky, D. S., Kung, H. (1988). Neural network simulation at warp speed: How we got 17 million connections per second. In Proceedings of 1988 IEEE International Conference on Neural Networks (pp. 143\u2013150).","DOI":"10.1109\/ICNN.1988.23922"},{"key":"1505_CR56","doi-asserted-by":"crossref","unstructured":"Quinton, P. (1984). Automatic synthesis of systolic arrays from uniform recurrent equations. In ACM SIGARCH Computer architecture news (vol. 12, pp. 208\u2013214). ACM.","DOI":"10.1145\/773453.808184"},{"issue":"3","key":"1505_CR57","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1007\/BF02253318","volume":"34","author":"G Rote","year":"1985","unstructured":"Rote, G. (1985). A systolic array algorithm for the algebraic path problem (shortest paths; matrix inversion). Computing, 34(3), 191\u2013219.","journal-title":"Computing"},{"key":"1505_CR58","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556."},{"issue":"11","key":"1505_CR59","doi-asserted-by":"publisher","first-page":"1590","DOI":"10.1109\/TCPMT.2015.2485158","volume":"5","author":"WJ Song","year":"2015","unstructured":"Song, W. J., Mukhopadhyay, S., Yalamanchili, S. (2015). Kitfox: Multiphysics libraries for integrated power, thermal, and reliability simulations of multicore microarchitecture. IEEE Transactions on Components. Packaging and Manufacturing Technology, 5(11), 1590\u20131601.","journal-title":"Packaging and Manufacturing Technology"},{"key":"1505_CR60","unstructured":"Standard, J. (2013). High bandwidth memory (hbm) dram JESD235."},{"key":"1505_CR61","unstructured":"Sutskever, I. (2013). Training recurrent neural networks. University of Toronto, Toronto."},{"key":"1505_CR62","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z. (2016). Rethinking the inception architecture for computer vision. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (pp. 2818\u20132826).","DOI":"10.1109\/CVPR.2016.308"},{"key":"1505_CR63","unstructured":"TensorFlow: Performance benchmarks (2017). https:\/\/www.tensorflow.org\/performance\/benchmarks."},{"key":"1505_CR64","unstructured":"TensorFlow: Tensorflow (2018). https:\/\/www.tensorflow.org\/."},{"key":"1505_CR65","doi-asserted-by":"crossref","unstructured":"Toomarian, N., & Barhen, J. (1991). Fast temporal neural learning using teacher forcing. In 1991., IJCNN-91-seattle international joint conference on Neural networks (vol. 1, pp. 817\u2013822). IEEE.","DOI":"10.1109\/IJCNN.1991.155284"},{"key":"1505_CR66","unstructured":"TRANSLATION, T.W.O.S.M.: Wmt\u201915 (2017). http:\/\/www.statmt.org\/wmt15\/translation-task.html."},{"key":"1505_CR67","doi-asserted-by":"crossref","unstructured":"Venkataramani, S., Ranjan, A., Banerjee, S., Das, D., Avancha, S., Jagannathan, A., Durg, A., Nagaraj, D., Kaul, B., Dubey, P., et al. (2017). Scaledeep: a scalable compute architecture for learning and evaluating deep networks. In Proceedings of the 44th Annual International Symposium on Computer Architecture (pp. 13\u201326). ACM.","DOI":"10.1145\/3079856.3080244"},{"key":"1505_CR68","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhou, D., Han, X., Yoshimura, T. (2017). Chain-nn: an energy-efficient 1d chain architecture for accelerating deep convolutional neural networks. In Design, automation & test in europe conference & exhibition (DATE), 2017 (pp. 1032\u20131037). IEEE.","DOI":"10.23919\/DATE.2017.7927142"},{"key":"1505_CR69","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, M., Yang, J. (2017). Towards memory-efficient processing-in-memory architecture for convolutional neural networks. In ACM SIGPLAN Notices (vol. 52, pp. 81\u201390). ACM.","DOI":"10.1145\/3140582.3081032"},{"issue":"10","key":"1505_CR70","doi-asserted-by":"publisher","first-page":"1550","DOI":"10.1109\/5.58337","volume":"78","author":"PJ Werbos","year":"1990","unstructured":"Werbos, P. J. (1990). Backpropagation through time: what it does and how to do it. Proceedings of the IEEE, 78(10), 1550\u2013 1560.","journal-title":"Proceedings of the IEEE"},{"issue":"4","key":"1505_CR71","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D. (2009). Roofline: an insightful visual performance model for multicore architectures. Communications of the ACM, 52(4), 65\u201376.","journal-title":"Communications of the ACM"},{"key":"1505_CR72","unstructured":"Wu, Y., Schuster, M., Chen, Z., Le, Q. V., Norouzi, M., Macherey, W., Krikun, M., Cao, Y., Gao, Q., Macherey, K., et al. (2016). Google\u2019s neural machine translation system: Bridging the gap between human and machine translation. arXiv:1609.08144."}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-019-01505-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11265-019-01505-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-019-01505-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T06:35:35Z","timestamp":1622702135000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11265-019-01505-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,2,28]]},"references-count":72,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["1505"],"URL":"https:\/\/doi.org\/10.1007\/s11265-019-01505-1","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"type":"print","value":"1939-8018"},{"type":"electronic","value":"1939-8115"}],"subject":[],"published":{"date-parts":[[2020,2,28]]},"assertion":[{"value":"4 February 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 August 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}