{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:39:02Z","timestamp":1740123542509,"version":"3.37.3"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,6,17]],"date-time":"2020-06-17T00:00:00Z","timestamp":1592352000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,6,17]],"date-time":"2020-06-17T00:00:00Z","timestamp":1592352000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s11227-020-03362-3","type":"journal-article","created":{"date-parts":[[2020,6,17]],"date-time":"2020-06-17T13:03:46Z","timestamp":1592399026000},"page":"2486-2510","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Performance benchmarking of deep learning framework on Intel Xeon Phi"],"prefix":"10.1007","volume":"77","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9579-4426","authenticated-orcid":false,"given":"Chao-Tung","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jung-Chun","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu-Wei","family":"Chan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Endah","family":"Kristiani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chan-Fu","family":"Kuo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,6,17]]},"reference":[{"key":"3362_CR1","doi-asserted-by":"crossref","unstructured":"Ben-Nun T, Besta M, Huber S, Ziogas AN, Peter D, Hoefler T (2019) A modular benchmarking infrastructure for high-performance and reproducible deep learning. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE, pp 66\u201377","DOI":"10.1109\/IPDPS.2019.00018"},{"issue":"2","key":"3362_CR2","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1145\/567806.567807","volume":"28","author":"LS Blackford","year":"2002","unstructured":"Blackford LS, Petitet A, Pozo R, Remington K, Whaley RC, Demmel J, Dongarra J, Duff I, Hammarling S, Henry G et al (2002) An updated set of basic linear algebra subprograms (blas). ACM Trans Math Softw 28(2):135\u2013151","journal-title":"ACM Trans Math Softw"},{"key":"3362_CR3","doi-asserted-by":"crossref","unstructured":"Bottleson J, Kim S, Andrews J, Bindu P, Murthy DN, Jin J (2016) Clcaffe: Opencl accelerated caffe for convolutional neural networks. In: Proceedings\u20142016 IEEE 30th International Parallel and Distributed Processing Symposium, IPDPS 2016, pp 50\u201357. www.scopus.com","DOI":"10.1109\/IPDPSW.2016.182"},{"key":"3362_CR4","doi-asserted-by":"crossref","unstructured":"Bottou L, Cortes C, Denker JS, Drucker H, Guyon I, Jackel LD, LeCun Y, Muller UA, Sackinger E, Simard P et\u00a0al (1994) Comparison of classifier methods: a case study in handwritten digit recognition. In: Pattern Recognition, 1994. Vol 2-Conference B: Computer Vision & Image Processing. Proceedings of the 12th IAPR International. Conference on, vol\u00a02. IEEE, pp. 77\u201382","DOI":"10.1109\/ICPR.1994.576879"},{"key":"3362_CR5","unstructured":"Cifar10 (2017). https:\/\/www.cs.toronto.edu\/~kriz\/cifar.html"},{"key":"3362_CR6","unstructured":"Coates A, Ng A, Lee H (2011) An analysis of single-layer networks in unsupervised feature learning. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp 215\u2013223"},{"key":"3362_CR7","doi-asserted-by":"crossref","unstructured":"Dahl GE, Sainath TN, Hinton GE (2013) Improving deep neural networks for lvcsr using rectified linear units and dropout. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 8609\u20138613","DOI":"10.1109\/ICASSP.2013.6639346"},{"key":"3362_CR8","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-5209-5","volume-title":"Deep learning in natural language processing","author":"L Deng","year":"2018","unstructured":"Deng L, Liu Y (2018) Deep learning in natural language processing. Springer, Berlin"},{"key":"3362_CR9","unstructured":"Docker (2019). https:\/\/www.docker.com\/"},{"issue":"4","key":"3362_CR10","first-page":"381","volume":"2","author":"S Gold","year":"1996","unstructured":"Gold S, Rangarajan A et al (1996) Softmax to softassign: neural network algorithms for combinatorial optimization. J Artif Neural Netw 2(4):381\u2013399","journal-title":"J Artif Neural Netw"},{"issue":"6","key":"3362_CR11","doi-asserted-by":"publisher","first-page":"789","DOI":"10.1016\/0167-8191(96)00024-5","volume":"22","author":"W Gropp","year":"1996","unstructured":"Gropp W, Lusk E, Doss N, Skjellum A (1996) A high-performance, portable implementation of the mpi message passing interface standard. Parallel Comput 22(6):789\u2013828","journal-title":"Parallel Comput"},{"key":"3362_CR12","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7055.001.0001","volume-title":"Using MPI: portable parallel programming with the message-passing interface","author":"W Gropp","year":"1999","unstructured":"Gropp W, Lusk E, Skjellum A (1999) Using MPI: portable parallel programming with the message-passing interface, vol 1. MIT Press, Cambridge"},{"key":"3362_CR13","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7055.001.0001","volume-title":"Using MPI-2: advanced features of the message-passing interface","author":"W Gropp","year":"1999","unstructured":"Gropp W, Lusk E, Thakur R (1999) Using MPI-2: advanced features of the message-passing interface. MIT Press, Cambridge"},{"key":"3362_CR14","doi-asserted-by":"crossref","unstructured":"Grupp A, Kozlov V, Campos I, David M, Gomes J, Garc\u00eda \u00c1 L (2019) Benchmarking deep learning infrastructures by means of tensorflow and containers. In: International Conference on High Performance Computing. Springer, pp 478\u2013489","DOI":"10.1007\/978-3-030-34356-9_36"},{"key":"3362_CR15","unstructured":"Hacker SK (2018) Mastering docker: a quick-start beginner\u2019s guide. CreateSpace Independent Publishing Platform. https:\/\/dl.acm.org\/doi\/book\/10.5555\/3235203"},{"issue":"1","key":"3362_CR16","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1109\/MSP.2017.2749125","volume":"35","author":"J Han","year":"2018","unstructured":"Han J, Zhang D, Cheng G, Liu N, Xu D (2018) Advanced deep-learning techniques for salient and category-specific object detection: a survey. IEEE Signal Process Mag 35(1):84\u2013100","journal-title":"IEEE Signal Process Mag"},{"key":"3362_CR17","unstructured":"Han S, Pool J, Tran J, Dally W (2015) Learning both weights and connections for efficient neural network. In: Advances in Neural Information Processing Systems, pp 1135\u20131143"},{"key":"3362_CR18","doi-asserted-by":"crossref","unstructured":"Hegde G, Ramasamy N, Kapre N et\u00a0al (2016) Caffepresso: an optimized library for deep learning on embedded accelerator-based platforms. In: 2016 International Conference on Compliers, Architectures, and Sythesis of Embedded Systems (CASES). IEEE, pp 1\u201310","DOI":"10.1145\/2968455.2968511"},{"key":"3362_CR19","doi-asserted-by":"crossref","unstructured":"Jia Y, Shelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe: convolutional architecture for fast feature embedding. In: MM 2014\u2014Proceedings of the 2014 ACM Conference on Multimedia, pp 675\u2013678. www.scopus.com","DOI":"10.1145\/2647868.2654889"},{"key":"3362_CR20","doi-asserted-by":"crossref","unstructured":"Kim Y (2014) Convolutional neural networks for sentence classification. arXiv preprint arXiv:1408.5882","DOI":"10.3115\/v1\/D14-1181"},{"key":"3362_CR21","doi-asserted-by":"crossref","unstructured":"Kristiani E, Yang CT, Wang YT, Huang CY, Ko PC (2018) Container-based virtualization for real-time data streaming processing on the edge computing architecture. In: International Wireless Internet Conference. Springer, pp 203\u2013211","DOI":"10.1007\/978-3-030-06158-6_21"},{"key":"3362_CR22","unstructured":"Krizhevsky A, Hinton G (2010) Convolutional deep belief networks on cifar-10. Unpublished manuscript 40"},{"key":"3362_CR23","doi-asserted-by":"crossref","unstructured":"Kurth T, Smorkalov M, Mendygral P, Sridharan S, Mathuriya A (2018) Tensorflow at scale: performance and productivity analysis of distributed training with horovod, mlsl, and cray pe ml. Concurrency and Computation: Practice and Experience, p e4989","DOI":"10.1002\/cpe.4989"},{"key":"3362_CR24","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.neucom.2016.12.038","volume":"234","author":"W Liu","year":"2017","unstructured":"Liu W, Wang Z, Liu X, Zeng N, Liu Y, Alsaadi FE (2017) A survey of deep neural network architectures and their applications. Neurocomputing 234:11\u201326","journal-title":"Neurocomputing"},{"issue":"2","key":"3362_CR25","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s11263-019-01247-4","volume":"128","author":"L Liu","year":"2020","unstructured":"Liu L, Ouyang W, Wang X, Fieguth P, Chen J, Liu X, Pietik\u00e4inen M (2020) Deep learning for generic object detection: a survey. Int J Comput Vis 128(2):261\u2013318","journal-title":"Int J Comput Vis"},{"key":"3362_CR26","doi-asserted-by":"crossref","unstructured":"Liu L, Wu Y, Wei W, Cao W, Sahin S, Zhang Q (2018) Benchmarking deep learning frameworks: design considerations, metrics and beyond. In: 2018 IEEE 38th International Conference on Distributed Computing Systems (ICDCS). IEEE, pp 1258\u20131269","DOI":"10.1109\/ICDCS.2018.00125"},{"issue":"4","key":"3362_CR27","doi-asserted-by":"publisher","first-page":"3133","DOI":"10.1109\/COMST.2019.2916583","volume":"21","author":"NC Luong","year":"2019","unstructured":"Luong NC, Hoang DT, Gong S, Niyato D, Wang P, Liang YC, Kim DI (2019) Applications of deep reinforcement learning in communications and networking: a survey. IEEE Commun Surv Tutor 21(4):3133\u20133174","journal-title":"IEEE Commun Surv Tutor"},{"key":"3362_CR28","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th International Conference on Machine Learning (ICML-10), pp 807\u2013814"},{"key":"3362_CR29","doi-asserted-by":"publisher","first-page":"19143","DOI":"10.1109\/ACCESS.2019.2896880","volume":"7","author":"AB Nassif","year":"2019","unstructured":"Nassif AB, Shahin I, Attili I, Azzeh M, Shaalan K (2019) Speech recognition using deep neural networks: a systematic review. IEEE Access 7:19143\u201319165","journal-title":"IEEE Access"},{"key":"3362_CR30","doi-asserted-by":"crossref","unstructured":"Nath R, Tomov S, Dongarra J (2010) Accelerating GPU kernels for dense linear algebra. In: VECPAR. Springer, pp 83\u201392","DOI":"10.1007\/978-3-642-19328-6_10"},{"key":"3362_CR31","unstructured":"Openmpi (2017). https:\/\/www.open-mpi.org\/"},{"key":"3362_CR32","doi-asserted-by":"crossref","unstructured":"Panda DK, Awan AA, Subramoni H (2019) High performance distributed deep learning: a beginner\u2019s guide. In: PPoPP, pp 452\u2013454","DOI":"10.1145\/3293883.3302260"},{"key":"3362_CR33","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.jbi.2018.04.007","volume":"83","author":"S Purushotham","year":"2018","unstructured":"Purushotham S, Meng C, Che Z, Liu Y (2018) Benchmarking deep learning models on large healthcare datasets. J Biomed Inform 83:112\u2013134","journal-title":"J Biomed Inform"},{"key":"3362_CR34","doi-asserted-by":"crossref","unstructured":"Rosales C (2014) Porting to the intel xeon phi: opportunities and challenges. In: Proceedings\u20142013 Extreme Scaling Workshop, XSW 2013, pp 1\u20137. www.scopus.com","DOI":"10.1109\/XSW.2013.5"},{"issue":"3","key":"3362_CR35","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1109\/81.222799","volume":"40","author":"T Roska","year":"1993","unstructured":"Roska T, Hamori J, Labos E, Lotz K, Orz\u00f3 L, Takacs J, Venetianer PL, Vidnyanszky Z, Zar\u00e1ndy \u00c1 (1993) The use of cnn models in the subcortical visual pathway. IEEE Trans Circuits Syst I Fundam Theory Appl 40(3):182\u2013195","journal-title":"IEEE Trans Circuits Syst I Fundam Theory Appl"},{"key":"3362_CR36","unstructured":"Soheil B, Naveen R, Lukas S, et\u00a0al (2016) Comparative study of deep learning software frameworks. arXiv preprint arXiv:1511.06435"},{"key":"3362_CR37","doi-asserted-by":"crossref","unstructured":"Tanno R, Yanai K (2016) Caffe2c: a framework for easy implementation of cnn-based mobile applications. In: ACM International Conference Proceeding Series, vol 28-November-2016, pp 159\u2013164. www.scopus.com","DOI":"10.1145\/3004010.3004025"},{"key":"3362_CR38","doi-asserted-by":"crossref","unstructured":"Tarasov V, Rupprecht L, Skourtis D, Li W, Rangaswami R, Zhao M (2019) Evaluating docker storage performance: from workloads to graph drivers. Cluster Computing pp 1\u201314","DOI":"10.1007\/s10586-018-02893-y"},{"key":"3362_CR39","unstructured":"Tensorflow description (2019). https:\/\/www.tensorflow.org\/"},{"key":"3362_CR40","doi-asserted-by":"crossref","unstructured":"Tokic M, Palm G (2011) Value-difference based exploration: adaptive control between epsilon-greedy and softmax. KI 2011: Advances in Artificial Intelligence, pp 335\u2013346","DOI":"10.1007\/978-3-642-24455-1_33"},{"key":"3362_CR41","doi-asserted-by":"publisher","unstructured":"Venkateswaran S, Sarkar S (2019) Fitness-aware containerization service leveraging machine learning. IEEE Trans Serv Comput. https:\/\/doi.org\/10.1109\/TSC.2019.2898666","DOI":"10.1109\/TSC.2019.2898666"},{"key":"3362_CR42","doi-asserted-by":"publisher","unstructured":"Voulodimos A, Doulamis N, Doulamis A, Protopapadakis E (2018) Deep learning for computer vision: a brief review. Comput Intell Neurosci. https:\/\/doi.org\/10.1155\/2018\/7068349","DOI":"10.1155\/2018\/7068349"},{"key":"3362_CR43","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1016\/j.cpc.2018.03.016","volume":"228","author":"H Wang","year":"2018","unstructured":"Wang H, Zhang L, Han J, Weinan E (2018) Deepmd-kit: a deep learning package for many-body potential energy representation and molecular dynamics. Comput Phys Commun 228:178\u2013184","journal-title":"Comput Phys Commun"},{"key":"3362_CR44","unstructured":"Xbyak (2017). https:\/\/github.com\/herumi\/xbyak"},{"key":"3362_CR45","doi-asserted-by":"crossref","unstructured":"Yang CT, Liu JC, Chan YW, Kristiani E, Kuo CF (2018) On construction of a caffe deep learning framework based on intel xeon phi. In: International Conference on P2P, Parallel, Grid, Cloud and Internet Computing. Springer, pp 96\u2013106","DOI":"10.1007\/978-3-030-02607-3_9"},{"issue":"1","key":"3362_CR46","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1016\/j.cpc.2010.06.035","volume":"182","author":"CT Yang","year":"2011","unstructured":"Yang CT, Huang CL, Lin CF (2011) Hybrid CUDA, OpenMP, and MPI parallel programming on multicore GPU clusters. Comput Phys Commun 182(1):266\u2013269","journal-title":"Comput Phys Commun"},{"issue":"2","key":"3362_CR47","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1109\/81.747190","volume":"46","author":"\u00c1 Zar\u00e1ndy","year":"1999","unstructured":"Zar\u00e1ndy \u00c1, Orz\u00f3 L, Grawes E, Werblin F (1999) CNN-based models for color vision and visual illusions. IEEE Trans Circuits Syst I Fundam Theory Appl 46(2):229\u2013238","journal-title":"IEEE Trans Circuits Syst I Fundam Theory Appl"},{"issue":"5","key":"3362_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3178115","volume":"9","author":"Z Zhang","year":"2018","unstructured":"Zhang Z, Geiger J, Pohjalainen J, Mousa AED, Jin W, Schuller B (2018) Deep learning for environmentally robust speech recognition: an overview of recent developments. ACM TIST 9(5):1\u201328","journal-title":"ACM TIST"},{"issue":"11","key":"3362_CR49","doi-asserted-by":"publisher","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","volume":"30","author":"ZQ Zhao","year":"2019","unstructured":"Zhao ZQ, Zheng P, Xu St, Wu X (2019) Object detection with deep learning: a review. IEEE Trans Neural Netw Learn Syst 30(11):3212\u20133232","journal-title":"IEEE Trans Neural Netw Learn Syst"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-020-03362-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-020-03362-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-020-03362-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,6,16]],"date-time":"2021-06-16T23:25:08Z","timestamp":1623885908000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-020-03362-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,17]]},"references-count":49,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["3362"],"URL":"https:\/\/doi.org\/10.1007\/s11227-020-03362-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2020,6,17]]},"assertion":[{"value":"17 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}