{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:23:11Z","timestamp":1774120991899,"version":"3.50.1"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030206550","type":"print"},{"value":"9783030206567","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20656-7_3","type":"book-chapter","created":{"date-parts":[[2019,6,4]],"date-time":"2019-06-04T23:02:40Z","timestamp":1559689360000},"page":"40-58","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Learning Neural Representations for Predicting GPU Performance"],"prefix":"10.1007","author":[{"given":"Shweta","family":"Salaria","sequence":"first","affiliation":[]},{"given":"Aleksandr","family":"Drozd","sequence":"additional","affiliation":[]},{"given":"Artur","family":"Podobas","sequence":"additional","affiliation":[]},{"given":"Satoshi","family":"Matsuoka","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,17]]},"reference":[{"key":"3_CR1","unstructured":"Almazro, D., Shahatah, G., Albdulkarim, L., Kherees, M., Martinez, R., Nzoukou, W.: A survey paper on recommender systems. CoRR abs\/1006.5278 (2010)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Baghsorkhi, S.S., Delahaye, M., Patel, S.J., Gropp, W.D., Huw, W.M.: An adaptive performance modeling tool for GPU architectures. In: Proceedings of the 15th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2010, pp. 105\u2013114 (2010)","DOI":"10.1145\/1693453.1693470"},{"key":"3_CR3","doi-asserted-by":"publisher","unstructured":"Bakhoda, A., Yuan, G.L., Fung, W.W.L., Wong, H., Aamodt, T.M.: Analyzing CUDA workloads using a detailed GPU simulator. In: 2009 IEEE International Symposium on Performance Analysis of Systems and Software, pp. 163\u2013174, April 2009. \n                      https:\/\/doi.org\/10.1109\/ISPASS.2009.4919648","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"3_CR4","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/978-3-7908-2604-3_16","volume-title":"Proceedings of COMPSTAT'2010","author":"L\u00e9on Bottou","year":"2010","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Proceedings of COMPSTAT 2010, pp. 177\u2013186 (2010)"},{"key":"3_CR5","doi-asserted-by":"publisher","first-page":"1247","DOI":"10.5194\/gmd-7-1247-2014","volume":"7","author":"T Chai","year":"2014","unstructured":"Chai, T., Draxler, R.R.: Root mean square error (RMSE) or mean absolute error (MAE) - arguments against avoiding RMSE in the literature. Geosco. Model Dev. 7, 1247\u20131250 (2014)","journal-title":"Geosco. Model Dev."},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Che, S., et al.: Rodinia: a benchmark suite for hetrogenous computing. In: International Symposium on Workload Characterization (IISWC) (2009)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"3_CR7","unstructured":"NVIDIA Corporation. \n                      https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html"},{"issue":"2","key":"3_CR8","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/MM.2018.112130030","volume":"38","author":"J Dean","year":"2018","unstructured":"Dean, J., Patterson, D., Young, C.: A new golden age in computer architecture: empowering the machine-learning revolution. IEEE Micro 38(2), 21\u201329 (2018)","journal-title":"IEEE Micro"},{"key":"3_CR9","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural network. In: Proceedings of the Fourteenth International Conference on Artifical Intelligence and Statistics. PMLR 15, pp. 315\u2013323 (2011)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Govindaraju, N.K., Larsen, S., Gray, J., Manocha, D.: A memory model for scientific algorithms on graphics processors. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, November 2006 (2006)","DOI":"10.1109\/SC.2006.2"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Grauer-Gray, S., Xu, L., Searles, R., Ayalasomayajula, S., Cavazos, J.: Auto-tuning a high-level language targeted to GPU codes. In: Innovative Parallel Computing (InPar) (2012)","DOI":"10.1109\/InPar.2012.6339595"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Hong, S., Kim, H.: An integrated GPU power and performance model. In: Proceedings of the 37th Annual International Symposium on Computer Architecture, ISCA 2010, pp. 280\u2013289 (2010)","DOI":"10.1145\/1815961.1815998"},{"key":"3_CR13","unstructured":"Jaderberg, M., et al.: Reinforcement learning with unsupervised auxiliary tasks. CoRR abs\/1611.05397 (2016)"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Kerr, A., Anger, E., Hendry, G., Yalamanchili, S.: Eiger: a framework for the automated synthesis of statistical performance models. In: 2012 19th International Conference on High Performance Computing, pp. 1\u20136 (2012)","DOI":"10.1109\/HiPC.2012.6507525"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Liu, W., Schmidt, B.: Performance predictions for general-purpose computation on GPUs. In: Proceedings of 2007 International Conference on Parallel Processing, ICPP (2017)","DOI":"10.1109\/ICPP.2007.67"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Luo, C., Suda, R.: A performance and energy consumption analytical model for GPU. In: 2011 IEEE Ninth International Conference on Dependable, Autonomic and Secure Computing, pp. 658\u2013665 (2011)","DOI":"10.1109\/DASC.2011.117"},{"key":"3_CR17","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: Advances in Neural Information Processing Systems 26. Curran Associates, Inc. (2013)"},{"key":"3_CR18","unstructured":"Mirowski, P.W., et al.: Learning to navigate in complex environments. CoRR abs\/1611.03673 (2016)"},{"key":"3_CR19","unstructured":"Nvidia Turing GPU Architecture. \n                      https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/design-visualization\/technologies\/turing-architecture\/NVIDIA-Turing-Architecture-Whitepaper.pdf"},{"key":"3_CR20","unstructured":"NVProf. \n                      https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html"},{"key":"3_CR21","unstructured":"The OpenCL Specification. \n                      https:\/\/www.khronos.org\/opencl\/"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Salaria, S., Drozd, A., Podobas, A., Matsuoka, S.: Predicting performance using collaborative filtering. In: Proceedings of the 2018 IEEE International Conference on Cluster Computing, pp. 504\u2013514. CLUSTER (2018)","DOI":"10.1109\/CLUSTER.2018.00066"},{"key":"3_CR23","unstructured":"Tokui, S., Oono, K., Hido, S., Clayton, J.: Chainer: a next generation open source framework for deep learning. In: Proceedings of Workshop on Machine Learning Systems in NIPS (2010)"},{"key":"3_CR24","unstructured":"Top500. \n                      https:\/\/www.top500.org"},{"issue":"4","key":"3_CR25","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Wu, G., Greathouse, J.L., Lyashevsky, A., Jayasena, N., Chiou, D.: GPGPU performance and power estimation using machine learning. In: 2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA), pp. 564\u2013576, February 2015","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Xhang, Y., Owens, J.D.: A quantitative performance analysis model for GPU architectures. In: Proceedings of the 17th IEEE International Symposium on High Performance Computer Architecture, HPCA 2011 (2011)","DOI":"10.1109\/HPCA.2011.5749745"},{"key":"3_CR28","unstructured":"Yuting, Z., Kibok, L., Honglak, L.: Augmenting supervised neural networks with unsupervised objectives for large-scale image classification. In: Proceedings of the 33rd International Conference on International Conference on Machine Learning, ICML 2016, vol. 48, pp. 612\u2013621. JMLR.org (2016)"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20656-7_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,4]],"date-time":"2019-06-04T23:17:43Z","timestamp":1559690263000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-20656-7_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030206550","9783030206567"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20656-7_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"17 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Frankfurt","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 June 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 June 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"70","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"17","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"4-5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"n\/a","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}