{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T09:36:30Z","timestamp":1761989790007,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031695766"},{"type":"electronic","value":"9783031695773"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-69577-3_17","type":"book-chapter","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T19:02:05Z","timestamp":1724612525000},"page":"239-253","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["DProbe: Profiling and\u00a0Predicting Multi-tenant Deep Learning Workloads for\u00a0GPU Resource Scaling"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9092-2725","authenticated-orcid":false,"given":"Zechun","family":"Zhou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5098-1503","authenticated-orcid":false,"given":"Jingwei","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Hengquan","family":"Mei","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Sun","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0794-7681","authenticated-orcid":false,"given":"Guangzhong","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,26]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Arbat, S., Jayakumar, V.K., Lee, J., Wang, W., Kim, I.K.: Wasserstein adversarial Transformer for cloud workload prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), pp. 12433\u201312439 (2022)","DOI":"10.1609\/aaai.v36i11.21509"},{"key":"17_CR2","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: International Conference on Learning Representations (ICLR) (2015)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Cho, K., van Merrienboer, B., Gulcehre, C., Bougares, F., Schwenk, H., Bengio, Y.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP) (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"17_CR4","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2021)"},{"key":"17_CR5","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning (ICML), pp. 1587\u20131596 (2018)"},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Gao, Y., Gu, X., Zhang, H., Lin, H., Yang, M.: Runtime performance prediction for deep learning models with graph neural network. In: IEEE\/ACM 45th International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP), pp. 368\u2013380 (2023)","DOI":"10.1109\/ICSE-SEIP58684.2023.00039"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Howard, A., et\u00a0al.: Searching for MobileNetV3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Hu, Q., Sun, P., Yan, S., Wen, Y., Zhang, T.: Characterization and prediction of deep learning workloads in large-scale GPU datacenters. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC), pp. 1\u201315 (2021)","DOI":"10.1145\/3458817.3476223"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Hu, Q., Zhang, M., Sun, P., Wen, Y., Zhang, T.: Lucid: a non-intrusive, scalable and interpretable scheduler for deep learning training jobs. In: Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), pp. 457\u2013472 (2023)","DOI":"10.1145\/3575693.3575705"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Jayakumar, V.K., Lee, J., Kim, I.K., Wang, W.: A self-optimized generic workload prediction framework for cloud computing. In: IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 779\u2013788 (2020)","DOI":"10.1109\/IPDPS47924.2020.00085"},{"key":"17_CR12","unstructured":"Jeon, M., Venkataraman, S., Phanishayee, A., Qian, J., Xiao, W., Yang, F.: Analysis of large-scale multi-tenant GPU clusters for DNN training workloads. In: USENIX Annual Technical Conference (ATC), pp. 947\u2013960 (2019)"},{"key":"17_CR13","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: BERT: pre-training of deep bidirectional Transformers for language understanding. In: Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics (NAACL), pp. 4171\u20134186 (2019)"},{"key":"17_CR14","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems (NeurIPS) (2012)"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin Transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Ma, J., Zhao, Z., Yi, X., Chen, J., Hong, L., Chi, E.H.: Modeling task relationships in multi-task learning with multi-gate mixture-of-experts. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (KDD), pp. 1930\u20131939 (2018)","DOI":"10.1145\/3219819.3220007"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Mei, H., Qu, H., Sun, J., Gao, Y., Lin, H., Sun, G.: GPU occupancy prediction of deep learning models using graph neural network. In: IEEE International Conference on Cluster Computing (CLUSTER), pp. 318\u2013329 (2023)","DOI":"10.1109\/CLUSTER52292.2023.00034"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Patel, T., Liu, Z., Kettimuthu, R., Rich, P., Allcock, W., Tiwari, D.: Job characteristics on large-scale systems: long-term analysis, quantification, and implications. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC), pp. 1\u201317 (2020)","DOI":"10.1109\/SC41405.2020.00088"},{"key":"17_CR19","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018). https:\/\/openai.com\/research\/language-unsupervised"},{"key":"17_CR20","unstructured":"Radford, A., et\u00a0al.: Language models are unsupervised multitask learners (2019). https:\/\/openai.com\/research\/better-language-models"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"17_CR22","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017). https:\/\/openai.com\/research\/openai-baselines-ppo"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Shen, Z., Subbiah, S., Gu, X., Wilkes, J.: CloudScale: elastic resource scaling for multi-tenant cloud systems. In: Proceedings of the 2nd ACM Symposium on Cloud Computing (SoCC), pp. 1\u201314 (2011)","DOI":"10.1145\/2038916.2038921"},{"key":"17_CR24","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: International Conference on Learning Representations (ICLR) (2015)"},{"key":"17_CR25","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (NeurIPS) (2017)"},{"key":"17_CR26","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Li\u00f2, P., Bengio, Y.: Graph attention networks. In: International Conference on Learning Representations (ICLR) (2018)"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Weng, Q., et al.: MLaaS in the wild: workload analysis and scheduling in large-scale heterogeneous GPU clusters. In: 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI), pp. 945\u2013960 (2022)","DOI":"10.21203\/rs.3.rs-2266264\/v1"},{"key":"17_CR28","unstructured":"Weng, Q., et al.: Beware of fragmentation: scheduling GPU-Sharing workloads with fragmentation gradient descent. In: USENIX Annual Technical Conference (ATC), pp. 995\u20131008 (2023)"},{"key":"17_CR29","unstructured":"Wu, H., Xu, J., Wang, J., Long, M.: Autoformer: Decomposition Transformers with auto-correlation for long-term series forecasting. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 22419\u201322430 (2021)"},{"key":"17_CR30","unstructured":"Xiao, W., et al.: AntMan: dynamic scaling on GPU clusters for deep learning. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI), pp. 533\u2013548 (2020)"},{"issue":"1","key":"17_CR31","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1109\/TPDS.2021.3079202","volume":"33","author":"G Yeung","year":"2021","unstructured":"Yeung, G., Borowiec, D., Yang, R., Friday, A., Harper, R., Garraghan, P.: Horus: interference-aware and prediction-based scheduling in deep learning systems. IEEE Trans. Parallel Distrib. Syst. 33(1), 88\u2013100 (2021)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Zhou, Z., et al.: AHPA: Adaptive horizontal pod autoscaling systems on Alibaba cloud container service for Kubernetes. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), pp. 15621\u201315629 (2023)","DOI":"10.1609\/aaai.v37i13.26852"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2024: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-69577-3_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T19:07:06Z","timestamp":1724612826000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-69577-3_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031695766","9783031695773"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-69577-3_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"26 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Madrid","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}