{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T16:05:00Z","timestamp":1764691500713,"version":"3.46.0"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T00:00:00Z","timestamp":1764633600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T00:00:00Z","timestamp":1764633600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1007\/s11704-024-40710-5","type":"journal-article","created":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T15:00:52Z","timestamp":1764687652000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Superior F1-score: I\/O feature driven algorithms for stream computing systems workload identification"],"prefix":"10.1007","volume":"20","author":[{"given":"Yuxiao","family":"Han","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yubo","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziyan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiguang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nong","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,2]]},"reference":[{"key":"40710_CR1","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/2745844.2745855","volume-title":"Proceedings of 2015 ACM SIGMETRICS International Conference on Measurement and Modeling of Computer Systems","author":"X Zhang","year":"2015","unstructured":"Zhang X, Huang Z, Wu C, Li Z, Lau F C M. Online auctions in IaaS clouds: welfare and profit maximization with server costs. In: Proceedings of 2015 ACM SIGMETRICS International Conference on Measurement and Modeling of Computer Systems. 2015, 3\u201315"},{"issue":"12","key":"40710_CR2","doi-asserted-by":"publisher","first-page":"3122","DOI":"10.14778\/3476311.3476388","volume":"14","author":"A Roy","year":"2021","unstructured":"Roy A, Jindal A, Gomatam P, Ouyang X, Gosalia A, Ravi N, Mann S, Jain P. SparkCruise: workload optimization in managed spark clusters at microsoft. Proceedings of the VLDB Endowment, 2021, 14(12): 3122\u20133134","journal-title":"Proceedings of the VLDB Endowment"},{"issue":"2","key":"40710_CR3","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1007\/s00778-023-00819-8","volume":"33","author":"M Fragkoulis","year":"2024","unstructured":"Fragkoulis M, Carbone P, Kalavri V, Katsifodimos A. A survey on the evolution of stream processing systems. The VLDB Journal, 2024, 33(2): 507\u2013541","journal-title":"The VLDB Journal"},{"key":"40710_CR4","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1145\/1007568.1007615","volume-title":"Proceedings of 2004 ACM SIGMOD International Conference on Management of Data","author":"S Babu","year":"2004","unstructured":"Babu S, Motwani R, Munagala K, Nishizawa I, Widom J. Adaptive ordering of pipelined stream filters. In: Proceedings of 2004 ACM SIGMOD International Conference on Management of Data. 2004, 407\u2013418"},{"key":"40710_CR5","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1145\/872757.872789","volume-title":"Proceedings of 2003 ACM SIGMOD International Conference on Management of Data","author":"B Babcock","year":"2003","unstructured":"Babcock B, Babu S, Motwani R, Datar M. Chain: operator scheduling for memory minimization in data stream systems. In: Proceedings of 2003 ACM SIGMOD International Conference on Management of Data. 2003, 253\u2013264"},{"key":"40710_CR6","first-page":"838","volume-title":"Proceedings of the 29th International Conference on Very Large Data Bases","author":"D Carney","year":"2003","unstructured":"Carney D, \u00c7etintemel U, Rasin A, Zdonik S, Cherniack M, Stonebraker M. Operator scheduling in a data stream manager. In: Proceedings of the 29th International Conference on Very Large Data Bases. 2003, 838\u2013849"},{"issue":"4","key":"40710_CR7","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1007\/s00778-013-0335-9","volume":"23","author":"B Gedik","year":"2014","unstructured":"Gedik B. Partitioning functions for stateful data parallelism in stream processing. The VLDB Journal, 2014, 23(4): 517\u2013539","journal-title":"The VLDB Journal"},{"key":"40710_CR8","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1145\/2675743.2771827","volume-title":"Proceedings of the 9th ACM International Conference on Distributed Event-Based Systems","author":"N Rivetti","year":"2015","unstructured":"Rivetti N, Querzoni L, Anceaume E, Busnel Y, Sericola B. Efficient key grouping for near-optimal load balancing in stream processing systems. In: Proceedings of the 9th ACM International Conference on Distributed Event-Based Systems. 2015, 80\u201391"},{"issue":"4","key":"40710_CR9","doi-asserted-by":"publisher","first-page":"1061","DOI":"10.1109\/TPDS.2016.2603510","volume":"28","author":"R Tolosana-Calasanz","year":"2017","unstructured":"Tolosana-Calasanz R, Diaz-Montes J, Rana O F, Parashar M. Feedback-control & queueing theory-based resource management for streaming applications. IEEE Transactions on Parallel and Distributed Systems, 2017, 28(4): 1061\u20131075","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"40710_CR10","first-page":"723","volume-title":"Proceedings of the 31st IEEE International Conference on Data Engineering","author":"Y Wu","year":"2015","unstructured":"Wu Y, Tan K L. ChronoStream: elastic stateful stream computation in the cloud. In: Proceedings of the 31st IEEE International Conference on Data Engineering. 2015, 723\u2013734"},{"issue":"2","key":"40710_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3572779","volume":"19","author":"J Li","year":"2023","unstructured":"Li J, Wang Q, Lee P P C, Shi C. An in-depth comparative analysis of cloud block storage workloads: findings and implications. ACM Transactions on Storage, 2023, 19(2): 1\u201332","journal-title":"ACM Transactions on Storage"},{"key":"40710_CR12","first-page":"279","volume-title":"Proceedings of the 20th IEEE International Parallel & Distributed Processing Symposium","author":"C Ding","year":"2006","unstructured":"Ding C, Dwarkadas S, Huang M C, Shen K, Carter J B. Program phase detection and exploitation. In: Proceedings of the 20th IEEE International Parallel & Distributed Processing Symposium. 2006, 279\u2013286"},{"key":"40710_CR13","first-page":"111","volume-title":"Proceedings of the International Symposium on Code Generation and Optimization","author":"P Nagpurkar","year":"2006","unstructured":"Nagpurkar P, Hind P, Krintz C, Sweeney P F, Rajan V T. Online phase detection algorithms. In: Proceedings of the International Symposium on Code Generation and Optimization. 2006, 111\u2013123"},{"key":"40710_CR14","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.compeleceng.2014.09.005","volume":"42","author":"S Esfandiarpoor","year":"2015","unstructured":"Esfandiarpoor S, Pahlavan A, Goudarzi M. Structure-aware online virtual machine consolidation for datacenter energy improvement in cloud computing. Computers & Electrical Engineering, 2015, 42: 74\u201389","journal-title":"Computers & Electrical Engineering"},{"key":"40710_CR15","doi-asserted-by":"publisher","first-page":"102064","DOI":"10.1016\/j.sysarc.2021.102064","volume":"116","author":"V Meyer","year":"2021","unstructured":"Meyer V, Kirchoff D F, Da Silva M L, De Rose C A F. ML-driven classification scheme for dynamic interference-aware resource scheduling in cloud infrastructures. Journal of Systems Architecture, 2021, 116: 102064","journal-title":"Journal of Systems Architecture"},{"issue":"4","key":"40710_CR16","doi-asserted-by":"publisher","first-page":"1313","DOI":"10.1109\/TPDS.2023.3240567","volume":"34","author":"D Saxena","year":"2023","unstructured":"Saxena D, Kumar J, Singh A K, Schmid S. Performance analysis of machine learning centered workload prediction models for cloud. IEEE Transactions on Parallel and Distributed Systems, 2023, 34(4): 1313\u20131330","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"40710_CR17","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1016\/j.neunet.2021.10.021","volume":"145","author":"A Ali","year":"2022","unstructured":"Ali A, Zhu Y, Zakarya M. Exploiting dynamic spatio-temporal graph convolutional neural networks for citywide traffic flows prediction. Neural Networks, 2022, 145: 233\u2013247","journal-title":"Neural Networks"},{"key":"40710_CR18","doi-asserted-by":"publisher","first-page":"1010","DOI":"10.1016\/j.renene.2023.01.118","volume":"205","author":"T Limouni","year":"2023","unstructured":"Limouni T, Yaagoubi R, Bouziane K, Guissi K, Baali E H. Accurate one step and multistep forecasting of very short-term PV power using LSTM-TCN model. Renewable Energy, 2023, 205: 1010\u20131024","journal-title":"Renewable Energy"},{"issue":"1","key":"40710_CR19","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/s10586-020-03214-y","volume":"26","author":"L Ruan","year":"2023","unstructured":"Ruan L, Bai Y, Li S, He S, Xiao L. Workload time series prediction in storage systems: a deep learning based approach. Cluster Computing, 2023, 26(1): 25\u201335","journal-title":"Cluster Computing"},{"issue":"3","key":"40710_CR20","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/2818716","volume":"12","author":"J Basak","year":"2016","unstructured":"Basak J, Wadhwani K, Voruganti K. Storage workload identification. ACM Transactions on Storage, 2016, 12(3): 14","journal-title":"ACM Transactions on Storage"},{"key":"40710_CR21","doi-asserted-by":"publisher","first-page":"101681","DOI":"10.1016\/j.cose.2019.101681","volume":"89","author":"J Zhang","year":"2020","unstructured":"Zhang J, Ling Y, Fu X, Yang X, Xiong G, Zhang R. Model of the intrusion detection system based on the integration of spatial-temporal features. Computers & Security, 2020, 89: 101681","journal-title":"Computers & Security"},{"key":"40710_CR22","first-page":"11","volume-title":"Proceedings of the 4th USENIX Workshop on Hot Topics in Storage and File Systems","author":"P Pipada","year":"2012","unstructured":"Pipada P, Kundu A, Gopinath K, Bhattacharyya C, Susarla S, Nagesh P C. LoadIQ: Learning to identify workload phases from a live storage trace. In: Proceedings of the 4th USENIX Workshop on Hot Topics in Storage and File Systems. 2012, 11\u201315"},{"key":"40710_CR23","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1145\/342009.335420","volume-title":"Proceedings of 2000 ACM SIGMOD International Conference on Management of Data","author":"R Avnur","year":"2000","unstructured":"Avnur R, Hellerstein J M. Eddies: continuously adaptive query processing. In: Proceedings of 2000 ACM SIGMOD International Conference on Management of Data. 2000, 261\u2013272"},{"key":"40710_CR24","volume-title":"Google Cloud Blog","author":"E Kirpichov","year":"2016","unstructured":"Kirpichov E, Denielou M. No shard left behind: dynamic work rebalancing in Google cloud dataflow. Google Cloud Blog, 2016"},{"key":"40710_CR25","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1145\/2723372.2742788","volume-title":"Proceedings of 2015 ACM SIGMOD International Conference on Management of Data","author":"S Kulkarni","year":"2015","unstructured":"Kulkarni S, Bhagat N, Fu M, Kedigehalli V, Kellogg C, Mittal S, Patel J M, Ramasamy K, Taneja S. Twitter heron: stream processing at scale. In: Proceedings of 2015 ACM SIGMOD International Conference on Management of Data. 2015, 239\u2013250"},{"issue":"6","key":"40710_CR26","doi-asserted-by":"publisher","first-page":"1447","DOI":"10.1109\/TPDS.2013.295","volume":"25","author":"B Gedik","year":"2014","unstructured":"Gedik B, Schneider S, Hirzel M, Wu K L. Elastic scaling for data stream processing. IEEE Transactions on Parallel and Distributed Systems, 2014, 25(6): 1447\u20131463","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"4","key":"40710_CR27","first-page":"28","volume":"36","author":"P Carbone","year":"2015","unstructured":"Carbone P, Katsifodimos A, Ewen S, Markl V, Haridi S, Tzoumas K. Apache flink: stream and batch processing in a single engine. The Bulletin of the Technical Committee on Data Engineering, 2015, 36(4): 28\u201338","journal-title":"The Bulletin of the Technical Committee on Data Engineering"},{"key":"40710_CR28","volume-title":"An hmm applied to semi-online program phase analysis","author":"M Otte","year":"2007","unstructured":"Otte M, Richardson S. An hmm applied to semi-online program phase analysis. Boulder: University of Colorado Boulder, 2007"},{"key":"40710_CR29","first-page":"97","volume-title":"Proceedings of 2006 USENIX Annual Technical Conference","author":"A Riska","year":"2006","unstructured":"Riska A, Riedel E. Disk drive level workload characterization. In: Proceedings of 2006 USENIX Annual Technical Conference. 2006, 97\u2013102"},{"key":"40710_CR30","first-page":"297","volume-title":"Proceedings of the 5th International Conference on Quantitative Evaluation of Systems","author":"X Zhang","year":"2008","unstructured":"Zhang X, Riska A, Riedel E. Characterization of the e-commerce storage subsystem workload. In: Proceedings of the 5th International Conference on Quantitative Evaluation of Systems. 2008, 297\u2013306"},{"key":"40710_CR31","first-page":"622","volume-title":"Proceedings of the 11th ACM CIKM International Conference on Information and Knowledge Management","author":"S Elnaffar","year":"2002","unstructured":"Elnaffar S, Martin P, Horman R. Automatically classifying database workloads. In: Proceedings of the 11th ACM CIKM International Conference on Information and Knowledge Management. 2002, 622\u2013624"},{"key":"40710_CR32","first-page":"334","volume-title":"Proceedings of the International Conference on Convergence and Hybrid Information Technology","author":"J S Oh","year":"2008","unstructured":"Oh J S, Choi K S, Kwon J R, Lee S H. Finding the near workload type between TPC-C and TPC-W environments. In: Proceedings of the International Conference on Convergence and Hybrid Information Technology. 2008, 334\u2013337"},{"key":"40710_CR33","first-page":"183","volume-title":"Proceedings of the 8th USENIX Conference on File and Storage Technologies","author":"N J Yadwadkar","year":"2010","unstructured":"Yadwadkar N J, Bhattacharyya C, Gopinath K, Niranjan T, Susarla S. Discovery of application workloads from network file traces. In: Proceedings of the 8th USENIX Conference on File and Storage Technologies. 2010, 183\u2013196"},{"key":"40710_CR34","first-page":"213","volume-title":"Proceedings of the 12th USENIX Conference on File and Storage Technologies","author":"Y Liu","year":"2014","unstructured":"Liu Y, Gunasekaran R, Ma X, Vazhkudai S S. Automatic identification of application I\/O signatures from noisy server-side traces. In: Proceedings of the 12th USENIX Conference on File and Storage Technologies. 2014, 213\u2013228"},{"key":"40710_CR35","first-page":"1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021","author":"C G Northcutt","year":"2021","unstructured":"Northcutt C G, Athalye A, Mueller J. Pervasive label errors in test sets destabilize machine learning benchmarks. In: Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021. 2021, 1\u201313"},{"key":"40710_CR36","volume-title":"Applied Time Series Analysis: A Practical Guide to Modeling and Forecasting","author":"T C Mills","year":"2019","unstructured":"Mills T C. Applied Time Series Analysis: A Practical Guide to Modeling and Forecasting. Amsterdam: Elsevier, 2019"},{"key":"40710_CR37","volume-title":"A course in time series analysis. volume 409","author":"D Pena","year":"2001","unstructured":"Pena D, Tiao G C, Tsay R S. A course in time series analysis. volume 409. Wiley Online Library, 2001"},{"issue":"1","key":"40710_CR38","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1023\/A:1022643204877","volume":"1","author":"J R Quinlan","year":"1986","unstructured":"Quinlan J R. Induction of decision trees. Machine Learning, 1986, 1(1): 81\u2013106","journal-title":"Machine Learning"},{"key":"40710_CR39","first-page":"6639","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"L Prokhorenkova","year":"2018","unstructured":"Prokhorenkova L, Gusev G, Vorobev A, Dorogush A V, Gulin A. CatBoost: unbiased boosting with categorical features. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems. 2018, 6639\u20136649"},{"issue":"2","key":"40710_CR40","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1214\/aos\/1016218223","volume":"28","author":"J Friedman","year":"2000","unstructured":"Friedman J, Hastie T, Tibshirani R. Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors). The Annals of Statistics, 2000, 28(2): 337\u2013407","journal-title":"The Annals of Statistics"},{"issue":"5","key":"40710_CR41","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"J H Friedman","year":"2001","unstructured":"Friedman J H. Greedy function approximation: a gradient boosting machine. The Annals of Statistics, 2001, 29(5): 1189\u20131232","journal-title":"The Annals of Statistics"},{"key":"40710_CR42","doi-asserted-by":"publisher","first-page":"785","DOI":"10.1145\/2939672.2939785","volume-title":"Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"T Chen","year":"2016","unstructured":"Chen T, Guestrin C. XGBoost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2016, 785\u2013794"},{"key":"40710_CR43","first-page":"3149","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"G Ke","year":"2017","unstructured":"Ke G, Meng Q, Finley T, Wang T, Chen W, Ma W, Ye Q, Liu T Y. LightGBM: a highly efficient gradient boosting decision tree. In: Proceedings of the 31st International Conference on Neural Information Processing Systems. 2017, 3149\u20133157"},{"issue":"4","key":"40710_CR44","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1023\/A:1008306431147","volume":"13","author":"D R Jones","year":"1998","unstructured":"Jones D R, Schonlau M, Welch W J. Efficient global optimization of expensive black-box functions. Journal of Global Optimization, 1998, 13(4): 455\u2013492","journal-title":"Journal of Global Optimization"},{"key":"40710_CR45","first-page":"5668","volume-title":"Proceedings of the 33rd AAAI Conference on Artificial Intelligence","author":"H Yao","year":"2018","unstructured":"Yao H, Tang X, Wei H, Zheng G, Li Z. Revisiting spatial-temporal similarity: a deep learning framework for traffic prediction. In: Proceedings of the 33rd AAAI Conference on Artificial Intelligence. 2018, 5668\u20135675"},{"issue":"3","key":"40710_CR46","doi-asserted-by":"publisher","first-page":"1411","DOI":"10.1109\/TSC.2020.2993728","volume":"15","author":"J Gao","year":"2022","unstructured":"Gao J, Wang H, Shen H. Task failure prediction in cloud data centers using deep learning. IEEE Transactions on Services Computing, 2022, 15(3): 1411\u20131422","journal-title":"IEEE Transactions on Services Computing"},{"key":"40710_CR47","unstructured":"Bai S, Kolter J Z, Koltun V. An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. 2018, arXiv preprint arXiv: 1803.01271"},{"issue":"3","key":"40710_CR48","doi-asserted-by":"publisher","first-page":"284","DOI":"10.12720\/jait.13.3.284-289","volume":"13","author":"A Mrhari","year":"2022","unstructured":"Mrhari A, Hadi Y. Workload prediction using VMD and TCN in cloud computing. Journal of Advances in Information Technology, 2022, 13(3): 284\u2013289","journal-title":"Journal of Advances in Information Technology"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-024-40710-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-024-40710-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-024-40710-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T16:02:25Z","timestamp":1764691345000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-024-40710-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,2]]},"references-count":48,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2026,5]]}},"alternative-id":["40710"],"URL":"https:\/\/doi.org\/10.1007\/s11704-024-40710-5","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"type":"print","value":"2095-2228"},{"type":"electronic","value":"2095-2236"}],"subject":[],"published":{"date-parts":[[2025,12,2]]},"assertion":[{"value":"10 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no competing interests or financial conflicts to disclose.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"2005102"}}