{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T15:20:55Z","timestamp":1772205655983,"version":"3.50.1"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,5,30]],"date-time":"2023-05-30T00:00:00Z","timestamp":1685404800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,30]],"date-time":"2023-05-30T00:00:00Z","timestamp":1685404800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172419"],"award-info":[{"award-number":["62172419"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s42514-023-00153-z","type":"journal-article","created":{"date-parts":[[2023,5,30]],"date-time":"2023-05-30T09:04:32Z","timestamp":1685437472000},"page":"206-220","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Compressed data direct computing for Chinese dataset on DCU"],"prefix":"10.1007","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8933-6787","authenticated-orcid":false,"given":"Yani","family":"Liu","sequence":"first","affiliation":[]},{"given":"Feng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zaifeng","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Xiaoguang","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Yihua","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xiaoyong","family":"Du","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,30]]},"reference":[{"key":"153_CR1","unstructured":"Agarwal, R., Khandelwal, A., Stoica, I.: Succinct: enabling queries on compressed data. In: Symposium on Networked Systems Design and Implementation (2015)"},{"key":"153_CR2","unstructured":"Ahmad, F., Lee, S., Thottethodi, M., et\u00a0al.: Puma: Purdue mapreduce benchmarks suite (2012)"},{"key":"153_CR3","doi-asserted-by":"publisher","unstructured":"Bille, P., Landau, G.M., Raman, R., et\u00a0al.: Random access to grammar-compressed strings (2010). ArXiv arXiv:1001.1565. https:\/\/doi.org\/10.1137\/130936889","DOI":"10.1137\/130936889"},{"key":"153_CR4","doi-asserted-by":"publisher","unstructured":"Blumenstock, J.E.: Size matters: word count as a measure of quality on wikipedia. In: The Web Conference (2008). https:\/\/doi.org\/10.1145\/1367497.1367673","DOI":"10.1145\/1367497.1367673"},{"key":"153_CR5","doi-asserted-by":"publisher","unstructured":"Burrows, M., Wheeler, D.J.: A block-sorting lossless data compression algorithm (1994). https:\/\/doi.org\/10.1109\/DCC.1997.582137","DOI":"10.1109\/DCC.1997.582137"},{"key":"153_CR6","doi-asserted-by":"crossref","unstructured":"Chang, P.C., Galley, M., Manning, C.D.: Optimizing Chinese word segmentation for machine translation performance. In: Proceedings of the Third Workshop on Statistical Machine Translation, pp. 224\u2013232 (2008)","DOI":"10.3115\/1626394.1626430"},{"key":"153_CR7","doi-asserted-by":"publisher","first-page":"2554","DOI":"10.1109\/TIT.2005.850116","volume":"51","author":"M Charikar","year":"2005","unstructured":"Charikar, M., Lehman, E., Liu, D., et al.: The smallest grammar problem. IEEE Trans. Inf. Theory 51, 2554\u20132576 (2005). https:\/\/doi.org\/10.1109\/TIT.2005.850116","journal-title":"IEEE Trans. Inf. Theory"},{"key":"153_CR8","unstructured":"Chinese government website. http:\/\/www.gov.cn (2022)"},{"key":"153_CR9","doi-asserted-by":"publisher","unstructured":"Fan, W., Li, J., Wang, X., et\u00a0al.: Query preserving graph compression. In: Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data https:\/\/doi.org\/10.1145\/2213836.2213855 (2012)","DOI":"10.1145\/2213836.2213855"},{"key":"153_CR10","doi-asserted-by":"publisher","unstructured":"Farruggia, A., Ferragina, P., Venturini, R.: Bicriteria data compression: efficient and usable. In: ESA (2014). https:\/\/doi.org\/10.1007\/978-3-662-44777-2_34","DOI":"10.1007\/978-3-662-44777-2_34"},{"key":"153_CR11","doi-asserted-by":"publisher","unstructured":"Ferragina, P., Nitto, I., Venturini, R.: On the bit-complexity of lempel-ziv compression. SIAM J. Comput. (2009). https:\/\/doi.org\/10.1137\/1.9781611973068.84","DOI":"10.1137\/1.9781611973068.84"},{"key":"153_CR12","doi-asserted-by":"publisher","unstructured":"Ferragina, P., Venturini, R.: Indexing compressed text. JACM. https:\/\/doi.org\/10.1145\/1082036.1082039 (2005)","DOI":"10.1145\/1082036.1082039"},{"key":"153_CR13","doi-asserted-by":"publisher","unstructured":"Grossi, R., Vitter, J.S.: Compressed suffix arrays and suffix trees with applications to text indexing and string matching. SIAM J. Comput. 35:378\u2013407. https:\/\/doi.org\/10.1145\/335305.335351 (2005)","DOI":"10.1145\/335305.335351"},{"key":"153_CR14","unstructured":"He, H., Hanlp: Han language processing. https:\/\/www.githubcom\/hankcs\/HanLP (2014)"},{"key":"153_CR15","unstructured":"Hon, W.K., Lam, T.W., Sung, W.K., et\u00a0al.: Practical aspects of compressed suffix arrays and fm-index in searching dna sequences. In: ALENEX\/ANALC (2004)"},{"key":"153_CR16","unstructured":"Hpccube. https:\/\/www.hpccube.com\/ac\/home\/index.html (2021)"},{"key":"153_CR17","doi-asserted-by":"publisher","unstructured":"Hu, B., Chen, Q., Zhu, F., Lcsts: a large scale chinese short text summarization dataset. In: Conference on Empirical Methods in Natural Language Processing (2015). https:\/\/doi.org\/10.48550\/arXiv.1506.05865","DOI":"10.48550\/arXiv.1506.05865"},{"key":"153_CR18","unstructured":"Hu, W.: Research on dcu-oriented polyhedron compiler optimization technology. Zhengzhou University (2021)"},{"key":"153_CR19","unstructured":"Hygon website. https:\/\/www.hygon.cn\/index (2022)"},{"key":"153_CR20","unstructured":"Jin, J.: Optimization and implementation of complex matrix multiplication based on hygon dcu. Chin. Acad. Sci. (2020)"},{"key":"153_CR21","doi-asserted-by":"publisher","unstructured":"Lebart, L.: Classification problems in text analysis and information retrieval. https:\/\/doi.org\/10.1007\/978-3-642-72253-0_63 (1998)","DOI":"10.1007\/978-3-642-72253-0_63"},{"key":"153_CR22","doi-asserted-by":"crossref","unstructured":"Li, J., Rotem, D., Srivastava, J.: Aggregation algorithms for very large compressed data warehouses. In: Very Large Data Bases Conference (1999)","DOI":"10.1007\/BF02948809"},{"key":"153_CR23","unstructured":"Li, J., Rotem, D., Wong, H.K.T.: A new compression method with fast searching on large databases. In: Very Large Data Bases Conference (1987)"},{"key":"153_CR24","doi-asserted-by":"publisher","unstructured":"Li, Z., Sun, M.: Punctuation as implicit annotations for Chinese word segmentation. Comput. Ling. 35(4):505\u2013512 (2009). https:\/\/doi.org\/10.1162\/coli.2009.35.4.35403","DOI":"10.1162\/coli.2009.35.4.35403"},{"key":"153_CR25","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1109\/TKDE.2002.1000340","volume":"14","author":"J Li","year":"2002","unstructured":"Li, J., Srivastava, J.: Efficient aggregation algorithms for compressed data warehouses. IEEE Trans. Knowl. Data Eng. 14, 515\u2013529 (2002). https:\/\/doi.org\/10.1109\/TKDE.2002.1000340","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"153_CR26","doi-asserted-by":"publisher","unstructured":"Manber, U., Myers, E.W.: Suffix arrays: a new method for on-line string searches. SIAM J. Comput. 22:935\u2013948 (1993). https:\/\/doi.org\/10.1137\/0222058","DOI":"10.1137\/0222058"},{"key":"153_CR27","doi-asserted-by":"publisher","unstructured":"Matsuo, Y., Ishizuka, M.: Keyword extraction from a single document using word co-occurrence statistical information. Int. J. Artif. Intell. Tools (2004). https:\/\/doi.org\/10.1142\/S0218213004001466","DOI":"10.1142\/S0218213004001466"},{"key":"153_CR28","doi-asserted-by":"publisher","unstructured":"Moffat, A., Petri, M.: Index compression using byte-aligned ans coding and two-dimensional contexts. In: Proceedings of the Eleventh ACM International Conference on Web Search and Data Mining (2018). https:\/\/doi.org\/10.1145\/3159652.3159663","DOI":"10.1145\/3159652.3159663"},{"key":"153_CR29","unstructured":"More relevant data. https:\/\/www.heywhale.com\/home\/dataset (2022)"},{"key":"153_CR32","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.374","volume":"7","author":"CG Nevill-Manning","year":"1997","unstructured":"Nevill-Manning, C.G., Witten, I.H.: Identifying hierarchical structure in sequences: a linear-time algorithm. J. Artif. Intell. Res. 7, 67\u201382 (1997a). https:\/\/doi.org\/10.1613\/jair.374","journal-title":"J. Artif. Intell. Res."},{"key":"153_CR30","doi-asserted-by":"publisher","unstructured":"Nevill-Manning, C.G., Witten, I.H.: Linear-time, incremental hierarchy inference for compression. In: Proceedings DCC\u201997. Data Compression Conference, IEEE, pp. 3\u201311 (1997b). https:\/\/doi.org\/10.1109\/DCC.1997.581951","DOI":"10.1109\/DCC.1997.581951"},{"key":"153_CR31","unstructured":"Nevill-Manning, C.: Inferring sequential structure (1998)"},{"key":"153_CR33","doi-asserted-by":"publisher","first-page":"1553","DOI":"10.1109\/TPDS.2021.3119402","volume":"33","author":"Z Pan","year":"2022","unstructured":"Pan, Z., Zhang, F., Zhou, Y., et al.: Exploring data analytics without decompression on embedded GPU systems. IEEE Trans. Parallel Distrib. Syst. 33, 1553\u20131568 (2022). https:\/\/doi.org\/10.1109\/TPDS.2021.3119402","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"153_CR34","unstructured":"Pennebaker, J.W., Francis, M.E., Booth, R.J.: Linguistic Inquiry and Word Count: Liwc 2001. Lawrence Erlbaum Associates, Mahway 71(2001):2001 (2001)"},{"key":"153_CR35","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Conference on Empirical Methods in Natural Language Processing (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"153_CR36","doi-asserted-by":"publisher","unstructured":"Petri, M., Moffat, A: Compact inverted index storage using general-purpose compression libraries. Software: Practice and Experience 48:974\u2013982. https:\/\/doi.org\/10.1002\/spe.2556 (2018)","DOI":"10.1002\/spe.2556"},{"key":"153_CR37","doi-asserted-by":"publisher","unstructured":"Pibiri, G.E., Petri, M., Moffat, A.: Fast dictionary-based compression for inverted indexes. In: Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining (2019). https:\/\/doi.org\/10.1145\/3289600.3290962","DOI":"10.1145\/3289600.3290962"},{"key":"153_CR38","doi-asserted-by":"publisher","unstructured":"Rong, X.: word2vec parameter learning explained (2014). ArXiv arXiv:1411.2738. https:\/\/doi.org\/10.48550\/arXiv.1411.2738","DOI":"10.48550\/arXiv.1411.2738"},{"key":"153_CR39","doi-asserted-by":"publisher","unstructured":"Rytter, W.: Grammar compression, lz-encodings, and string algorithms with implicit input. In: International Colloquium on Automata, Languages and Programming (2004). https:\/\/doi.org\/10.1007\/978-3-540-27836-8_5","DOI":"10.1007\/978-3-540-27836-8_5"},{"key":"153_CR40","unstructured":"Sogounews. http:\/\/www.sogoucom\/labs\/resource\/csphp (2012)"},{"key":"153_CR41","unstructured":"Sun, J.J.: Chinese word segmentation tool (2012)"},{"issue":"1","key":"153_CR44","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/TSC.2016.2531698","volume":"11","author":"S Tang","year":"2016","unstructured":"Tang, S., Lee, B.S., He, B.: Fair resource allocation for data-intensive computing in the cloud. IEEE Trans. Serv. Comput. 11(1), 20\u201333 (2016)","journal-title":"IEEE Trans. Serv. Comput."},{"key":"153_CR43","doi-asserted-by":"crossref","unstructured":"Tang, S., Yu, C., Sun, C., et\u00a0al.: Qknober: a knob-based fairness-efficiency scheduler for cloud computing with qos guarantees. In: Service-Oriented Computing: 16th International Conference, ICSOC 2018, Hangzhou, China, November 12\u201315, 2018, Proceedings 16, Springer, pp. 837\u2013853 (2018)","DOI":"10.1007\/978-3-030-03596-9_60"},{"key":"153_CR42","doi-asserted-by":"crossref","unstructured":"Tang, S., Chai, Q., Yu, C., et\u00a0al.: Balancing fairness and efficiency for cache sharing in semi-external memory system. In: Proceedings of the 49th International Conference on Parallel Processing, pp. 1\u201311 (2020a)","DOI":"10.1145\/3404397.3404450"},{"issue":"1","key":"153_CR45","first-page":"71","volume":"34","author":"S Tang","year":"2020","unstructured":"Tang, S., He, B., Yu, C., et al.: A survey on spark ecosystem: big data processing infrastructure, machine learning, and applications. IEEE Trans. Knowl. Data Eng. 34(1), 71\u201391 (2020b)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"3","key":"153_CR46","doi-asserted-by":"publisher","first-page":"1806","DOI":"10.1109\/TCC.2020.3021084","volume":"10","author":"S Tang","year":"2020","unstructured":"Tang, S., Yu, C., Li, Y.: Fairness-efficiency scheduling for cloud computing with soft fairness guarantees. IEEE Trans. Cloud Comput. 10(3), 1806\u20131818 (2020c)","journal-title":"IEEE Trans. Cloud Comput."},{"key":"153_CR47","unstructured":"Thucnews. http:\/\/www.thuctcthunlporg\/message (2016)"},{"key":"153_CR48","doi-asserted-by":"crossref","unstructured":"Thulac website. http:\/\/thulac.thunlp.org\/ (2016)","DOI":"10.22233\/20412495.1116.24"},{"key":"153_CR49","unstructured":"Uci machine learning repository. http:\/\/www.archive.ics.uci.edu\/ml (2021)"},{"key":"153_CR50","unstructured":"Wang, Y., Hu, H., William, T.: Performance evaluation of sugon exascale prototype with gtc-p. Comput. Eng. Sci. 42, 1 (2020)"},{"key":"153_CR51","unstructured":"Wang, J., Liang, Y.: A summary of Chinese word segmentation research. Software Guide (2021)"},{"key":"153_CR52","unstructured":"Wang, R.: Snownlp python package (2018)"},{"key":"153_CR53","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1016\/j.dss.2017.11.001","volume":"105","author":"Y Wang","year":"2018","unstructured":"Wang, Y., Xu, W.: Leveraging deep learning with lda-based text analytics to detect automobile insurance fraud. Decis. Support Syst. 105, 87\u201395 (2018). https:\/\/doi.org\/10.1016\/j.dss.2017.11.001","journal-title":"Decis. Support Syst."},{"key":"153_CR54","unstructured":"Wikipedia data download. https:\/\/dumps.wikimedia.org\/ (2021)"},{"key":"153_CR55","doi-asserted-by":"publisher","unstructured":"Xie, J., Hu, W., Han, L.: Quantum fourier transform simulation based on \u201csongshan\u201d supercomputer system. Comput. Eng. Sci (2021). https:\/\/doi.org\/10.11896\/jsjkx.201200023","DOI":"10.11896\/jsjkx.201200023"},{"key":"153_CR56","unstructured":"Zernik, U.: Lexical acquisition: exploiting on-line resources to build a lexicon (1991)"},{"issue":"3","key":"153_CR63","doi-asserted-by":"publisher","first-page":"905","DOI":"10.1109\/TPDS.2016.2586074","volume":"28","author":"F Zhang","year":"2017","unstructured":"Zhang, F., Zhai, J., He, B., et al.: Understanding co-running behaviors on integrated cpu\/gpu architectures. IEEE Trans. Parallel Distrib. Syst. 28(3), 905\u2013918 (2017). https:\/\/doi.org\/10.1109\/TPDS.2016.2586074","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"153_CR59","doi-asserted-by":"publisher","unstructured":"Zhang, F., Zhai, J., Shen, X., et\u00a0al.: Efficient document analytics on compressed data: method, challenges, algorithms, insights. In: Proceedings of the VLDB Endowment, vol. 11(11), pp. 1522\u20131535. https:\/\/doi.org\/10.14778\/3236187.3236203 (2018a)","DOI":"10.14778\/3236187.3236203"},{"key":"153_CR61","doi-asserted-by":"publisher","unstructured":"Zhang, F., Zhai, J., Shen, X., et\u00a0al.: Zwift: a programming framework for high performance text analytics on compressed data. In: Proceedings of the 2018 International Conference on Supercomputing, pp. 195\u2013206 (2018b). https:\/\/doi.org\/10.1145\/3205289.3205325","DOI":"10.1145\/3205289.3205325"},{"key":"153_CR60","doi-asserted-by":"publisher","unstructured":"Zhang, F., Zhai, J., Shen, X., et\u00a0al.: Enabling efficient random access to hierarchically-compressed data. In: 2020 IEEE 36th International Conference on Data Engineering (ICDE), IEEE, pp. 1069\u20131080. https:\/\/doi.org\/10.1109\/ICDE48307.2020.00097 (2020)","DOI":"10.1109\/ICDE48307.2020.00097"},{"key":"153_CR57","doi-asserted-by":"publisher","unstructured":"Zhang, F., Pan, Z., Zhou, Y., et\u00a0al.: G-tadoc: enabling efficient gpu-based text analytics without decompression. In: 2021 IEEE 37th International Conference on Data Engineering (ICDE), IEEE, pp. 1679\u20131690 (2021a). https:\/\/doi.org\/10.1109\/ICDE51399.2021.00148","DOI":"10.1109\/ICDE51399.2021.00148"},{"issue":"2","key":"153_CR64","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s00778-020-00636-3","volume":"30","author":"F Zhang","year":"2021","unstructured":"Zhang, F., Zhai, J., Shen, X., et al.: Tadoc: text analytics directly on compression. VLDB J. 30(2), 163\u2013188 (2021b). https:\/\/doi.org\/10.1007\/s00778-020-00636-3","journal-title":"VLDB J."},{"key":"153_CR58","doi-asserted-by":"publisher","unstructured":"Zhang, F., Wan, W., Zhang, C., et\u00a0al.: CompressDB: enabling efficient compressed data direct processing for various databases. In: Proceedings of the 2022 International Conference on Management of Data. https:\/\/doi.org\/10.1145\/3514221.3526130 (2022a)","DOI":"10.1145\/3514221.3526130"},{"key":"153_CR65","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1109\/TPDS.2021.3093234","volume":"33","author":"F Zhang","year":"2022","unstructured":"Zhang, F., Zhai, J., Shen, X., et al.: POCLib: a high-performance framework for enabling near orthogonal processing on compression. IEEE Trans. Parallel Distrib. Syst. 33, 459\u2013475 (2022b). https:\/\/doi.org\/10.1109\/TPDS.2021.3093234","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"153_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhang, F., Li, H., et\u00a0al.: CompressStreamDB: fine-grained adaptive stream processing without decompression. In: 39th IEEE International Conference on Data Engineering (ICDE 2023) (2023)","DOI":"10.1109\/ICDE55515.2023.00038"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00153-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-023-00153-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00153-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T09:03:42Z","timestamp":1715677422000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-023-00153-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,30]]},"references-count":65,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["153"],"URL":"https:\/\/doi.org\/10.1007\/s42514-023-00153-z","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"value":"2524-4922","type":"print"},{"value":"2524-4930","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,30]]},"assertion":[{"value":"29 January 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 May 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 May 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}