{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T20:57:17Z","timestamp":1772830637879,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","license":[{"start":{"date-parts":[[2010,10,26]],"date-time":"2010-10-26T00:00:00Z","timestamp":1288051200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2010,10,26]]},"DOI":"10.1145\/1871840.1871853","type":"proceedings-article","created":{"date-parts":[[2010,10,28]],"date-time":"2010-10-28T14:43:19Z","timestamp":1288276999000},"page":"81-88","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Tokenizing micro-blogging messages using a text classification approach"],"prefix":"10.1145","author":[{"given":"Gustavo","family":"Laboreiro","sequence":"first","affiliation":[{"name":"LIACC - Faculdade de Engenharia da Faculdade do Porto, Porto, Portugal"}]},{"given":"Lu\u00eds","family":"Sarmento","sequence":"additional","affiliation":[{"name":"Labs SAPO and LIACC - Faculdade de Engenharia da Faculdade do Porto, Porto, Portugal"}]},{"given":"Jorge","family":"Teixeira","sequence":"additional","affiliation":[{"name":"Labs SAPO and LIACC - Faculdade de Engenharia da Faculdade do Porto, Porto, Portugal"}]},{"given":"Eug\u00e9nio","family":"Oliveira","sequence":"additional","affiliation":[{"name":"LIACC - Faculdade de Engenharia da Faculdade do Porto, Porto, Portugal"}]}],"member":"320","published-online":{"date-parts":[[2010,10,26]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390749.1390761"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390749.1390756"},{"key":"e_1_3_2_1_3_1","first-page":"427","volume-title":"Proceedings First International Conference on Language Resources and Evaluation","author":"Habert B.","year":"1998","unstructured":"B. Habert , G. Adda , M. Adda-Decker , P. B. de Mar\u00ebuil , S. Ferrari , O. Ferret , G. Illouz , and P. Paroubek . Towards tokenization evaluation. In A. Rubio, N. Gallardo, R. Castro, and A. Tejada, editors . Proceedings First International Conference on Language Resources and Evaluation , volume I , pages 427 -- 431 , Granada, may 1998 . B. Habert, G. Adda, M. Adda-Decker, P. B. de Mar\u00ebuil, S. Ferrari, O. Ferret, G. Illouz, and P. Paroubek. Towards tokenization evaluation. In A. Rubio, N. Gallardo, R. Castro, and A. Tejada, editors. Proceedings First International Conference on Language Resources and Evaluation, volume I, pages 427--431, Granada, may 1998."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390749.1390755"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/645326.649721"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073336.1073361"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/146370.146380"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1516360.1516442"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3115\/1075218.1075234"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/1628960.1628969"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-007-0056-y"},{"issue":"2","key":"e_1_3_2_1_12_1","first-page":"55","article-title":"Chinese word segmentation based on large margin methods","volume":"19","author":"Tang B.","year":"2009","unstructured":"B. Tang , X. Wang , and X. Wang . Chinese word segmentation based on large margin methods . International Journal of Asian Language Processing , 19 ( 2 ): 55 -- 68 , 2009 . B. Tang, X. Wang, and X. Wang. Chinese word segmentation based on large margin methods. International Journal of Asian Language Processing, 19(2):55--68, 2009.","journal-title":"International Journal of Asian Language Processing"},{"key":"e_1_3_2_1_13_1","first-page":"49","volume-title":"PACLING 2007 -- Proceedings of the 10th Conference of the Pacific Association for Computational Linguistics","author":"Tomanek K.","year":"2007","unstructured":"K. Tomanek , J. Wermter , and U. Hahn . Sentence and token splitting based on conditional random fields . In PACLING 2007 -- Proceedings of the 10th Conference of the Pacific Association for Computational Linguistics , pages 49 -- 57 . Melbourne, Australia, September 19--21 , 2007 . Melbourne: Pacific Association for Computational Linguistics, 2007. K. Tomanek, J. Wermter, and U. Hahn. Sentence and token splitting based on conditional random fields. In PACLING 2007 -- Proceedings of the 10th Conference of the Pacific Association for Computational Linguistics, pages 49--57. Melbourne, Australia, September 19--21, 2007. Melbourne: Pacific Association for Computational Linguistics, 2007."}],"event":{"name":"CIKM '10: International Conference on Information and Knowledge Management","location":"Toronto ON Canada","acronym":"CIKM '10","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the fourth workshop on Analytics for noisy unstructured text data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1871840.1871853","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1871840.1871853","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:17:38Z","timestamp":1750249058000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1871840.1871853"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,10,26]]},"references-count":13,"alternative-id":["10.1145\/1871840.1871853","10.1145\/1871840"],"URL":"https:\/\/doi.org\/10.1145\/1871840.1871853","relation":{},"subject":[],"published":{"date-parts":[[2010,10,26]]},"assertion":[{"value":"2010-10-26","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}