{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T16:51:03Z","timestamp":1773939063298,"version":"3.50.1"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,3,22]],"date-time":"2018-03-22T00:00:00Z","timestamp":1521676800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China","doi-asserted-by":"crossref","award":["2014CB340506"],"award-info":[{"award-number":["2014CB340506"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61631013"],"award-info":[{"award-number":["61631013"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61561130160"],"award-info":[{"award-number":["61561130160"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Microsoft Research Asia"},{"name":"Royal Society-Newton Advanced Fellowship Award"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Soc. Netw. Anal. Min."],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1007\/s13278-018-0495-0","type":"journal-article","created":{"date-parts":[[2018,3,22]],"date-time":"2018-03-22T12:15:54Z","timestamp":1521720954000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Profiling Web users using big data"],"prefix":"10.1007","volume":"8","author":[{"given":"Xiaotao","family":"Gu","sequence":"first","affiliation":[]},{"given":"Hong","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Fanjin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Debing","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Wendy","family":"Hall","sequence":"additional","affiliation":[]},{"given":"Xiao","family":"Fu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,3,22]]},"reference":[{"issue":"1","key":"495_CR1","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/MIS.2003.1179189","volume":"18","author":"H Alani","year":"2003","unstructured":"Alani H, Kim S, Millard DE, Weal MJ, Hall W, Lewis PH, Shadbolt NR (2003) Automatic ontology-based knowledge extraction from web documents. IEEE Intell Syst 18(1):14\u201321","journal-title":"IEEE Intell Syst"},{"key":"495_CR2","volume-title":"Modern information retrieval","author":"R Baeza-Yates","year":"1999","unstructured":"Baeza-Yates R, Ribeiro-Neto B (1999) Modern information retrieval. ACM Press, New York"},{"key":"495_CR3","doi-asserted-by":"crossref","unstructured":"Balog K, Azzopardi L, de\u00a0Rijke M (2006) Formal models for expert finding in enterprise corpora. In: Proceedings of the 29th annual international ACM SIGIR conference on research and development in information retrieval, pp 43\u201355","DOI":"10.1145\/1148170.1148181"},{"key":"495_CR4","unstructured":"Banko M, Cafarella MJ, Soderland S, Broadhead M, Etzioni O (2007) Open information extraction from the web. In: Proceedings of the 20th international joint conference on artificial intelligence, pp 2670\u20132676"},{"key":"495_CR5","doi-asserted-by":"crossref","unstructured":"Basu S, Bilenko M, Mooney RJ (2004) A probabilistic framework for semi-supervised clustering. In: Proceedings of the 10th ACM SIGKDD international conference on knowledge discovery and data mining, pp 59\u201368","DOI":"10.1145\/1014052.1014062"},{"key":"495_CR6","doi-asserted-by":"crossref","unstructured":"Bi B, Shokouhi M, Kosinski M, Graepel T (2013) Inferring the demographics of search users: social data meets search queries. In: Proceedings of the 22nd international conference on world wide web, pp 131\u2013140","DOI":"10.1145\/2488388.2488401"},{"key":"495_CR7","doi-asserted-by":"crossref","unstructured":"Blanco L, Bronzi M, Crescenzi V, Merialdo P, Papotti P (2010) Redundancy-driven web data extraction and integration. In: Procceedings of the 13th international workshop on the web and databases, pp 7:1\u20137:6","DOI":"10.1145\/1859127.1859137"},{"issue":"4","key":"495_CR8","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1016\/0306-4573(87)90020-3","volume":"23","author":"G Brajnik","year":"1987","unstructured":"Brajnik G, Guida G, Tasso C (1987) User modeling in intelligent information retrieval. Inf Process Manag 23(4):305\u2013320","journal-title":"Inf Process Manag"},{"key":"495_CR9","unstructured":"Chan PK (1999) Constructing web user profiles: a non-invasive learning approach. In: KDD-99 workshop on web usage analysis and user profiling, pp 39\u201355"},{"key":"495_CR10","unstructured":"Collins M (2002) Ranking algorithms for named-entity extraction: boosting and the voted perceptron. In: Proceedings of the 40th annual meeting on association for computational linguistics, pp 489\u2013496"},{"issue":"3","key":"495_CR11","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik V (1995) Support-vector networks. Mach Learn 20(3):273\u2013297","journal-title":"Mach Learn"},{"issue":"2","key":"495_CR12","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1111\/j.2517-6161.1958.tb00292.x","volume":"20","author":"DR Cox","year":"1958","unstructured":"Cox DR (1958) The regression analysis of binary sequences. J Roy Stat Soc Ser B (Methodol) 20(2):215\u2013242","journal-title":"J Roy Stat Soc Ser B (Methodol)"},{"key":"495_CR13","unstructured":"Cunningham H, Maynard D, Bontcheva K, Tablan V (2002) GATE: a framework and graphical development environment for robust NLP tools and applications. In: Proceedings of the 40th annual meeting of the association for computational linguistics, pp 168\u2013175"},{"key":"495_CR14","doi-asserted-by":"crossref","unstructured":"Dong Y, Yang Y, Tang J, Yang Y, Chawla NV (2014) Inferring user demographics and social strategies in mobile social networks. In: Proceedings of the 20th ACM SIGKDD international conference on knowledge discovery and data mining, pp 15\u201324","DOI":"10.1145\/2623330.2623703"},{"key":"495_CR15","unstructured":"Downey D, Etzioni O, Soderland S (2005) A probabilistic model of redundancy in information extraction. In: Proceedings of the 19th international joint conference on artificial intelligence, pp 1034\u20131041"},{"issue":"1","key":"495_CR16","doi-asserted-by":"publisher","first-page":"66:1","DOI":"10.1007\/s13278-016-0376-3","volume":"6","author":"H Efstathiades","year":"2016","unstructured":"Efstathiades H, Antoniades D, Pallis G, Dikaiakos MD (2016) Users key locations in online social networks: identification and applications. Soc Netw Anal Min 6(1):66:1\u201366:17","journal-title":"Soc Netw Anal Min"},{"issue":"2","key":"495_CR17","doi-asserted-by":"publisher","first-page":"84","DOI":"10.12720\/jait.6.2.84-87","volume":"6","author":"M Eltaher","year":"2015","unstructured":"Eltaher M, Lee J (2015) User profiling of Flickr: integrating multiple types of features for gender classification. J Adv Inf Technol 6(2):84\u201387","journal-title":"J Adv Inf Technol"},{"key":"495_CR18","doi-asserted-by":"crossref","unstructured":"Figueiredo F, Ribeiro B, Almeida JM, Faloutsos C (2016) TribeFlow: mining and predicting user trajectories. In: Proceedings of the 25th international conference on world wide web, pp 695\u2013706","DOI":"10.1145\/2872427.2883059"},{"key":"495_CR19","doi-asserted-by":"crossref","unstructured":"Finkel JR, Grenager T, Manning C (2005) Incorporating non-local information into information extraction systems by Gibbs sampling. In: Proceedings of the 43rd annual meeting on association for computational linguistics, pp 363\u2013370","DOI":"10.3115\/1219840.1219885"},{"issue":"2\u20133","key":"495_CR20","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1023\/A:1007425814087","volume":"29","author":"Z Ghahramani","year":"1997","unstructured":"Ghahramani Z, Jordan MI (1997) Factorial hidden Markov models. Mach Learn 29(2\u20133):245\u2013273","journal-title":"Mach Learn"},{"key":"495_CR21","unstructured":"Hammersley JM, Clifford P (1971) Markov fields on finite graphs and lattices"},{"key":"495_CR22","doi-asserted-by":"crossref","unstructured":"Hu J, Zeng HJ, Li H, Niu C, Chen Z (2007) Demographic prediction based on user\u2019s browsing behavior. In: Proceedings of the 16th international conference on world wide web, pp 151\u2013160","DOI":"10.1145\/1242572.1242594"},{"issue":"1","key":"495_CR23","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/j.knosys.2013.06.020","volume":"51","author":"K Ikeda","year":"2013","unstructured":"Ikeda K, Hattori G, Ono C, Asoh H, Higashino T (2013) Twitter user profiling based on text and community mining for market analysis. Knowl Based Syst 51(1):35\u201347","journal-title":"Knowl Based Syst"},{"key":"495_CR24","doi-asserted-by":"crossref","unstructured":"Joseph K, Wei W, Carley KM (2016) Exploring patterns of identity usage in tweets: a new problem, solution and case study. In: Proceedings of the 25th international conference on world wide web, pp 401\u2013412","DOI":"10.1145\/2872427.2883027"},{"key":"495_CR25","unstructured":"Kristjansson T, Culotta A, Viola P, McCallum A (2004) Interactive information extraction with constrained conditional random fields. In: Proceedings of the 19th national conference on artificial intelligence, pp 412\u2013418"},{"issue":"2","key":"495_CR26","first-page":"37","volume":"18","author":"B Krulwich","year":"1997","unstructured":"Krulwich B (1997) Lifestyle finder: intelligent user profiling using large-scale demographic data. AI Mag 18(2):37\u201345","journal-title":"AI Mag"},{"key":"495_CR27","unstructured":"Lafferty JD, McCallum A, Pereira FCN (2001) Conditional random fields: probabilistic models for segmenting and labeling sequence data. In: Proceedings of the 18th international conference on machine learning, pp 282\u2013289"},{"key":"495_CR29","doi-asserted-by":"crossref","unstructured":"Li R, Wang S, Deng H, Wang R, Chang KCC (2012) Towards social user profiling: unified and discriminative influence model for inferring home locations. In: Proceedings of the 18th ACM SIGKDD international conference on knowledge discovery and data mining, pp 1023\u20131031","DOI":"10.1145\/2339530.2339692"},{"key":"495_CR28","doi-asserted-by":"crossref","unstructured":"Li J, Ritter A, Hovy E (2014) Weakly supervised user profile extraction from Twitter. In: Proceedings of the 52nd annual meeting of the association for computational linguistics, pp 165\u2013174","DOI":"10.3115\/v1\/P14-1016"},{"issue":"1","key":"495_CR30","doi-asserted-by":"publisher","first-page":"193:1","DOI":"10.1007\/s13278-014-0193-5","volume":"4","author":"A Makazhanov","year":"2014","unstructured":"Makazhanov A, Rafiei D, Waqar M (2014) Predicting political preference of Twitter users. Soc Netw Anal Min 4(1):193:1\u2013193:15","journal-title":"Soc Netw Anal Min"},{"key":"495_CR31","unstructured":"McCallum A, Freitag D, Pereira FCN (2000) Maximum entropy Markov models for information extraction and segmentation. In: Proceedings of the 17th international conference on machine learning, pp 591\u2013598"},{"issue":"3","key":"495_CR32","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s10032-007-0052-2","volume":"10","author":"M Michelson","year":"2007","unstructured":"Michelson M, Knoblock C (2007) Unsupervised information extraction from unstructured, ungrammatical data sources on the world wide web. Int J Doc Anal Recogn 10(3):211\u2013226","journal-title":"Int J Doc Anal Recogn"},{"issue":"3","key":"495_CR33","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1023\/A:1007369909943","volume":"27","author":"M Pazzani","year":"1997","unstructured":"Pazzani M, Billsus D (1997) Learning and revising user profiles: the identification of interesting web sites. Mach Learn 27(3):313\u2013331","journal-title":"Mach Learn"},{"issue":"3","key":"495_CR34","doi-asserted-by":"publisher","first-page":"13:1","DOI":"10.1145\/1993036.1993037","volume":"29","author":"JS Pedro","year":"2011","unstructured":"Pedro JS, Siersdorfer S, Sanderson M (2011) Content redundancy in YouTube and its application to video tagging. ACM Trans Inf Syst 29(3):13:1\u201313:31","journal-title":"ACM Trans Inf Syst"},{"issue":"1\u20132","key":"495_CR35","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s10994-006-5833-1","volume":"62","author":"M Richardson","year":"2006","unstructured":"Richardson M, Domingos P (2006) Markov logic networks. Mach Learn 62(1\u20132):107\u2013136","journal-title":"Mach Learn"},{"key":"495_CR36","doi-asserted-by":"crossref","unstructured":"Ritze D, Lehmberg O, Oulabi Y, Bizer C (2016) Profiling the potential of web tables for augmenting cross-domain knowledge bases. In: Proceedings of the 25th international conference on world wide web, pp 251\u2013261","DOI":"10.1145\/2872427.2883017"},{"key":"495_CR37","unstructured":"Sarawagi S, Cohen WW (2004) Semi-Markov conditional random fields for information extraction. In: Proceedings of the 17th neural information processing systems, pp 1185\u20131192"},{"issue":"1","key":"495_CR38","doi-asserted-by":"publisher","first-page":"39:1","DOI":"10.1007\/s13278-015-0277-x","volume":"5","author":"C Sarraute","year":"2015","unstructured":"Sarraute C, Brea J, Burroni J, Blanc P (2015) Inference of demographic attributes based on mobile phone usage patterns and social network topology. Soc Netw Anal Min 5(1):39:1\u201339:18","journal-title":"Soc Netw Anal Min"},{"issue":"3","key":"495_CR39","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1023\/A:1009690117684","volume":"16","author":"SJ Soltysiak","year":"1998","unstructured":"Soltysiak SJ, Crabtree IB (1998) Automatic learning of user profiles\u2014towards the personalisation of agent services. BT Technol J 16(3):110\u2013117","journal-title":"BT Technol J"},{"key":"495_CR40","unstructured":"Szell M, Thurner S (2012) How women organize social networks different from men. ArXiv preprint arXiv:1205.4683"},{"key":"495_CR43","doi-asserted-by":"crossref","unstructured":"Tang J, Hong M, Li J, Liang B (2006) Tree-structured conditional random fields for semantic annotation. In: Proceedings of the 5th international conference on the semantic web, pp 640\u2013653","DOI":"10.1007\/11926078_46"},{"key":"495_CR44","doi-asserted-by":"crossref","unstructured":"Tang J, Hong M, Zhang D, Liang B, Li J (2007a) Emerging technologies of text mining: techniques and applications. Chap. Information extraction: methodologies and applications, pp 1\u201333. Idea Group Inc.","DOI":"10.4018\/978-1-59904-373-9.ch001"},{"key":"495_CR47","doi-asserted-by":"crossref","unstructured":"Tang J, Zhang D, Yao L (2007b) Social network extraction of academic researchers. In: Proceedings of the 7th IEEE international conference on data mining, pp 292\u2013301","DOI":"10.1109\/ICDM.2007.30"},{"key":"495_CR48","doi-asserted-by":"crossref","unstructured":"Tang J, Zhang J, Yao L, Li J, Zhang L, Su Z (2008) Arnetminer: extraction and mining of academic social networks. In: Proceedings of the 14th ACM SIGKDD international conference on knowledge discovery and data mining, pp 990\u2013998","DOI":"10.1145\/1401890.1402008"},{"issue":"1","key":"495_CR46","doi-asserted-by":"publisher","first-page":"2:1","DOI":"10.1145\/1870096.1870098","volume":"5","author":"J Tang","year":"2010","unstructured":"Tang J, Yao L, Zhang D, Zhang J (2010) A combination approach to web user profiling. ACM Trans Knowl Discov Data 5(1):2:1\u20132:44","journal-title":"ACM Trans Knowl Discov Data"},{"key":"495_CR49","doi-asserted-by":"crossref","unstructured":"Tang W, Zhuang H, Tang J (2011a) Learning to infer social ties in large networks. In: ECML\/PKDD\u201911, pp 381\u2013397","DOI":"10.1007\/978-3-642-23808-6_25"},{"key":"495_CR41","doi-asserted-by":"crossref","unstructured":"Tang C, Ross K, Saxena N, Chen R (2011b) What\u2019s in a name: a study of names, gender inference, and gender behavior in Facebook. In: Proceedings of the 16th international conference on database systems for advanced applications, pp 344\u2013356","DOI":"10.1007\/978-3-642-20244-5_33"},{"key":"495_CR42","unstructured":"Tang J, Fang Z, Sun J (2013) Incorporating social context and domain knowledge for entity recognition. In: Proceedings of the 24th international conference on world wide web, pp 517\u2013526"},{"issue":"2","key":"495_CR45","doi-asserted-by":"publisher","first-page":"7:1","DOI":"10.1145\/2746230","volume":"34","author":"J Tang","year":"2016","unstructured":"Tang J, Lou T, Kleinberg J, Wu S (2016) Transfer learning to infer social ties across heterogeneous networks. ACM Trans Inf Syst 34(2):7:1\u20137:43","journal-title":"ACM Trans Inf Syst"},{"key":"495_CR50","doi-asserted-by":"crossref","unstructured":"Weninger T, Han J (2013) Exploring structure and content on the web: extraction and integration of the semi-structured web. In: Proceedings of the 6th ACM international conference on web search and data mining, pp 779\u2013780","DOI":"10.1145\/2433396.2433499"},{"key":"495_CR51","doi-asserted-by":"crossref","unstructured":"Weninger T, Hsu WH, Han J (2010) CETR: content extraction via tag ratios. In: Proceedings of the 19th international conference on world wide web, pp 971\u2013980","DOI":"10.1145\/1772690.1772789"},{"key":"495_CR53","doi-asserted-by":"crossref","unstructured":"Wu S, Liu J, Fan J (2015) Automatic web content extraction by combination of learning and grouping. In: Proceedings of the 24th international conference on world wide web, pp 1264\u20131274","DOI":"10.1145\/2736277.2741659"},{"key":"495_CR52","doi-asserted-by":"crossref","unstructured":"Wu L, Ge Y, Liu Q, Chen E, Long B, Huang Z (2016) Modeling users\u2019 preferences and social links in social networking services: a joint-evolving perspective. In: Proceedings of the 30th AAAI conference on artificial intelligence, pp 279\u2013286","DOI":"10.1609\/aaai.v30i1.9980"},{"key":"495_CR54","unstructured":"Yedidia JS, Freeman WT, Weiss Y (2000) Generalized belief propagation. In: Proceedings of the 13th neural information processing systems, pp 689\u2013695"},{"key":"495_CR55","doi-asserted-by":"crossref","unstructured":"Yu K, Guan G, Zhou M (2005) Resume information extraction with cascaded hybrid model. In: Proceedings of the 43rd annual meeting on association for computational linguistics, pp 499\u2013506","DOI":"10.3115\/1219840.1219902"}],"container-title":["Social Network Analysis and Mining"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13278-018-0495-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13278-018-0495-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13278-018-0495-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T06:31:03Z","timestamp":1719901863000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13278-018-0495-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,3,22]]},"references-count":55,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["495"],"URL":"https:\/\/doi.org\/10.1007\/s13278-018-0495-0","relation":{},"ISSN":["1869-5450","1869-5469"],"issn-type":[{"value":"1869-5450","type":"print"},{"value":"1869-5469","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,3,22]]},"assertion":[{"value":"16 January 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 March 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"24"}}