% Book chapter; the citation key is kept unchanged so existing \cite commands still resolve.
% Review notes:
%  - The former journal field duplicated booktitle and is not an incollection field; removed.
%  - Author names use the explicit "Last, First" form with the same surname split BibTeX
%    derived from the original "First Last" input; confirm family names against the paper.
%  - volume 642 is presumably the number within a book series; add a series field once
%    the series name has been verified.
@incollection{SisLab2257,
  author    = {Duong, Tran Duc and Pham, Bao Son and Tan, Hanh},
  title     = {Using Content-Based Features for Author Profiling of {Vietnamese} Forum Posts},
  booktitle = {Recent Developments in Intelligent Information and Database Systems},
  publisher = {Springer International Publishing},
  volume    = {642},
  pages     = {287--296},
  year      = {2016},
  month     = feb,
  url       = {https://eprints.uet.vnu.edu.vn/eprints/id/eprint/2257/},
  abstract  = {This paper reports the results of author profiling task for Vietnamese forum posts to identify the personal traits, such as gender, age, occupation, and location of the author using content-based features. Experiments were conducted on the different types of features, including stylometric features (such as lexical, syntactic, structural features) as well as content-based features (the most important words) to compare the performance and on the data sets we collected from the various forums in Vietnamese. Three learning methods, consisting of Decision Tree, Bayes Network, Support Vector Machine (SVM), were tested and the SVM achieved the best results. The results show that these kinds of features work well on such a kind of short and free style messages as forum posts, in which, content-based features yielded much better results than stylometric features.},
}