% Conference paper from the 2012 IEEE Colloquium on Information Science and
% Technology (CIST'12), held in Fez, Morocco, October 22--24, 2012.
% The year is 2012, matching the conference dates given in booktitle and note.
% The full sponsor list is kept in the organization field; publisher is IEEE.
% Editor given names are stored as exported initials (full names not available here).
@inproceedings{ISI:000371495900008,
  author       = {Mountassir, Asmaa and Benbrahim, Houda and Berrada, Ilham},
  title        = {Some Methods to Address the Problem of Unbalanced Sentiment Classification in an {Arabic} Context},
  booktitle    = {2012 Colloquium on Information Science and Technology ({CIST}'12)},
  series       = {Colloquium in Information Science and Technology},
  editor       = {ElMohajir, M and Begdouri, A and ElMohajir, BE and Zarghili, A},
  year         = {2012},
  pages        = {43--48},
  publisher    = {IEEE},
  organization = {IEEE; IEEE Morocco Sect; IEEE Morocco Comp \& Commun Joint Chapter; USMBA IEEE Student Branch; Faculty of Sciences Dhar Mahraz; Faculty of Technical Sciences of Fez; IEEE Comp Soc; IEEE Commun Soc},
  note         = {IEEE Colloquium on Information Science and Technology (CIST), Fez, Morocco, October 22--24, 2012},
  isbn         = {978-1-4673-2725-1},
  issn         = {2327-185X},
  abstract     = {The rise of social media (such as online web forums and social networking sites) has attracted interests to mining and analyzing opinions available on the web. The online opinion has become the object of studies in many research areas; especially that called ``Opinion Mining and Sentiment Analysis''. Several interesting and advanced works were performed on few languages (in particular English). However, there were very few studies on some languages such as Arabic. This paper presents the study we have carried out to address the problem of unbalanced data sets in supervised sentiment classification in an Arabic context. We propose three different methods to under-sample the majority class documents. Our goal is to compare the effectiveness of the proposed methods with the common random under-sampling. We also aim to evaluate the behavior of the classifier toward different under-sampling rates. We use two different common classifiers, namely Naive Bayes and Support Vector Machines. The experiments are carried out on an Arabic data set that we have built from Aljazeera's web site and labeled manually. The results show that Naive Bayes is sensitive to data set size, the more we reduce the data the more the results degrade. However, it is not sensitive to unbalanced data sets on the contrary of Support Vector Machines which is highly sensitive to unbalanced data sets. The results show also that we can rely on the proposed techniques and that they are typically competitive with random under-sampling.},
}