Twitter sentiment analysis

Hi there!

This post is a continuation of the previous post.

You need to read the Twitter archive and store the tweets in the tweets variable (refer to the previous post for the steps).

Load required libraries.

library(tm)
library(stringr)
library(wordcloud)
library(syuzhet) #this library contain sentiment dictionary
library(lubridate) #provides tools that make it easier to parse and manipulate dates
library(ggplot2)
library(scales)
library(reshape2)
library(dplyr ) #dplyr provides a flexible grammar of data manipulation


# Re-read the archive from disk; the `tweets` object left over from the
# previous post has been modified, so start again from the raw CSV.
tweets <- read.csv(file = "./tweets.csv", stringsAsFactors = FALSE)

# Strip every @mention (Twitter handle) out of the tweet text.
nohandles <- str_replace_all(
  string = tweets$text,
  pattern = "@\\w+",
  replacement = ""
)

# Build a cleaned corpus from the handle-free tweet text.
#
# Lower-casing and stop-word removal run BEFORE punctuation is stripped:
# the English stop-word list contains contractions such as "don't" and "i'm",
# which can no longer match once the apostrophe has been removed.
wordCorpus <- Corpus(VectorSource(nohandles))
wordCorpus <- tm_map(wordCorpus, content_transformer(tolower))
wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("english"))
# Extra words dropped from the cloud in addition to the standard stop words.
wordCorpus <- tm_map(wordCorpus, removeWords, c("like", "video"))
wordCorpus <- tm_map(wordCorpus, removePunctuation)
wordCorpus <- tm_map(wordCorpus, stripWhitespace)
wordCorpus <- tm_map(wordCorpus, stemDocument)

# Colour palette shared by the word clouds in this script: take the 9-colour
# "YlGnBu" palette and discard its first four entries.
pal <- brewer.pal(9, "YlGnBu")
pal <- pal[-(1:4)]

# Fix the RNG seed before drawing the cloud.
set.seed(123)

# Word cloud of the cleaned tweet text (at most 100 words).
wordcloud(
  words = wordCorpus,
  scale = c(5, 1),
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.35,
  use.r.layout = FALSE,
  colors = pal
)

#this is the wordcloud of my tweets


# Build the term-document matrix from the cleaned corpus and print its summary.
tdm <- TermDocumentMatrix(x = wordCorpus)
tdm


# Analyse the Twitter handles mentioned in the tweets.
#
# str_extract_all() returns a list with one character vector per tweet (empty
# when the tweet mentions nobody). A list is not a safe corpus source: it may
# be coerced element-by-element to text, which yields junk terms such as
# "character0" in the cloud. Collapse each tweet's handles into a single
# space-separated string so the corpus always receives a character vector
# with one document per tweet.
friends <- str_extract_all(tweets$text, "@\\w+")
handlesPerTweet <- vapply(friends, paste, character(1), collapse = " ")
namesCorpus <- Corpus(VectorSource(handlesPerTweet))

# Word cloud of the mentioned handles (at most 40), reusing the `pal` palette
# defined earlier in the script.
set.seed(146)
wordcloud(
  words = namesCorpus,
  scale = c(3, 0.5),
  max.words = 40,
  random.order = FALSE,
  rot.per = 0.10,
  use.r.layout = FALSE,
  colors = pal
)

#here is my twitter handler wordcloud 

# Sentiment analysis: score every tweet against the NRC lexicon, preview the
# result, and append the score columns to the `tweets` data frame.
mySentiment <- get_nrc_sentiment(tweets$text)
head(mySentiment)
tweets <- cbind(tweets, mySentiment)

# Total each emotion over all tweets.
# The emotion columns are selected by name rather than by position, so the
# result does not depend on how many columns tweets.csv happens to contain.
# The "negative" and "positive" columns are deliberately left out here; they
# are analysed separately further down.
emotionCols <- c(
  "anger", "anticipation", "disgust", "fear",
  "joy", "sadness", "surprise", "trust"
)
sentimentTotals <- data.frame(colSums(mySentiment[, emotionCols]))
names(sentimentTotals) <- "count"

# Move the emotion names from the row names into a proper `sentiment` column.
sentimentTotals <- cbind(
  "sentiment" = rownames(sentimentTotals),
  sentimentTotals
)
rownames(sentimentTotals) <- NULL

# Bar chart of the total count per sentiment category across all tweets.
# Each bar is filled by its sentiment; the legend is hidden.
ggplot(
  data = sentimentTotals,
  mapping = aes(x = sentiment, y = count, fill = sentiment)
) +
  geom_bar(stat = "identity") +
  labs(
    x = "Sentiment",
    y = "Total Count",
    title = "Total Sentiment Score for All Tweets"
  ) +
  theme(legend.position = "none")

#my output


# Parse the archive timestamps and express them in the Asia/Kolkata time zone.
tweets$timestamp <- with_tz(ymd_hms(tweets$timestamp), tzone = "Asia/Kolkata")

# Mean negative / positive score per two-month bucket, reshaped to long form
# with one row per (bucket, sentiment) pair.
posnegtime <- tweets %>%
  group_by(timestamp = cut(timestamp, breaks = "2 months")) %>%
  summarise(
    negative = mean(negative),
    positive = mean(positive)
  ) %>%
  melt() %>%
  setNames(c("timestamp", "sentiment", "meanvalue"))

# Put "positive" ahead of "negative" in the factor levels.
posnegtime$sentiment <- factor(
  posnegtime$sentiment,
  levels = c("positive", "negative")
)

# Line chart of the mean positive / negative score per time bucket.
# Lines are coloured by sentiment; the small points mark each bucket.
ggplot(
  data = posnegtime,
  mapping = aes(x = as.Date(timestamp), y = meanvalue, group = sentiment)
) +
  geom_line(aes(colour = sentiment), size = 2.5, alpha = 0.7) +
  geom_point(size = 0.5) +
  # x axis: one tick every nine months, labelled like "2016-Jan".
  scale_x_date(
    breaks = date_breaks("9 months"),
    labels = date_format("%Y-%b")
  ) +
  scale_colour_manual(values = c("springgreen4", "firebrick3")) +
  # y axis starts at zero; the upper limit is taken from the data.
  ylim(0, NA) +
  labs(y = "Average sentiment score", title = "Sentiment Over Time") +
  theme(legend.title = element_blank(), axis.title.x = element_blank())


# Sentiment during the week: label every tweet with its day of the week.
tweets$weekday <- wday(tweets$timestamp, label = TRUE)

# Mean score of each of the eight emotions per weekday, reshaped to long form
# with one row per (weekday, emotion) pair.
weeklysentiment <- tweets %>%
  group_by(weekday) %>%
  summarise(
    anger        = mean(anger),
    anticipation = mean(anticipation),
    disgust      = mean(disgust),
    fear         = mean(fear),
    joy          = mean(joy),
    sadness      = mean(sadness),
    surprise     = mean(surprise),
    trust        = mean(trust)
  ) %>%
  melt() %>%
  setNames(c("weekday", "sentiment", "meanvalue"))

# Line chart of the mean score of each emotion per weekday.
# Lines are coloured by emotion; the small points mark each weekday.
ggplot(
  data = weeklysentiment,
  aes(x = weekday, y = meanvalue, group = sentiment)
) +
  geom_line(size = 2.5, alpha = 0.7, aes(color = sentiment)) +
  geom_point(size = 0.5) +
  # Start the y axis at zero and let the data set the upper limit, as the
  # other line charts in this script do. A fixed upper limit would drop any
  # weekday mean that exceeds it.
  ylim(0, NA) +
  theme(legend.title = element_blank(), axis.title.x = element_blank()) +
  ylab("Average sentiment score") +
  ggtitle("Sentiment During the Week")


# Sentiment during the year: label every tweet with its calendar month.
tweets$month <- month(tweets$timestamp, label = TRUE)

# Mean score of each of the eight emotions per month, reshaped to long form
# with one row per (month, emotion) pair.
monthlysentiment <- tweets %>%
  group_by(month) %>%
  summarise(
    anger        = mean(anger),
    anticipation = mean(anticipation),
    disgust      = mean(disgust),
    fear         = mean(fear),
    joy          = mean(joy),
    sadness      = mean(sadness),
    surprise     = mean(surprise),
    trust        = mean(trust)
  ) %>%
  melt() %>%
  setNames(c("month", "sentiment", "meanvalue"))

# Line chart of the mean score of each emotion per calendar month.
# Lines are coloured by emotion; the small points mark each month.
ggplot(
  data = monthlysentiment,
  mapping = aes(x = month, y = meanvalue, group = sentiment)
) +
  geom_line(aes(colour = sentiment), size = 2.5, alpha = 0.7) +
  geom_point(size = 0.5) +
  # y axis starts at zero; the upper limit is taken from the data.
  ylim(0, NA) +
  labs(y = "Average sentiment score", title = "Sentiment During the Year") +
  theme(legend.title = element_blank(), axis.title.x = element_blank())

I will be writing about WhatsApp sentiment analysis in the next post.

Thanks for visiting my blog. I always love to hear constructive feedback. Please give your feedback in the comment section below or write to me personally here.

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s