# Word cloud of ENM2020 question responses ----------------------------------
#
# Reads the free-text question column from the exported Google Forms CSV,
# cleans the text with tm, and draws a frequency word cloud.

library(tm)
library(wordcloud)
library(SnowballC)
library(readxl)  # NOTE(review): loaded but unused in this script — confirm before removing

# Path to the .csv export of the ENM2020 question responses. Edit this path
# as needed; the original called setwd("") with an empty string, which errors
# at runtime ("cannot change working directory").
responses_file <- "ENM 2020 Questions (Responses) - Form Responses.csv"

sheet <- read.csv(responses_file)

# Build a corpus from the free-text question column
dt <- Corpus(VectorSource(sheet$Please.provide.your.question.here.))

# Generic pattern-replacement transformer for use with tm_map
to_something <- content_transformer(function(x, pattern, replacement) {
  gsub(pattern, replacement, x)
})

# Convert the text to lower case FIRST so the normalisation below also
# catches capitalised variants ("Specie", "Species"), which the original
# script missed by replacing before tolower
dt <- tm_map(dt, content_transformer(tolower))

# Normalise "specie"/"species" to "species" in a single word-bounded pass.
# The original chained gsub("specie", "species") then gsub("speciess",
# "species"), which needlessly round-tripped every "species" through
# "speciess" before repairing it.
dt <- tm_map(dt, to_something, "\\bspecies?\\b", "species")

# Remove numbers
dt <- tm_map(dt, removeNumbers)

# Remove English common stopwords (done before removePunctuation because
# tm's stopword list includes contractions such as "don't")
dt <- tm_map(dt, removeWords, stopwords("english"))

# Project-specific stopwords: high-frequency but uninformative words
# observed in the responses
custom_stopwords <- c(
  "like", "can", "used", "way", "dear", "one", "best", "use",
  "question", "need", "week", "different", "will", "questions",
  "affect", "understand", "using", "know", "thank", "run", "well",
  "also", "really", "type", "due", "consider", "better", "however",
  "find", "lot", "possible", "account", "certain", "much", "new",
  "many", "useful", "mentioned", "think", "case", "particular", "sorry",
  "yes", "put", "file", "specifically", "depending", "per", "explained",
  "remove", "choosing", "weeks", "close", "start", "con", "wondering",
  "emilio", "add", "single", "often", "tell", "big", "sincerely",
  "got", "presented", "long", "far", "probably", "end", "atte", "taken",
  "included", "selecting", "making", "least", "seen", "looking",
  "sometimes", "others", "provide", "usually", "actually", "going",
  "smaller", "una", "three", "might", "read", "already", "show",
  "made", "words", "previous", "los", "las", "rather", "especially",
  "interested", "para", "must", "trying", "try", "quite", "among",
  "works", "ones", "thinking", "second", "instead", "higher", "something",
  "last", "given", "sure", "done", "low", "large", "seems", "taking",
  "enough", "found", "able", "less", "bit", "deal", "little",
  "now", "maybe", "within", "ask", "said", "set", "according", "always",
  "without", "give", "right", "several", "still", "que", "high", "say",
  "etc", "great", "take", "may", "considering", "first", "please",
  "just", "want", "get", "make", "two", "hello", "good", "thanks"
)
dt <- tm_map(dt, removeWords, custom_stopwords)

# Remove punctuation
dt <- tm_map(dt, removePunctuation)

# Eliminate extra white space left behind by the removals above
dt <- tm_map(dt, stripWhitespace)

# Term frequencies across all documents, sorted descending
dtm <- TermDocumentMatrix(dt)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Fixed seed so the word-cloud layout is reproducible
set.seed(1234)
wordcloud(
  words = d$word, freq = d$freq, min.freq = 5,
  max.words = 200, random.order = FALSE, rot.per = 0.3,
  colors = brewer.pal(8, "Dark2")
)