# Pasted source snapshot: Apr 20, 2018, 08:28 PM
1#Ryan Sherry, Cletus Andoh, Xin Tang, Vincent Zhang, Eric Liu
#CIS 400
3#Introduction to Social Media Data Mining
4#Final Project
5#4/4/2018
6
7from datetime import datetime
8import twitter
9import json
10import operator
11import json
12import networkx as nx
13import pandas as pd # handle data
14from textblob import TextBlob #sentiment analysis
15import re
16import IMDB_Data
17import matplotlib.pyplot as plt
18import numpy as np #for number computing
19from operator import neg
20
21import IMDB_Data
22
#global variables for sentiment

# Percentage of positive / neutral / negative tweets from the most recent
# searchTweets() run; written via the setgPos/setgNeut/setgNeg helpers below.
gPos = 0
gNeut = 0
gNeg = 0
28
def oauth_login():
    """Return an authenticated twitter.Twitter API handle.

    Credentials are read from the environment (TWITTER_CONSUMER_KEY,
    TWITTER_CONSUMER_SECRET, TWITTER_OAUTH_TOKEN, TWITTER_OAUTH_TOKEN_SECRET)
    when set, so they can be rotated without editing source. The original
    hard-coded literals remain only as a backward-compatible fallback.

    NOTE(review): these keys were committed to source control and must be
    considered leaked -- revoke them and rely on the environment variables.
    """
    import os  # local import: keeps the file-level import block unchanged

    CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY',
                                  '4A34OfnrNnjRU3LYoDszUgTnM')
    CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET',
                                     'LFlM8myxCr5FicvBdX6QTn93D0iuk79z82kbyscIDDUaG5lOuX')
    OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN',
                                 '913439754-YnDyGZaxD8TGJhbCLKQtSQb9e5Kpegcnl0JHJaXI')
    OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET',
                                        'XH4OLERcapvG9BGryUWDDj904qRj4VPEMoncD6FN9rBaJ')

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api
39
40from functools import partial
41from sys import maxint
42
# code from https://dev.to/rodolfoferro/sentiment-analysis-on-trumpss-tweets-using-python-
# code used from there includes clean_tweet and analize_sentiment

def clean_tweet(tweet):
    '''
    Utility function to clean the text in a tweet by removing
    links, @-mentions and special characters using regex.
    Runs of whitespace in the result are collapsed to single spaces.
    '''
    # Raw string literal: in a plain string '\w' and '\S' are invalid escape
    # sequences (DeprecationWarning on modern Pythons). The redundant '\/'
    # escapes are dropped -- '/' needs no escaping in a Python regex.
    return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)", " ", tweet).split())
52
def analize_sentiment(tweet):
    '''
    Utility function to classify the polarity of a tweet
    using textblob: 1 for positive, 0 for neutral, -1 for negative.
    '''
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 1
    return 0 if polarity == 0 else -1
65
#original code from cookbook for CIS 400SU
def twitter_search(twitter_api, q, max_results=200, **kw):
    """Search Twitter for query *q* and return a list of status dicts.

    Follows the 'next_results' pagination cursor until *max_results*
    statuses (capped at 1000) are collected or no more pages exist.

    See https://dev.twitter.com/docs/api/1.1/get/search/tweets and
    https://dev.twitter.com/docs/using-search for advanced search
    criteria that may be useful for keyword arguments.
    """
    search_results = twitter_api.search.tweets(q=q, count=100, **kw)
    statuses = search_results['statuses']

    # OAuth users can "only" make 180 search queries per 15-minute interval
    # (https://dev.twitter.com/docs/rate-limiting/1.1/limits), so enforce a
    # reasonable ceiling; ~1000 results may not exist for all queries anyway.
    max_results = min(1000, max_results)

    for _ in range(10):  # 10 pages * 100 tweets = 1000
        try:
            next_results = search_results['search_metadata']['next_results']
        # 'except KeyError, e' was Python-2-only syntax and the bound
        # exception was never used; this form parses on Python 2 and 3.
        except KeyError:  # no more results when next_results doesn't exist
            break

        # next_results has the form:
        # ?max_id=313519052523986943&q=NCAA&include_entities=1
        kwargs = dict(kv.split('=') for kv in next_results[1:].split("&"))

        search_results = twitter_api.search.tweets(**kwargs)
        statuses += search_results['statuses']

        if len(statuses) > max_results:
            break

    return statuses
106
#usage of code
# NOTE(review): module-level side effect -- this authenticates against the
# Twitter API at import time; consider moving it into main() instead.
twitter_api = oauth_login()
109
110
def searchTweets():
    """Prompt for a movie and a tweet count, run sentiment analysis.

    Gathers tweets via twitter_search(), classifies each with
    analize_sentiment(), stores the positive/neutral/negative percentages
    in the gPos/gNeut/gNeg module globals, and shows a pie chart.
    """
    q = raw_input("What movie would you like to search?")
    # int(raw_input(...)) instead of input(): on Python 2, input() eval()s
    # arbitrary user-typed expressions, which is fragile and unsafe.
    tCount = int(raw_input("How many tweets would you like to gather?"))

    results = twitter_search(twitter_api, q, max_results=tCount)

    if not results:
        # Avoid a ZeroDivisionError below when the search comes back empty.
        print("No tweets found for '%s'" % q)
        return

    # The search API may return fewer tweets than requested; clamp so the
    # slice below never walks off the end (the old while-loop raised
    # IndexError in that case).
    tCount = min(tCount, len(results))

    # harvest the tweet text out of the raw status JSON
    result2 = [status['text'] for status in results[:tCount]]

    # create Data Frame: one row per tweet, plus length and sentiment columns
    data = pd.DataFrame(data=result2, columns=['Tweets'])
    data['len'] = np.array([len(text) for text in result2])
    data['SA'] = np.array([analize_sentiment(tweet) for tweet in data['Tweets']])

    # sentiment analysis for tweet: partition by classifier output
    pos_tweets = [tweet for index, tweet in enumerate(data['Tweets']) if data['SA'][index] > 0]
    neu_tweets = [tweet for index, tweet in enumerate(data['Tweets']) if data['SA'][index] == 0]
    neg_tweets = [tweet for index, tweet in enumerate(data['Tweets']) if data['SA'][index] < 0]

    # Use float division: the old integer expressions silently truncated
    # the percentages under Python 2.
    total = float(len(data['Tweets']))
    posCount = len(pos_tweets) * 100 / total
    neutCount = len(neu_tweets) * 100 / total
    negCount = len(neg_tweets) * 100 / total

    setgPos(posCount)
    setgNeut(neutCount)
    setgNeg(negCount)

    print("SENTIMENT ANALYSIS OF '%s' for '%s' number of tweets" % (q, tCount))
    print("Positive tweets: {}%".format(posCount))
    print("Neutral tweets: {}%".format(neutCount))
    print("Negative tweets: {}%".format(negCount))

    # pie chart of the sentiment breakdown
    labels = 'Positive', 'Neutral', 'Negative'
    sizes = [posCount, neutCount, negCount]
    colors = ['green', 'yellow', 'red']
    patches, texts = plt.pie(sizes, colors=colors, shadow=True, startangle=90)
    plt.legend(patches, labels, loc="best")
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
173
174
def setgPos(n):
    # Store the positive-tweet percentage in the module global gPos.
    global gPos
    gPos = n
def setgNeut(n):
    # Store the neutral-tweet percentage in the module global gNeut.
    global gNeut
    gNeut = n
def setgNeg(n):
    # Store the negative-tweet percentage in the module global gNeg.
    global gNeg
    gNeg = n
184
def compareScores(IMDB_score, pos, neut, neg):
    """Compare the IMDB rating with each sentiment share.

    IMDB_score is on a 0-10 scale; pos/neut/neg are percentages (0-100).
    Returns [positive, neutral, negative] absolute deviations, each
    expressed as a percentage.
    """
    rating_fraction = float(IMDB_score) / 10
    return [abs(rating_fraction - float(share) / 100) * 100
            for share in (pos, neut, neg)]
198
199
200#-------------------------------------------------------------------------------------
def main():
    """Drive the pipeline: gather tweets, run SA, compare with IMDB."""
    print("This program fetches tweets about movies and performs SA on them")

    # Populates the gPos/gNeut/gNeg globals as a side effect.
    searchTweets()

    # Fetches the IMDB rating; the result is presumably exposed through
    # IMDB_Data.gIMDB (read below) -- verify against IMDB_Data module.
    IMDB_Data.getRating()

    variation = compareScores(IMDB_Data.gIMDB, gPos, gNeut, gNeg)

    print("variation between IMDB and Positive Sentiment : {}%".format(variation[0]))
    #print("variation between IMDB and Neutral Sentiment : {}%".format(variation[1]))
    #print("variation between IMDB and Negative Sentiment: {}%".format(variation[2]))
214
215
# Run the full pipeline only when executed as a script, not on import.
if __name__=='__main__':
    main()