97ci1jys

· 7 years ago · Mar 31, 2018, 02:48 PM
1# coding: utf-8
2
# Enable pylab's inline plotting mode. `get_ipython` is only available when
# this script runs under IPython/Jupyter. `run_line_magic` replaces the
# deprecated `magic()` call.
get_ipython().run_line_magic('pylab', 'inline')
4import csv, twitter, json, nltk
5import networkx as nx
6from functools import reduce
7from matplotlib import pyplot as plt
8from wordcloud import WordCloud
9
# Module-level placeholders for the four Twitter credentials (all empty).
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
OAUTH_TOKEN = ""
OAUTH_TOKEN_SECRET = ""
11
def accede_a_tw(fuente):
    """Build an authenticated Twitter client from a credentials file.

    The file must hold four non-blank lines, in this order: consumer key,
    consumer secret, OAuth token, OAuth token secret. Blank lines and
    surrounding whitespace are ignored, so a trailing newline is harmless.

    Args:
        fuente: Path to the credentials text file.

    Returns:
        A ``twitter.Twitter`` client configured with OAuth credentials.

    Raises:
        OSError: If the credentials file cannot be opened.
        ValueError: If the file does not contain exactly four credentials.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(fuente, 'r') as f:
        credenciales = [linea.strip() for linea in f if linea.strip()]
    if len(credenciales) != 4:
        # Never echo the file contents here: they are secrets.
        raise ValueError(
            "expected 4 credential lines in %r, found %d"
            % (fuente, len(credenciales))
        )
    consumer_key, consumer_secret, oauth_token, oauth_token_secret = credenciales
    # OAuth takes the token pair first, then the consumer pair.
    auth = twitter.oauth.OAuth(oauth_token,
                               oauth_token_secret,
                               consumer_key,
                               consumer_secret)
    return twitter.Twitter(auth=auth)
23
def carga_lista(archivo):
    """Load every cell of a CSV file into one flat list of words.

    All rows are flattened in file order and every space is removed from
    each cell.

    Args:
        archivo: Path to the CSV file (parsed with the ``unix`` dialect).

    Returns:
        A list of strings; empty if the file has no cells or cannot be
        opened.
    """
    try:
        # newline="" is what the csv module recommends for its file objects.
        with open(archivo, "r", newline="") as f:
            # Nested comprehension instead of reduce(): it also handles an
            # empty file, where reduce() without an initial value raises.
            return [
                elemento.replace(" ", "")
                for fila in csv.reader(f, dialect="unix")
                for elemento in fila
            ]
    except IOError:
        # A missing or unreadable list is treated as "no words".
        return []
34    
def busqueda_tw(tw, termino, lang="es", count="500"):
    """Search tweets matching a term and return the list of statuses.

    Args:
        tw: Client object exposing ``search.tweets(**kwargs)``.
        termino: Query string sent as ``q``.
        lang: Language filter forwarded to the search call.
        count: Requested number of results, forwarded unchanged.
            NOTE(review): Twitter's standard search API appears to cap
            ``count`` at 100, so the default of "500" may be clamped by
            the server. Confirm before relying on it.

    Returns:
        The value stored under ``"statuses"`` in the search response.
    """
    respuesta = tw.search.tweets(q=termino, lang=lang, count=count)
    return respuesta["statuses"]
37
def guarda_tuits(tuits, archivos):
    """Write the tweets to a JSON file, overwriting any previous content.

    Args:
        tuits: JSON-serialisable object (normally a list of tweet dicts).
        archivos: Path of the output file. The plural name is kept so
            existing keyword callers keep working; it is a single path.
    """
    with open(archivos, "w") as f:
        json.dump(tuits, f, indent=1)
41        
def carga_tuits(archivo):
    """Load previously saved tweets from a JSON file.

    Args:
        archivo: Path to the JSON file.

    Returns:
        The decoded JSON content, or an empty list if the file cannot be
        opened.

    Raises:
        ValueError: If the file exists but does not contain valid JSON.
            Corrupt data is reported rather than silently discarded.
    """
    try:
        # The context manager closes the file even if decoding fails.
        with open(archivo, "r") as f:
            return json.load(f)
    except IOError:
        # No saved tweets yet: start from an empty collection.
        return []
51
def mezcla_tuits(actuales, nuevos):
    """Append to ``actuales`` every tweet in ``nuevos`` whose id is new.

    ``actuales`` is modified in place and also returned. Duplicates within
    ``nuevos`` are added only once.

    Args:
        actuales: List of tweet dicts already known; each has an ``"id"``.
        nuevos: Iterable of candidate tweet dicts; each has an ``"id"``.

    Returns:
        The same ``actuales`` list, extended with the unseen tweets.
    """
    # Build the id index once instead of rebuilding a list per candidate.
    vistos = {actual["id"] for actual in actuales}
    for tuit in nuevos:
        identificador = tuit["id"]
        if identificador not in vistos:
            vistos.add(identificador)
            actuales.append(tuit)
    return actuales
57
def limpiar(texto):
    """Split text into word tokens, dropping punctuation.

    Args:
        texto: String to tokenize.

    Returns:
        The list of substrings of ``texto`` that match ``\\w+``.
    """
    tokenizer = nltk.RegexpTokenizer(r'\w+')
    # The method is `tokenize`; `tokenizer.tokenizer` does not exist.
    return tokenizer.tokenize(texto)
62
def analiza_menciones(tuits):
    """Draw an undirected graph of which users mention which others.

    Every tweet author becomes a node. Each (author, mentioned user) pair
    becomes an edge, except self-mentions. The graph is drawn on a new
    32x32 matplotlib figure; nothing is returned.

    Args:
        tuits: Iterable of tweet dicts providing
            ``["user"]["screen_name"]`` and
            ``["entities"]["user_mentions"]`` (a list of dicts with
            ``"screen_name"``).
    """
    # Sets deduplicate authors and pairs as they are collected.
    nodos = set()
    pares = set()
    for tuit in tuits:
        usuario = tuit["user"]["screen_name"]
        nodos.add(usuario)
        for mencion in tuit["entities"]["user_mentions"]:
            mencionado = mencion["screen_name"]
            # Skip self-mentions so the graph has no self-loops.
            if mencionado != usuario:
                pares.add((usuario, mencionado))
    G = nx.Graph()
    G.add_nodes_from(nodos)
    G.add_edges_from(pares)
    plt.figure(figsize=(32, 32))
    nx.draw_networkx(G)
81    
def refina_texto(tuits, lista, termino):
    """Join the tweet texts into one lowercase string without blacklisted words.

    The blacklist is the word list loaded from ``lista`` plus the words of
    the search term itself (with any ``@`` removed), compared
    case-insensitively.

    Args:
        tuits: Iterable of tweet dicts with a ``"text"`` entry.
        lista: Path to the blacklist CSV, read through ``carga_lista``.
        termino: The search term; its words are excluded from the result.

    Returns:
        The surviving lowercase words joined by single spaces. Empty string
        if there are no tweets or every word is blacklisted.
    """
    palabras_termino = [palabra.replace("@", "") for palabra in termino.split()]
    # A set gives constant-time membership checks in the filter below.
    lista_negra = {
        palabra.lower() for palabra in carga_lista(lista) + palabras_termino
    }
    # Create the tokenizer outside any loop so it exists even with no tweets.
    depurador = nltk.RegexpTokenizer(r'\w+')
    texto = " ".join(tuit["text"] for tuit in tuits)
    palabras = (palabra.lower() for palabra in depurador.tokenize(texto))
    return " ".join(
        palabra for palabra in palabras if palabra not in lista_negra
    )
97    
def nube(texto):
    """Generate a word cloud from ``texto`` and show it without axes.

    Args:
        texto: Text passed to ``WordCloud().generate``.
    """
    imagen = WordCloud().generate(texto)
    plt.imshow(imagen)
    # Hide the axes before showing the figure.
    plt.axis("off")
    plt.show()
103    
104    
def main(archivo="tuits.json", lista="lista_negra.csv"):
    """Run one search, merge it with saved tweets, and return the cleaned text.

    Prompts for a search term, loads the tweets saved in ``archivo``,
    searches Twitter using the credentials in ``credenciales.txt``, merges
    and saves the result back to ``archivo``, draws the mention graph, and
    returns the filtered text.

    Args:
        archivo: Path of the JSON file used to persist tweets between runs.
        lista: Path of the CSV blacklist passed to ``refina_texto``.

    Returns:
        The string produced by ``refina_texto`` for the merged tweets.
    """
    termino = input("termino de busqueda: ")
    tuits_previos = carga_tuits(archivo)
    tw = accede_a_tw("credenciales.txt")
    tuits_recientes = busqueda_tw(tw, termino)
    tuits = mezcla_tuits(tuits_previos, tuits_recientes)
    guarda_tuits(tuits, archivo)
    analiza_menciones(tuits)
    return refina_texto(tuits, lista, termino)