· 6 years ago · Mar 26, 2020, 07:50 PM
1from googleapiclient.discovery import build
2from oauth2client.service_account import ServiceAccountCredentials
3
def get_service(api_name, api_version, scopes, key_file_location):
    """Build an authorized client for a Google API.

    Args:
        api_name: Name of the API to connect to.
        api_version: Version of that API to connect to.
        scopes: List of auth scopes to authorize for the application.
        key_file_location: Path to a service account JSON key file.

    Returns:
        The service object that ``build`` creates for the requested API,
        authorized with the service-account credentials.
    """
    # Load the service-account key and restrict it to the requested scopes.
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        key_file_location,
        scopes=scopes,
    )
    return build(api_name, api_version, credentials=creds)
24
25
def get_first_profile_id(service, profile_id="166440381"):
    """Return the view (profile) id to query, or None if none is reachable.

    Walks the Management API: first account -> first web property of that
    account -> views (profiles) of that property. If any of those listings
    comes back without items, None is returned.

    Args:
        service: Analytics service object exposing ``management()``.
        profile_id: View id to return once the walk has found at least one
            profile. Defaults to the id this function previously had
            hard-coded, so existing callers keep their behavior. Pass
            ``None`` to get the id of the first profile actually listed.

    Returns:
        ``profile_id`` (or the first listed profile's id when ``profile_id``
        is None) if the account/property/profile walk succeeds, else None.
    """
    # All Google Analytics accounts visible to these credentials.
    accounts = service.management().accounts().list().execute()
    account_items = accounts.get('items')
    if not account_items:
        return None
    account_id = account_items[0].get('id')

    # All web properties of the first account.
    properties = service.management().webproperties().list(
        accountId=account_id).execute()
    property_items = properties.get('items')
    if not property_items:
        return None
    property_id = property_items[0].get('id')

    # All views (profiles) of the first property.
    profiles = service.management().profiles().list(
        accountId=account_id,
        webPropertyId=property_id).execute()
    profile_items = profiles.get('items')
    if not profile_items:
        return None

    if profile_id is None:
        return profile_items[0].get('id')
    return profile_id
def get_data_from_ga(service, profile_id, **kwargs):
    """Run one Core Reporting query and return its rows.

    Args:
        service: Analytics service object exposing ``data().ga().get(...)``.
        profile_id: View (profile) id as a string, without the ``ga:`` prefix.
        **kwargs: Query parameters. All of ``start_date``, ``end_date``,
            ``metrics``, ``dimensions``, ``filters``, ``max_results`` and
            ``start_index`` are required (a missing one raises KeyError).

    Returns:
        The list under the response's ``rows`` key (at most ``max_results``
        rows per call), or an empty list when the response has no ``rows``
        key, which is what the API sends back when nothing matches.
    """
    response = service.data().ga().get(
        ids='ga:' + profile_id,
        start_date=kwargs['start_date'],
        end_date=kwargs['end_date'],
        metrics=kwargs['metrics'],
        dimensions=kwargs['dimensions'],
        filters=kwargs['filters'],
        max_results=kwargs['max_results'],
        start_index=kwargs['start_index'],
    ).execute()
    print(response)

    # A query with zero results omits 'rows' entirely; indexing it directly
    # used to raise KeyError: 'rows'.
    return response.get('rows', [])
73
def get_all_data_from_ga(service, profile_id, **kwargs):
    """Fetch every row of a query by paging through the results.

    Calls ``get_data_from_ga`` repeatedly, advancing ``start_index`` by one
    page each time, until a page comes back empty or shorter than the page
    size.

    Args:
        service: Analytics service object, passed through unchanged.
        profile_id: View (profile) id, passed through unchanged.
        **kwargs: Query parameters for ``get_data_from_ga``. ``max_results``
            is used as the page size and defaults to 10000 when omitted;
            any ``start_index`` supplied is overwritten.

    Returns:
        A single list containing the rows of all pages, in order.
    """
    # Page by the same size the query asks for; a separate hard-coded 10000
    # would stop early or skip rows for any other max_results.
    page_size = kwargs.setdefault('max_results', 10000)

    rows = []
    start_index = 1  # the API's start index is 1-based

    while True:
        kwargs['start_index'] = start_index
        page = get_data_from_ga(service, profile_id, **kwargs)
        rows.extend(page)
        # An empty page can never be followed by more data; checking it
        # explicitly also rules out an endless loop for a page size of 0.
        if not page or len(page) < page_size:
            break
        start_index += page_size
        print('New iteration', len(rows))
    return rows
# Auth scope requested for the service account.
scope = 'https://www.googleapis.com/auth/analytics.readonly'
# Path to the service account JSON key file.
key_file_location = './gc-la-voz-7037a49a6aff.json'

# Authenticate and build the Analytics v3 service used by the functions below.
service = get_service('analytics', 'v3', [scope], key_file_location)

# View (profile) id that get_data() runs its queries against.
profile_id = get_first_profile_id(service)
def get_scroll_mobile(service, profile_id, url, name, start, end):
    """Export the ``HeatMap-<url>`` event counts to a mobile scroll CSV.

    Queries ``ga:TotalEvents`` per ``ga:EventLabel`` for events whose category
    equals ``'HeatMap-' + url``, converts labels and counts to integers
    (the label ``'(not set)'`` becomes 0), sorts by label and writes the
    result to ``csv/scroll_<name>_mobile.csv``.

    Args:
        service: Analytics service object.
        profile_id: View (profile) id to query.
        url: Suffix of the event category to filter on.
        name: Name used in the output file name.
        start: Query start date.
        end: Query end date.

    Returns:
        The DataFrame that was written, indexed by integer label with a
        single integer ``count`` column. It is empty when the query matched
        nothing.
    """
    import pandas as pd

    kwargs = {
        'start_date': start,
        'end_date': end,
        'metrics': 'ga:TotalEvents',
        'dimensions': 'ga:EventLabel',
        'filters': 'ga:EventCategory==HeatMap-' + url,
        'max_results': 10000,
    }
    data = get_all_data_from_ga(service, profile_id, **kwargs)

    # zip(*[]) yields nothing, so unpacking an empty result into two names
    # would raise ValueError; fall back to two empty columns instead.
    labels, counts = zip(*data) if data else ((), ())

    df = pd.DataFrame({'count': list(counts)}, index=list(labels))
    df.index = df.index.map(lambda x: 0 if x == '(not set)' else x)
    df = df.astype(int)
    df.index = df.index.map(int)
    df = df.sort_index()

    df.to_csv('csv/scroll_' + name + '_mobile.csv')

    return df
129
def get_scroll_desktop(service, profile_id, url, name, start, end):
    """Export the ``HeatMap-<url>`` event counts to a desktop scroll CSV.

    Queries ``ga:TotalEvents`` per ``ga:EventLabel`` for events whose category
    equals ``'HeatMap-' + url``, converts labels and counts to integers
    (the label ``'(not set)'`` becomes 0), sorts by label and writes the
    result to ``csv/scroll_<name>_desktop.csv``.

    Args:
        service: Analytics service object.
        profile_id: View (profile) id to query.
        url: Suffix of the event category to filter on.
        name: Name used in the output file name.
        start: Query start date.
        end: Query end date.

    Returns:
        The raw rows returned by the query (a list, empty when nothing
        matched), not the DataFrame.
    """
    import pandas as pd

    kwargs = {
        'start_date': start,
        'end_date': end,
        'metrics': 'ga:TotalEvents',
        'dimensions': 'ga:EventLabel',
        'filters': 'ga:EventCategory==HeatMap-' + url,
        'max_results': 10000,
    }
    data = get_all_data_from_ga(service, profile_id, **kwargs)

    # zip(*[]) yields nothing, so unpacking an empty result into two names
    # would raise ValueError; fall back to two empty columns instead.
    labels, counts = zip(*data) if data else ((), ())

    df = pd.DataFrame({'count': list(counts)}, index=list(labels))
    df.index = df.index.map(lambda x: 0 if x == '(not set)' else x)
    df = df.astype(int)
    df.index = df.index.map(int)
    df = df.sort_index()

    df.to_csv('csv/scroll_' + name + '_desktop.csv')

    # NOTE(review): get_scroll_mobile returns the DataFrame while this returns
    # the raw rows; kept as-is for compatibility - confirm which is intended.
    return data
153
def get_scroll_website(service, profile_id, urld, urlm, name, start, end):
    """Write the scroll CSVs for a site: mobile (urlm) first, then desktop (urld)."""
    get_scroll_mobile(service, profile_id,
                      url=urlm, name=name, start=start, end=end)
    get_scroll_desktop(service, profile_id,
                       url=urld, name=name, start=start, end=end)
def get_clicks_mobile(service, profile_id, url, name, start, end):
    """Export the ``Click-<url>`` event counts to a mobile clicks CSV.

    Queries ``ga:TotalEvents`` per ``ga:EventLabel`` for events whose category
    equals ``'Click-' + url`` and writes them, indexed by label, to
    ``csv/clicks_<name>_mobile.csv``. Counts are written as returned by the
    query, without conversion.

    Args:
        service: Analytics service object.
        profile_id: View (profile) id to query.
        url: Suffix of the event category to filter on.
        name: Name used in the output file name.
        start: Query start date.
        end: Query end date.

    Returns:
        None. The CSV file is the only output; it contains just the header
        when the query matched nothing.
    """
    import pandas as pd

    kwargs = {
        'start_date': start,
        'end_date': end,
        'metrics': 'ga:TotalEvents',
        'dimensions': 'ga:EventLabel',
        'filters': 'ga:EventCategory==Click-' + url,
        'max_results': 10000,
    }
    data = get_all_data_from_ga(service, profile_id, **kwargs)

    # zip(*[]) yields nothing, so unpacking an empty result into two names
    # would raise ValueError; fall back to two empty columns instead.
    labels, counts = zip(*data) if data else ((), ())

    df = pd.DataFrame({'count': list(counts)}, index=list(labels))

    df.to_csv('csv/clicks_' + name + '_mobile.csv')
176
def get_clicks_desktop(service, profile_id, url, name, start, end):
    """Export the ``Click-<url>`` event counts to a desktop clicks CSV.

    Queries ``ga:TotalEvents`` per ``ga:EventLabel`` for events whose category
    equals ``'Click-' + url`` and writes them, indexed by label, to
    ``csv/clicks_<name>_desktop.csv``. Counts are written as returned by the
    query, without conversion.

    Args:
        service: Analytics service object.
        profile_id: View (profile) id to query.
        url: Suffix of the event category to filter on.
        name: Name used in the output file name.
        start: Query start date.
        end: Query end date.

    Returns:
        None. The CSV file is the only output; it contains just the header
        when the query matched nothing.
    """
    import pandas as pd

    kwargs = {
        'start_date': start,
        'end_date': end,
        'metrics': 'ga:TotalEvents',
        'dimensions': 'ga:EventLabel',
        'filters': 'ga:EventCategory==Click-' + url,
        'max_results': 10000,
    }
    data = get_all_data_from_ga(service, profile_id, **kwargs)

    # zip(*[]) yields nothing, so unpacking an empty result into two names
    # would raise ValueError; fall back to two empty columns instead.
    labels, counts = zip(*data) if data else ((), ())

    df = pd.DataFrame({'count': list(counts)}, index=list(labels))

    df.to_csv('csv/clicks_' + name + '_desktop.csv')
196
def get_clicks_website(service, profile_id, urld, urlm, name, start, end):
    """Write the clicks CSVs for a site: mobile (urlm) first, then desktop (urld)."""
    get_clicks_mobile(service, profile_id,
                      url=urlm, name=name, start=start, end=end)
    get_clicks_desktop(service, profile_id,
                       url=urld, name=name, start=start, end=end)
def get_data(urld, urlm, name, start='7daysAgo', end='today'):
    """Export scroll and click CSVs for one site.

    Uses the module-level ``service`` and ``profile_id``.

    Args:
        urld: Desktop URL of the site.
        urlm: Mobile URL of the site.
        name: Name used in the output file names.
        start: Query start date (defaults to '7daysAgo').
        end: Query end date (defaults to 'today').
    """
    # Scroll export first, then clicks.
    get_scroll_website(service, profile_id,
                       urld=urld, urlm=urlm, name=name, start=start, end=end)
    get_clicks_website(service, profile_id,
                       urld=urld, urlm=urlm, name=name, start=start, end=end)
############################################################################
# get_data takes (urld, urlm, name, start, end). The calls used to pass only
# four positional values, so the name landed in urlm, the start date in name
# and the end date in start. Keyword arguments pin each value to its slot.
# NOTE(review): only one URL per site was given; it is used for both desktop
# and mobile here - confirm the mobile URL for each site.
get_data(urld='https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF',
         urlm='https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF',
         name='SDFormBasico', start='2020-02-10', end='2020-02-16')
print(1)
get_data(urld='https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF2',
         urlm='https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF2',
         name='SDFormClv', start='2020-02-10', end='2020-02-16')
print(2)
get_data(urld='https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF3',
         urlm='https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF3',
         name='SDFormPdf', start='2020-02-10', end='2020-02-16')
print(3)
get_data(urld='https://suscripcion.lavoz.com.ar/',
         urlm='https://suscripcion.lavoz.com.ar/',
         name='SD', start='2020-02-10', end='2020-02-16')
215############################################################################
# get_data('url_desktop', 'url_mobile', 'name', 'start-date', 'end-date')
# url_desktop is the desktop url of the site you want to analyze
# url_mobile is the mobile url of the site you want to analyze
219# name is the name you chose previously
220# start and end dates are the interval of time where to seek data. They should be in the format:
221# YYYY-MM-DD or today or yesterday or NdaysAgo (Where N is a number).
222# The default values of start and end date (if you do not provide them) are '7daysAgo' and 'today' respectively
223
224# get_data('https://vos.lavoz.com.ar/tv/la-fuerte-revelacion-de-pampita-fui-a-llevarle-flores-a-mi-hija-y-ahi-le-pedi-un-companero-de-vid',
225# 'https://mvos.lavoz.com.ar/tv/la-fuerte-revelacion-de-pampita-fui-a-llevarle-flores-a-mi-hija-y-ahi-le-pedi-un-companero-de-vid',
226# 'pampita')
227{'kind': 'analytics#gaData', 'id': 'https://www.googleapis.com/analytics/v3/data/ga?ids=ga:166440381&dimensions=ga:EventLabel&metrics=ga:TotalEvents&filters=ga:EventCategory%3D%3DHeatMap-SDFormBasico&start-date=2020-02-16&end-date=today&start-index=1&max-results=10000', 'query': {'start-date': '2020-02-16', 'end-date': 'today', 'ids': 'ga:166440381', 'dimensions': 'ga:EventLabel', 'metrics': ['ga:TotalEvents'], 'filters': 'ga:EventCategory==HeatMap-SDFormBasico', 'start-index': 1, 'max-results': 10000}, 'itemsPerPage': 10000, 'totalResults': 0, 'selfLink': 'https://www.googleapis.com/analytics/v3/data/ga?ids=ga:166440381&dimensions=ga:EventLabel&metrics=ga:TotalEvents&filters=ga:EventCategory%3D%3DHeatMap-SDFormBasico&start-date=2020-02-16&end-date=today&start-index=1&max-results=10000', 'profileInfo': {'profileId': '166440381', 'accountId': '1407167', 'webPropertyId': 'UA-1407167-1', 'internalWebPropertyId': '2464290', 'profileName': 'RED La Voz del Interior (Todos los sitios)', 'tableId': 'ga:166440381'}, 'containsSampledData': False, 'columnHeaders': [{'name': 'ga:EventLabel', 'columnType': 'DIMENSION', 'dataType': 'STRING'}, {'name': 'ga:TotalEvents', 'columnType': 'METRIC', 'dataType': 'INTEGER'}], 'totalsForAllResults': {'ga:TotalEvents': '0'}}
228---------------------------------------------------------------------------
229KeyError Traceback (most recent call last)
230<ipython-input-41-322f99c786d6> in <module>()
231 1 ############################################################################
232 2 get_data('https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF',
233----> 3 'SDFormBasico', '2020-02-10', '2020-02-16')
234 4 print(1)
235 5 get_data('https://suscripcion.lavoz.com.ar/suscripcion/P3M40OFF2',
236
237<ipython-input-40-959d057b0ff7> in get_data(urld, urlm, name, start, end)
238 1 def get_data(urld, urlm, name, start='7daysAgo', end='today'):
239----> 2 get_scroll_website(service, profile_id, urld, urlm, name, start, end)
240 3 get_clicks_website(service, profile_id, urld, urlm, name, start, end)
241
242<ipython-input-38-b6ee1d84f3e1> in get_scroll_website(service, profile_id, urld, urlm, name, start, end)
243 48
244 49 def get_scroll_website(service, profile_id, urld, urlm, name, start, end):
245---> 50 get_scroll_mobile(service, profile_id, urlm, name, start, end)
246 51 get_scroll_desktop(service, profile_id, urld, name, start, end)
247
248<ipython-input-38-b6ee1d84f3e1> in get_scroll_mobile(service, profile_id, url, name, start, end)
249 8 'max_results':10000
250 9 }
251---> 10 data = get_all_data_from_ga(service, profile_id, **kwargs)
252 11
253 12 import pandas as pd
254
255<ipython-input-36-2e81835db606> in get_all_data_from_ga(service, profile_id, **kwargs)
256 31 while(True):
257 32 kwargs['start_index'] = index
258---> 33 new_data = get_data_from_ga(service, profile_id, **kwargs)
259 34 data.extend(new_data)
260 35 index += 10000
261
262<ipython-input-36-2e81835db606> in get_data_from_ga(service, profile_id, **kwargs)
263 17 print(data)
264 18
265---> 19 return data['rows']
266 20
267 21 def get_all_data_from_ga(service, profile_id, **kwargs):
268
269KeyError: 'rows'