# Paste metadata: 6 years ago · Jun 12, 2019, 08:38 AM
def _cluster_selected_columns(data, columns, n_clusters=3):
    """Standardise *columns* of *data*, reduce them to two PCA components and cluster.

    Returns a ``(components, labels)`` pair: the array produced by
    ``PCA(n_components=2).fit_transform`` on the scaled columns, and the
    labels produced by ``KMeans(n_clusters=n_clusters).fit_predict`` on those
    components. A ``ValueError`` raised by scikit-learn for unusable input
    propagates to the caller.
    """
    scaled = StandardScaler().fit_transform(data[columns])
    components = PCA(n_components=2).fit_transform(scaled)
    labels = KMeans(n_clusters=n_clusters).fit_predict(components)
    return components, labels


def _store_clustered_table(data, table_name):
    """Create *table_name* if it is missing and append every row of *data* to it.

    Column names are reduced to word characters before they are used in the
    ``CREATE TABLE`` statement; the same reduced names are used for the
    insert so that both statements agree on the schema.
    """
    from django.db import connection

    clean_names = [re.sub(r'\W+', '', str(name)) for name in data.columns]

    # Identifiers cannot be bound as query parameters, so they are reduced to
    # word characters above and quoted by the backend here (a column called
    # e.g. "order" would otherwise break the statement).
    quote = connection.ops.quote_name
    column_defs = ', '.join(quote(name) + ' varchar(255)' for name in clean_names)
    create_sql = 'CREATE TABLE IF NOT EXISTS ' + quote(table_name) + ' (' + column_defs + ')'
    with connection.cursor() as cursor:
        cursor.execute(create_sql)

    # NOTE(review): database credentials are hard-coded in source; they
    # should be moved to settings / environment configuration.
    database_url = 'mysql+pymysql://{user}:{password}@127.0.0.1/{database_name}'.format(
        user='root',
        password='user1234',
        database_name="clustered",
    )
    engine = create_engine(database_url, echo=False)
    try:
        # Insert under the sanitised names; the original headers may contain
        # characters that were stripped from the CREATE TABLE column list.
        renamed = data.copy()
        renamed.columns = clean_names
        renamed.to_sql(name=table_name, con=engine, if_exists='append', index=False)
    finally:
        # A fresh engine is built per call; release its connection pool.
        engine.dispose()


def processing(request, id=None):
    """Show a preview of an uploaded CSV and, on POST, cluster it.

    GET renders ``kmeans/processing.html`` with the file record, its column
    names and the first rows of the CSV.

    POST reads the selected column names from the ``sel`` form field, scales
    them, projects them onto two PCA components and runs KMeans with three
    clusters. The labelled data is written to a newly named SQL table, one
    ``ClusterData`` row is created per CSV row, and the ``FileDetails``
    record is marked as processed.

    Args:
        request: the incoming HTTP request.
        id: primary key of the ``FileDetails`` record; it must belong to
            ``request.user``, otherwise a 404 is raised.

    Returns:
        The rendered template on GET; an ``HttpResponse`` on POST (status 400
        when the column selection cannot be clustered).
    """
    import datetime
    import os

    from django.db import transaction

    template = 'kmeans/processing.html'
    q = get_object_or_404(FileDetails, id=id, user=request.user)

    # Strip list punctuation from the stored column text, one name per line.
    # NOTE(review): the later split() breaks names containing whitespace.
    c = q.data_columns.replace(',', '\n').replace('[', '').replace(']', '').replace("'", '')

    file_path = os.path.join(settings.MEDIA_ROOT, q.file_name)
    data = pd.read_csv(file_path)
    data['id'] = range(len(data))  # row identifier used for the ClusterData rows

    if request.method != 'POST':
        context = {'q': q, 'columns': c.split(), 'head': data.head().to_html}
        return render(request, template, context)

    v = request.POST.getlist('sel')  # columns chosen for clustering
    unknown = [name for name in v if name not in data.columns]
    if len(v) < 2 or unknown:
        # PCA is asked for two components, so at least two existing columns
        # are required.
        return HttpResponse(
            '<h2>Please select at least two valid columns for clustering.</h2>',
            status=400,
        )

    try:
        x_pca, y_predicted = _cluster_selected_columns(data, v)
    except ValueError:
        # scikit-learn rejects e.g. non-numeric values or too few rows.
        return HttpResponse(
            '<h2>The selected columns could not be clustered.</h2>',
            status=400,
        )

    data['cluster'] = y_predicted

    # Table name: file name plus the current time, reduced to word characters.
    fl = re.sub(r'\W+', '', q.file_name + datetime.datetime.now().strftime("%X"))
    request.session['fl'] = fl  # remembered in the session for later requests

    _store_clustered_table(data, fl)

    # tolist() hands plain Python numbers to the ORM; a single transaction
    # keeps the per-row inserts all-or-nothing.
    with transaction.atomic():
        for uid, (pca1, pca2), cluster in zip(
            data['id'].tolist(), x_pca.tolist(), y_predicted.tolist()
        ):
            ClusterData.objects.create(
                file_id=q, u_id=uid, pca1=pca1, pca2=pca2, cluster=cluster
            )

    # Update exactly the record that was processed, not every record that
    # happens to share its file name.
    FileDetails.objects.filter(pk=q.pk).update(status=True, sel_columns=v, table_name=fl)

    return HttpResponse('<h2>Your file has been successfully proceed.</h2>')
64
# dashboard function
def dashboard(request, id=None):
    """Render the dashboard page for the file record identified by *id*.

    Stores *id* in the session under ``fid``, looks up the matching
    ``FileDetails`` record and passes the queryset (``q``) together with the
    record's selected column names (``col``) to ``kmeans/dashboard.html``.

    Args:
        request: the incoming HTTP request.
        id: primary key of the ``FileDetails`` record to display.

    Returns:
        The rendered dashboard template; a 404 is raised when no record
        matches *id*.
    """
    template = 'kmeans/dashboard.html'
    request.session['fid'] = id

    # NOTE(review): unlike processing(), this lookup is not restricted to
    # request.user, so any record id can be displayed; confirm whether an
    # ownership filter is intended here.
    q = FileDetails.objects.filter(id=id)

    # Previously an unknown id left the column text unbound and the view
    # crashed; answer with a 404 instead.
    record = get_object_or_404(q)

    # sel_columns may be empty for a file that has not been processed yet
    # (presumably; verify against the model), so fall back to no columns.
    raw_columns = record.sel_columns or ''
    cleaned = raw_columns.replace(',', '\n').replace('[', '').replace(']', '').replace("'", '')

    context = {'q': q, 'col': cleaned.split()}
    return render(request, template, context)