· 6 years ago · May 27, 2019, 12:10 AM
1# ----- CONFIGURE YOUR EDITOR TO USE 4 SPACES PER TAB ----- #
2import sys, os
3sys.path.append(os.path.join(os.path.split(os.path.abspath(__file__))[0], 'lib'))
4import pymysql as db
5import settings
6
7
8def connection():
9 ''' User this function to create your connections '''
10 con = db.connect(
11 settings.mysql_host,
12 settings.mysql_user,
13 settings.mysql_passwd,
14 settings.mysql_schema)
15
16 return con
17
18def sortFourth(val):
19 return val[3]
20
21def Classify_Create_Final(topn,category_list):
22 final=[]
23 # Add table header
24 tmpt=("title","class","subclass", "weightsum")
25 final.append(tmpt)
26
27 # For each article list containing the first-topn categories of the article
28 for article_block in category_list:
29 # Add all the category data at first and then if there are blanks fill spaces so that for every article 7 rows are displayed (optional)
30 for i in range(0,len(article_block)):
31 tmpt=(article_block[i][0],article_block[i][1],article_block[i][2],article_block[i][3])
32 final.append(tmpt)
33 # Add (topn-category amount) blank values
34 for i in range(len(article_block),topn):
35 tmpt=("----","----","----","----")
36 final.append(tmpt)
37
38
39
40
41 return final
42
43
44def classify(topn):
45
46 # Create a new connection
47 con=connection()
48
49 # Create a cursor on the connection
50 cur=con.cursor()
51
52 # Query that returns the title and the summary for every article without a category (the first is with join , the second without join)
53 # que="SELECT art.title,art.summary FROM articles art LEFT JOIN article_has_class ahc ON (ahc.articles_id=art.id) WHERE (ahc.articles_id IS NULL)"
54 que="SELECT title , summary FROM articles WHERE articles.id NOT IN (SELECT articles.id FROM articles , article_has_class WHERE (article_has_class.articles_id=articles.id))"
55 cur.execute(que)
56
57 data=cur.fetchall()
58
59 art_category=[]
60
61 # For each article that is returned by the query , take the summary(sum) and split it into single words
62 for article_row in data:
63 category_list=[]
64 title=article_row[0]
65 sum=article_row[1]
66 words=sum.split()
67 # For each word run the following query which returns all categories that are linked with it (and their weights)
68 for w in words:
69 que="SELECT * FROM classes WHERE (term=%s)"
70 input=(w)
71 row_c=cur.execute(que,input)
72
73 if row_c > 0:
74 category_res=cur.fetchall()
75 # For each category returned by the query prepare the data and create a temporary list
76 for category_row in category_res:
77 class1=category_row[0]
78 subclass=category_row[1]
79 term=category_row[2]
80 weight=float(category_row[3])
81 tmpt=[title,class1,subclass,weight]
82
83 # Search if the temporary list's category already exists in the general array
84 found=False
85 for tmpcat in category_list:
86 cl=tmpcat[1]
87 subcl=tmpcat[2]
88 # If it exists , simply add the weight of the current temporary list to the one that is inside the general array
89 if cl==class1 and subcl==subclass:
90 tmpcat[3]=weight+tmpcat[3]
91 found=True
92 break
93
94 # If it doesn't exist , insert a new category
95 if found==False:
96 category_list.append(tmpt)
97
98 # After all the insertions , sort the general array by the 'weightsum' in descending order
99 category_list.sort(key = sortFourth , reverse=True)
100 # Slice the general array into its first 'topn' elements and insert it into the (almost) final 3D matrix
101 art_category.append(category_list[:int(topn)])
102
103
104
105 final_tuple=Classify_Create_Final(int(topn),art_category)
106 return final_tuple
107
108
109def updateweight(class1,subclass,weight):
110
111 # Create a new connection
112
113 con=connection()
114
115 # Create a cursor on the connection
116 cur=con.cursor()
117
118
119
120 return [("result",),]
121
122
123def selectTopNClasses(fromdate, todate, n ):
124
125 # Create a new connection
126
127 con=connection()
128
129 # Create a cursor on the connection
130 cur=con.cursor()
131
132 que="""select article_has_class.class , article_has_class.subclass, count(article_has_class.class)
133 from article_has_class , articles
134 where articles.id=article_has_class.articles_id in (
135 select articles.id
136 from articles , article_has_class , classes
137 where ( classes.class = article_has_class.class and
138 classes.subclass = article_has_class.subclass and
139 article_has_class.articles_id = articles.id and
140 articles.date > %s and articles.date < %s
141
142 )
143 )
144 group by article_has_class.class
145 order by count(class) desc
146 limit %d"""
147 input=( fromdate, todate,int(n) )
148
149
150 cur.execute(que,input)
151 answer=cur.fetchall()
152 return [("class","subclass", "count"),results]
153
154def countArticles(class1,subclass):
155 # Create a new connection
156 con=connection()
157 # Create a cursor on the connection
158 cur=con.cursor()
159
160 query="""SELECT COUNT(*)
161 FROM (SELECT art.title
162 from classes c
163 inner join article_has_class ahc on (ahc.class=c.class AND ahc.subclass=c.subclass)
164 inner join articles art on (art.id=ahc.articles_id)
165 where (c.class=%s AND c.subclass=%s)
166 group by art.id) as t1"""
167
168 input=(class1,subclass)
169
170 cur.execute(query,input)
171
172 ans=cur.fetchall()
173
174 return [("count",),ans[0]]
175
176
177def findSimilarArticles(articleId,n):
178 # Create a new connection
179 con=connection()
180 # Create a cursor on the connection
181 cur=con.cursor()
182
183
184
185 return [("articleid",),]