7dLeSp51

· 6 years ago · May 27, 2019, 12:10 AM
1# ----- CONFIGURE YOUR EDITOR TO USE 4 SPACES PER TAB ----- #
2import sys, os
3sys.path.append(os.path.join(os.path.split(os.path.abspath(__file__))[0], 'lib'))
4import pymysql as db
5import settings
6
7
8def connection():
9    ''' User this function to create your connections '''
10    con = db.connect(
11        settings.mysql_host,
12        settings.mysql_user,
13        settings.mysql_passwd,
14        settings.mysql_schema)
15
16    return con
17
18def sortFourth(val):
19    return val[3]
20
21def Classify_Create_Final(topn,category_list):
22    final=[]
23    # Add table header
24    tmpt=("title","class","subclass", "weightsum")
25    final.append(tmpt)
26
27    # For each article list containing the first-topn categories of the article
28    for article_block in category_list:
29        # Add all the category data at first and then if there are blanks fill spaces so that for every article 7 rows are displayed (optional)
30        for i in range(0,len(article_block)):
31            tmpt=(article_block[i][0],article_block[i][1],article_block[i][2],article_block[i][3])
32            final.append(tmpt)
33        # Add (topn-category amount) blank values
34        for i in range(len(article_block),topn):
35            tmpt=("----","----","----","----")
36            final.append(tmpt)
37
38
39
40
41    return final
42
43
44def classify(topn):
45
46    # Create a new connection
47    con=connection()
48
49    # Create a cursor on the connection
50    cur=con.cursor()
51
52    # Query that returns the title and the summary for every article without a category (the first is with join , the second without join)
53    # que="SELECT art.title,art.summary FROM articles art LEFT JOIN article_has_class ahc ON (ahc.articles_id=art.id) WHERE (ahc.articles_id IS NULL)"
54    que="SELECT title , summary FROM articles WHERE articles.id NOT IN (SELECT articles.id FROM articles , article_has_class WHERE (article_has_class.articles_id=articles.id))"
55    cur.execute(que)
56
57    data=cur.fetchall()
58
59    art_category=[]
60
61    # For each article that is returned by the query , take the summary(sum) and split it into single words
62    for article_row in data:
63        category_list=[]
64        title=article_row[0]
65        sum=article_row[1]
66        words=sum.split()
67        # For each word run the following query which returns all categories that are linked with it (and their weights)
68        for w in words:
69            que="SELECT * FROM classes WHERE (term=%s)"
70            input=(w)
71            row_c=cur.execute(que,input)
72
73            if row_c > 0:
74                category_res=cur.fetchall()
75                # For each category returned by the query prepare the data and create a temporary list
76                for category_row in category_res:
77                    class1=category_row[0]
78                    subclass=category_row[1]
79                    term=category_row[2]
80                    weight=float(category_row[3])
81                    tmpt=[title,class1,subclass,weight]
82
83                    # Search if the temporary list's category already exists in the general array
84                    found=False
85                    for tmpcat in category_list:
86                        cl=tmpcat[1]
87                        subcl=tmpcat[2]
88                        # If it exists , simply add the weight of the current temporary list to the one that is inside the general array
89                        if cl==class1 and subcl==subclass:
90                            tmpcat[3]=weight+tmpcat[3]
91                            found=True
92                            break
93
94                    # If it doesn't exist , insert a new category
95                    if found==False:
96                        category_list.append(tmpt)
97
98        # After all the insertions , sort the general array by the 'weightsum' in descending order
99        category_list.sort(key = sortFourth , reverse=True)
100        # Slice the general array into its first 'topn' elements and insert it into the (almost) final 3D matrix
101        art_category.append(category_list[:int(topn)])
102
103
104
105    final_tuple=Classify_Create_Final(int(topn),art_category)
106    return final_tuple
107
108
109def updateweight(class1,subclass,weight):
110
111   # Create a new connection
112
113    con=connection()
114
115    # Create a cursor on the connection
116    cur=con.cursor()
117
118
119
120    return [("result",),]
121
122
123def selectTopNClasses(fromdate, todate, n ):
124
125    # Create a new connection
126
127    con=connection()
128
129    # Create a cursor on the connection
130    cur=con.cursor()
131
132    que="""select article_has_class.class , article_has_class.subclass, count(article_has_class.class)
133            from article_has_class , articles
134            where articles.id=article_has_class.articles_id in (
135                select articles.id
136                from articles , article_has_class , classes
137                where  ( classes.class = article_has_class.class and
138	               classes.subclass = article_has_class.subclass and
139                   article_has_class.articles_id = articles.id and
140                   articles.date > %s and articles.date < %s
141
142                        )
143                                                                )
144            group by article_has_class.class
145            order by count(class) desc
146            limit %d"""
147    input=( fromdate, todate,int(n) )
148
149
150    cur.execute(que,input)
151    answer=cur.fetchall()
152    return [("class","subclass", "count"),results]
153
154def countArticles(class1,subclass):
155    # Create a new connection
156    con=connection()
157    # Create a cursor on the connection
158    cur=con.cursor()
159
160    query="""SELECT COUNT(*)
161            FROM (SELECT art.title
162	        from classes c
163	        inner join article_has_class ahc on (ahc.class=c.class AND ahc.subclass=c.subclass)
164	        inner join articles art on (art.id=ahc.articles_id)
165	        where (c.class=%s AND c.subclass=%s)
166	        group by art.id) as t1"""
167
168    input=(class1,subclass)
169
170    cur.execute(query,input)
171
172    ans=cur.fetchall()
173
174    return [("count",),ans[0]]
175
176
177def findSimilarArticles(articleId,n):
178    # Create a new connection
179    con=connection()
180    # Create a cursor on the connection
181    cur=con.cursor()
182
183
184
185    return [("articleid",),]