· 5 years ago · Jun 12, 2020, 05:50 PM
1# required imports
2from sklearn import tree
3from sklearn.feature_extraction.text import CountVectorizer
4import http.client
5import json
6import requests
7
# TMDb API key sent with each movie search request made by getData.
# Replace the value below with your own key, which can be requested at
# https://developers.themoviedb.org/3/
APIKey = "API_KEY"
10
11
12
13
14
15
def getData(name):
    """Return the plot overview of the first TMDb search hit for a movie title.

    Args:
        name: Movie title to search for, as typed by the user.

    Returns:
        The "overview" string of the first search result.

    Raises:
        requests.HTTPError: If TMDb answers with an HTTP error status.
        IndexError: If the search returns no movie for ``name``.
    """
    # Let requests build the query string so that titles containing "&", "#",
    # "+", spaces or non-ASCII characters are percent-encoded instead of
    # corrupting the URL (e.g. "Fast & Furious").
    query = {
        "include_adult": "false",
        "page": 1,
        "query": name,
        "language": "en-US",
        "api_key": APIKey,
    }
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(
        "https://api.themoviedb.org/3/search/movie",
        params=query,
        timeout=10,
    )
    # Fail with a clear HTTP error rather than a KeyError on the JSON body.
    response.raise_for_status()

    results = response.json()["results"]
    if not results:
        # Same exception type as indexing the empty list, but with context.
        raise IndexError("no movie found for title: " + repr(name))
    return results[0]["overview"]
24
25
26
27
28
29
30
# Collect the three title lists from the user: movies they like, movies they
# dislike, and movies they want a prediction for.
#
# NOTE: titles are not checked against the movie database here; a title that
# cannot be found only fails later, when its overview is looked up.


def _ask_for_count(prompt):
    """Prompt until the user enters a whole number and return it as an int."""
    while True:
        try:
            return int(input(prompt).strip())
        except ValueError:
            # Re-ask instead of crashing on input such as "three" or "".
            print("Please enter a whole number.")


def _ask_for_movies(count_prompt, movie_prompt):
    """Ask how many titles to read, then read that many non-empty titles."""
    titles = []
    # A count of zero or less simply yields an empty list.
    for _ in range(_ask_for_count(count_prompt)):
        title = input(movie_prompt).strip()
        while not title:
            # A blank title cannot be searched for, so ask again.
            title = input(movie_prompt).strip()
        titles.append(title)
    return titles


good_movies = _ask_for_movies(
    "How many good movies do you want to input? ",
    "What is a movie you like? ",
)
bad_movies = _ask_for_movies(
    "\nHow many bad movies do you want to input? ",
    "What is a movie you don't like? ",
)
testing_movies = _ask_for_movies(
    "\nHow many movies do you want to test? ",
    "What is a movie you would like to watch? ",
)
67
68
69
70
71
72
73
74
75
# Plot overviews, one per title and in the same order as the matching title
# list. Each overview is fetched with getData.
overview_good = [getData(title) for title in good_movies]
overview_bad = [getData(title) for title in bad_movies]
overview_unsure = [getData(title) for title in testing_movies]
94
95
96
97
98
99
100
101
102
103
104
105
106
# Train a bag-of-words decision tree on the overviews of the rated movies,
# then classify the overviews of the movies under test.
#
# The training texts must be the fetched overviews, not the titles: the model
# is asked to judge overviews, so it has to learn its vocabulary from them.
training_texts = overview_good + overview_bad
training_labels = ["good"] * len(overview_good) + ["bad"] * len(overview_bad)

vectorizer = CountVectorizer()
# Learn the vocabulary and build the word-count matrix in one pass.
training_vectors = vectorizer.fit_transform(training_texts)
testing_vectors = vectorizer.transform(overview_unsure)

classifier = tree.DecisionTreeClassifier()
classifier.fit(training_vectors, training_labels)

# Nothing to predict when no test movies were entered.
Results = classifier.predict(testing_vectors) if overview_unsure else []
118
# Export the fitted tree to tree.dot (Graphviz format).

# Newer scikit-learn releases only provide get_feature_names_out(); older
# ones only provide get_feature_names(). Support both.
if hasattr(vectorizer, "get_feature_names_out"):
    feature_names = list(vectorizer.get_feature_names_out())
else:
    feature_names = list(vectorizer.get_feature_names())

tree.export_graphviz(
    classifier,
    out_file='tree.dot',
    feature_names=feature_names,
    # Class names must follow the classifier's own (sorted) class order;
    # a hard-coded ["good", "bad"] would swap the labels in the picture.
    class_names=[str(label) for label in classifier.classes_],
)
126
# Report the predicted label next to each tested title.
for title, verdict in zip(testing_movies, Results):
    print(f"{title} is a {verdict} movie")
131 i = i + 1