# · 2 years ago · Dec 26, 2022, 09:30 PM
1import PyPDF2
2import re
3import openai
4
# Step 1: Convert the PDF file into a text file using a Python script
def pdf_to_text(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, with a newline
        between consecutive pages.
    """
    with open(pdf_file, 'rb') as pdf:
        # strict=False asks PyPDF2 to tolerate malformed PDFs instead of raising.
        reader = PyPDF2.PdfReader(pdf, strict=False)

        # Pages are separated by a newline so that the last word of one page
        # is not fused with the first word of the next; `or ""` guards
        # against a page for which no text could be extracted.
        page_texts = [page.extract_text() or "" for page in reader.pages]

    return "\n".join(page_texts)
19
20
# Step 2: Slice the 70,000 + words into chunks
def slice_text(text, chunk_size):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    A word is a maximal run of word characters (regex ``\\w+``); punctuation
    and whitespace only act as separators and never produce empty "words".

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.

    Returns:
        A list of chunks, each chunk being a list of words. The last chunk
        may be shorter than ``chunk_size``. Text containing no words yields
        an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # findall(r'\w+') yields the same words as split(r'\W+') but without the
    # empty strings that split() emits when the text starts or ends with a
    # separator (or is empty).
    words = re.findall(r'\w+', text)

    return [words[i:i + chunk_size] for i in range(0, len(words), chunk_size)]
36
# Step 3: Summarize each of the chunks
def summarize_chunks(chunks):
    """Summarize every chunk with the OpenAI completion API.

    Args:
        chunks: Iterable of chunks. Each chunk is either a string or a
            sequence of words (as produced by ``slice_text``).

    Returns:
        A list with one summary string per chunk, in the same order.
    """
    # Set the OpenAI API key
    # NOTE(review): "API KEY" is a placeholder and must be replaced with a
    # real key; prefer loading it from the environment over hard-coding it.
    openai.api_key = "API KEY"

    summaries = []
    for chunk in chunks:
        # A chunk that is a list of words would otherwise be interpolated as
        # its Python repr (['word', 'word', ...]); send plain text instead.
        chunk_text = chunk if isinstance(chunk, str) else " ".join(chunk)

        response = openai.Completion.create(
            engine="text-davinci-002",
            prompt=f"Summarize this text:\n{chunk_text}",
            max_tokens=1024,
            temperature=0.5,
            top_p=1,
            frequency_penalty=1,
            presence_penalty=1
        )

        # The generated text lives in the first choice of the response;
        # store that string, not the whole response object, so the summaries
        # can be concatenated later.
        summaries.append(response.choices[0].text)

    return summaries
64
# Step 4: Merge all of the chunks into one text file
def merge_summaries(summaries):
    """Concatenate the summaries into a single string.

    Args:
        summaries: Iterable of summaries. Each item is either a plain string
            or a completion response mapping whose generated text is stored
            under ``["choices"][0]["text"]``.

    Returns:
        All summary texts concatenated in order (empty string for no input).
    """
    def _summary_text(item):
        # Return the text of one summary, unwrapping a response mapping.
        if isinstance(item, str):
            return item
        # Appending a raw API response to a str raises TypeError, so pull
        # the completion text out of the response instead.
        return item["choices"][0]["text"]

    return "".join(_summary_text(item) for item in summaries)
77
# Step 5: Write a new summary from the merged chunks of text
def write_summary(summary_text, output_file):
    """Generate a summary of ``summary_text`` and write it to ``output_file``.

    Args:
        summary_text: The merged chunk summaries to summarize again.
        output_file: Path of the text file to (over)write with the summary.
    """
    # NOTE(review): the whole merged text is sent in one prompt; for a long
    # book this may exceed the model's context limit -- confirm.
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Write a summary of this text:\n{summary_text}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )

    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` attribute.
    generated = response.choices[0].text

    # Explicit UTF-8 so non-ASCII model output does not fail on platforms
    # whose default encoding cannot represent it.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(generated)
95
# Step 6: Generate key notes from the summary
def generate_key_notes(summary_text):
    """Ask the OpenAI completion API for key notes on ``summary_text``.

    Args:
        summary_text: The text to derive key notes from.

    Returns:
        The generated key notes as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Generate key notes from this text:\n{summary_text}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )

    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` attribute.
    return response.choices[0].text
111
# Step 7: Create a step-by-step guide from the key notes
def create_step_by_step_guide(key_notes):
    """Ask the OpenAI completion API for a step-by-step guide.

    Args:
        key_notes: The key notes the guide should be based on.

    Returns:
        The generated step-by-step guide as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Create a step-by-step guide from these key notes:\n{key_notes}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )

    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` attribute.
    return response.choices[0].text
127
# Step 8: Summarize the notes into the bare essentials of the book
def summarize_to_bare_essentials(summary_text):
    """Ask the OpenAI completion API to condense ``summary_text``.

    Args:
        summary_text: The text to reduce to its bare essentials.

    Returns:
        The condensed text as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Summarize this text to the bare essentials:\n{summary_text}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )

    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` attribute.
    return response.choices[0].text
143
# Step 9: Write a blog post from the notes
def write_blog_post(key_notes, output_file):
    """Generate a blog post from ``key_notes`` and write it to ``output_file``.

    Args:
        key_notes: The key notes the blog post should be based on.
        output_file: Path of the text file to (over)write with the post.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Write a blog post from these key notes:\n{key_notes}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )

    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` attribute.
    generated = response.choices[0].text

    # Explicit UTF-8 so non-ASCII model output does not fail on platforms
    # whose default encoding cannot represent it.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(generated)
161
# Step 10: Generate some mid-journey prompts from the notes
def generate_mid_journey_prompts(key_notes):
    """Ask the OpenAI completion API for mid-journey prompts.

    Args:
        key_notes: The key notes the prompts should be based on.

    Returns:
        The generated prompts as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Generate some mid-journey prompts from these key notes:\n{key_notes}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )

    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` attribute.
    return response.choices[0].text
177
# Read the PDF file
pdf_file = "mlops.pdf"
text = pdf_to_text(pdf_file)

# Slice the text into chunks
chunks = slice_text(text, 500)
print("Slicing completed. Summarizing chunks...")

# Summarize the chunks
summaries = summarize_chunks(chunks)
print("Summarizing completed. Merging summaries...")

# Merge the summaries
summary_text = merge_summaries(summaries)

# Write the summary to a file
output_file = "summary.txt"
write_summary(summary_text, output_file)

# Generate key notes from the summary (one API call; every later step that
# needs the notes reuses this result)
key_notes = generate_key_notes(summary_text)

# Create a step-by-step guide from the key notes
step_by_step_guide = create_step_by_step_guide(key_notes)

# Summarize the notes to the bare essentials
bare_essentials = summarize_to_bare_essentials(summary_text)

# Write a blog post from the notes
output_file = "blog_post.txt"
write_blog_post(key_notes, output_file)

# Generate some mid-journey prompts from the notes
prompts = generate_mid_journey_prompts(key_notes)

# Print the step-by-step guide, bare essentials, and prompts
print(step_by_step_guide)
print(bare_essentials)
print(prompts)
223
224