# standard
import configparser
import os
import time
import re
# 3rd party
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
# read config
config = configparser.ConfigParser()
config.read('config.ini')
# read config variables
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
# LangChain Config
# llm
llm = OpenAI(temperature=0)
# prompt
prompt = PromptTemplate(
    template="Write a concise summary of the following: {transcript}",
    input_variables=['transcript']
)
# chain
chain = LLMChain(
    prompt=prompt,
    llm=llm,
    verbose=False
)
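
# Note: chain.run(text) fills the {transcript} slot of the prompt above and
# returns the model's summary as a string; create_meeting_notes below calls
# it once per transcript chunk.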

def load_transcript(input_file):
    # Google Meet Transcripts have a header which we don't want to be summarized
    header_lines = 5
    file_text = input_file.readlines()
    head = file_text[:header_lines]
    transcript = "".join(file_text[header_lines:])
    return head, transcript
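
# The five header lines captured by load_transcript are assumed to follow
# Google Meet's export layout: head[0] holds the meeting title with the date
# in parentheses, and head[2] lists the attendees; create_meeting_notes below
# relies on that ordering.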

def create_meeting_notes(transcript_file):
    # read config variables
    # if not os.getenv("OPENAI_API_KEY"):
    #     os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
    # transcript_filepath = config['OPTIONAL']['transcript-filepath']
    # notes_filepath = config['OPTIONAL']['notes-filepath']
    head, transcript = load_transcript(transcript_file)
    # split the transcript on the 5-minute timestamps (e.g. "00:05:00")
    regex_pattern = r"[0-9]{2}:[0-9]{2}:0{2}"
    five_min_chunks = re.split(regex_pattern, transcript)
    # create a text splitter to subdivide those chunks into appropriately sized chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
    # list the meeting time and the chunks associated with it
    timestamped_summaries = []
    print(f"Summarizing {len(five_min_chunks)*5} minute meeting")
    start_time = time.time()
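    # note: each sub-chunk below is one OpenAI completion call via chain.run,
    # so runtime and API cost scale with transcript length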
    # summarize each 5-minute chunk, sub-chunk by sub-chunk
    for i, five_minutes_chunk in enumerate(five_min_chunks):
        timestamp = time.strftime('%H:%M:%S', time.gmtime(60 * 5 * i))
        sub_chunks = text_splitter.split_text(five_minutes_chunk)
        summaries = []
        for j, chunk in enumerate(sub_chunks, 1):
            summaries.append(chain.run(chunk))
            print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
        timestamped_summaries.append((timestamp, summaries))
        elapsed_time = time.time() - start_time
        minutes = elapsed_time // 60
        print(f"Summarized first {5 * (i+1)} minutes of meeting, {minutes:.0f} minutes {elapsed_time - 60 * minutes:.2f} seconds elapsed")
    # head[0] holds the meeting title and date, separated by parentheses
    first_line = re.split(r"[()]", head[0])
    # Transcript Notes
    meeting_notes = f'''# {first_line[0]}
{first_line[1]}
## Attendees
{head[2]}## Meeting Notes
'''
    for timestamp, summaries in timestamped_summaries:
        meeting_notes += f'### {timestamp}\n'
        for summary in summaries:
            meeting_notes += f"- {summary.strip()}\n"
    meeting_notes += "\nEnd of Meeting"
    return meeting_notes
# with open(notes_filepath, 'w+') as f:
#     f.write(meeting_notes)
# print(f"Export to file {notes_filepath} completed")
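
# A minimal usage sketch, assuming the OPTIONAL config keys referenced in the
# commented-out lines above point at a plain-text Google Meet transcript and
# an output path for the generated notes.
if __name__ == "__main__":
    transcript_filepath = config['OPTIONAL']['transcript-filepath']
    notes_filepath = config['OPTIONAL']['notes-filepath']
    with open(transcript_filepath) as transcript_file:
        notes = create_meeting_notes(transcript_file)
    with open(notes_filepath, 'w+') as f:
        f.write(notes)
    print(f"Export to file {notes_filepath} completed")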