take off dataset update
app_dialogue.py  CHANGED  +0 -57
@@ -304,60 +304,6 @@ def model_inference(
     print("Success - generated the following text:", acc_text)
     print("-----")
 
-
-def csv_to_hf_dataset(csv_file):
-    df = pd.read_csv(csv_file)
-
-    FEATURES = datasets.Features(
-        {
-            "images": datasets.Sequence(datasets.Image(decode=True)),
-            "conversation": [
-                {
-                    "user": datasets.Value("string"),
-                    "assistant": datasets.Value("string"),
-                }
-            ],
-        }
-    )
-
-    def parse_and_download(data_row):
-        # Parse the JSON-like structure in the second column
-        discussion_data = json.loads(data_row[1].replace('""', '"'))
-
-        images = []
-        conversation = []
-        for entry in discussion_data:
-            if isinstance(entry[0], dict) and 'file' in entry[0]:
-                # Get images
-                image = load_image_from_url(entry[0]['file'])
-                images.append(image)
-            elif isinstance(entry, list):
-                # Get conversations
-                conversation.append({"user": entry[0], "assistant": entry[1]})
-
-        return images, conversation
-
-
-    # Apply parsing and downloading function
-    df['processed_data'] = df.apply(parse_and_download, axis=1)
-
-    # Create a Hugging Face dataset
-    data_dict = {
-        "images": df['processed_data'].apply(lambda x: x[0]),
-        "conversation": df['processed_data'].apply(lambda x: x[1])
-    }
-
-    dataset = datasets.Dataset.from_dict(data_dict, features=FEATURES)
-    return dataset
-
-
-def update_dope_problematic_dataset_fn():
-    dope_dataset = csv_to_hf_dataset("gradio_dope_data_points/log.csv")
-    dope_dataset.push_to_hub("HuggingFaceM4/dope_chatty_dataset", private=True)
-    problematic_dataset = csv_to_hf_dataset("gradio_problematic_data_points/log.csv")
-    problematic_dataset.push_to_hub("HuggingFaceM4/problematic_chatty_dataset", private=True)
-
-
 # Hyper-parameters for generation
 max_new_tokens = gr.Slider(
     minimum=8,
@@ -535,8 +481,5 @@ with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {hei
         None,
         preprocess=False,
     )
-    update_dope_problematic_dataset.click(
-        fn=update_dope_problematic_dataset_fn,
-    )
 
 demo.launch()
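For context, the deleted csv_to_hf_dataset helper read Gradio flagging logs in which the second CSV column stores the chat history as JSON with doubled double-quotes, mixing image entries ({"file": url}) and [user, assistant] turn pairs; it then downloaded the images, built a datasets.Dataset with the FEATURES schema shown above, and pushed it to the Hub as a private dataset. A minimal, self-contained sketch of just the parsing step follows; the example row, URL, and texts are made up for illustration and are not taken from the actual logs.

import json

# Hypothetical example of one flagged row from log.csv: column 0 is a timestamp,
# column 1 holds the chat history as JSON with CSV-style doubled double-quotes.
data_row = [
    "2024-01-01 00:00:00",
    '[[{""file"": ""https://example.com/cat.png""}], '
    '[""What is in the picture?"", ""A cat sitting on a sofa.""]]',
]

# Undo the quote doubling, then parse the JSON payload.
discussion_data = json.loads(data_row[1].replace('""', '"'))

images = []
conversation = []
for entry in discussion_data:
    if isinstance(entry[0], dict) and "file" in entry[0]:
        # Image turn: keep the URL (the app downloaded it with load_image_from_url).
        images.append(entry[0]["file"])
    elif isinstance(entry, list):
        # Text turn: a [user, assistant] pair.
        conversation.append({"user": entry[0], "assistant": entry[1]})

print(images)        # ['https://example.com/cat.png']
print(conversation)  # [{'user': 'What is in the picture?', 'assistant': 'A cat sitting on a sofa.'}]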