C10X committed on
Commit b12c475 · verified · 1 Parent(s): 7f5dfa7

Update app.py

Files changed (1)
  1. app.py +24 -33
app.py CHANGED
@@ -178,17 +178,15 @@ def process_dataset(
     stats_file_path = f.name
     json.dump(stats_dict, f, indent=2)
 
-    mean_score_str = f"{stats_dict['statistics']['mean']:.3f}"
-    median_score_str = f"{stats_dict['statistics']['median']:.3f}"
-    samples_str = f"{actual_samples:,}"
-
-    summary_md = dedent(f"""
-    #### Scoring Completed!
-    - **Dataset:** `{model_id}`
-    - **Processed Samples:** `{samples_str}`
-    - **Mean Score:** `{mean_score_str}`
-    - **Median Score:** `{median_score_str}`
-    """)
+    # --- DEFINITIVE FIX: build the text line by line from a list ---
+    summary_lines = [
+        "#### Scoring Completed!",
+        f"- **Dataset:** `{model_id}`",
+        f"- **Processed Samples:** `{actual_samples:,}`",
+        f"- **Mean Score:** `{stats_dict['statistics']['mean']:.3f}`",
+        f"- **Median Score:** `{stats_dict['statistics']['median']:.3f}`"
+    ]
+    summary_md = "\n".join(summary_lines)
 
     yield update_log("Process finished successfully!")
 
@@ -222,28 +220,21 @@ def upload_to_hub(
     if plot_file and os.path.exists(plot_file):
         upload_file(path_or_fileobj=plot_file, path_in_repo="quality_distribution.png", repo_id=repo_id, repo_type="dataset", token=hf_token)
 
-    # ==============================================================================
-    # --- THE DEFINITIVE FIX FOR THE ERROR IS HERE ---
-    # First we create a template that contains no formatting.
-    # ==============================================================================
-    readme_template = """
-    ---
-    license: apache-2.0
-    ---
-    # Quality-Scored Dataset: {repo_name}
-    This dataset was scored for quality using the [Dataset Quality Scorer Space](https://huggingface.co/spaces/ggml-org/dataset-quality-scorer).
-    ![Quality Distribution](quality_distribution.png)
-    ## Usage
-    ```python
-    from datasets import load_dataset
-    dataset = load_dataset("{repo_id}", split="train")
-    ```
-    """
-    # Then we apply dedent and format as separate steps.
-    readme_content = dedent(readme_template).format(
-        repo_name=repo_id.split('/')[-1],
-        repo_id=repo_id
-    )
+    # --- DEFINITIVE FIX: build the README text line by line from a list as well ---
+    readme_lines = [
+        "---",
+        "license: apache-2.0",
+        "---",
+        f"# Quality-Scored Dataset: {repo_id.split('/')[-1]}",
+        "This dataset was scored for quality using the [Dataset Quality Scorer Space](https://huggingface.co/spaces/ggml-org/dataset-quality-scorer).",
+        "![Quality Distribution](quality_distribution.png)",
+        "## Usage",
+        "```python",
+        "from datasets import load_dataset",
+        f'dataset = load_dataset("{repo_id}", split="train")',
+        "```"
+    ]
+    readme_content = "\n".join(readme_lines)
 
     upload_file(path_or_fileobj=readme_content.encode(), path_in_repo="README.md", repo_id=repo_id, repo_type="dataset", token=hf_token)
     progress(1.0, "Done!")
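
For reference, a minimal standalone sketch of the list-and-join pattern the new code adopts. The `stats_dict`, `model_id`, and `actual_samples` values below are hypothetical stand-ins; the snippet mirrors only the shape of the change, not the surrounding `process_dataset` logic.

```python
# Hypothetical stand-ins for values produced earlier in process_dataset().
stats_dict = {"statistics": {"mean": 0.812, "median": 0.845}}
model_id = "user/example-dataset"
actual_samples = 12345

# Build the Markdown summary line by line and join once, instead of
# formatting an indented triple-quoted template with dedent().
summary_lines = [
    "#### Scoring Completed!",
    f"- **Dataset:** `{model_id}`",
    f"- **Processed Samples:** `{actual_samples:,}`",
    f"- **Mean Score:** `{stats_dict['statistics']['mean']:.3f}`",
    f"- **Median Score:** `{stats_dict['statistics']['median']:.3f}`",
]
summary_md = "\n".join(summary_lines)
print(summary_md)
```

For these sample values, this prints the same five-line Markdown block that the diff's `summary_md` would contain.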