Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -178,17 +178,15 @@ def process_dataset(
|
|
178 |
stats_file_path = f.name
|
179 |
json.dump(stats_dict, f, indent=2)
|
180 |
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
- **
|
188 |
-
|
189 |
-
|
190 |
-
- **Median Score:** `{median_score_str}`
|
191 |
-
""")
|
192 |
|
193 |
yield update_log("Process finished successfully!")
|
194 |
|
@@ -222,28 +220,21 @@ def upload_to_hub(
|
|
222 |
if plot_file and os.path.exists(plot_file):
|
223 |
upload_file(path_or_fileobj=plot_file, path_in_repo="quality_distribution.png", repo_id=repo_id, repo_type="dataset", token=hf_token)
|
224 |
|
225 |
-
#
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
```
|
241 |
-
"""
|
242 |
-
# Then we perform the dedent and format steps separately.
|
243 |
-
readme_content = dedent(readme_template).format(
|
244 |
-
repo_name=repo_id.split('/')[-1],
|
245 |
-
repo_id=repo_id
|
246 |
-
)
|
247 |
|
248 |
upload_file(path_or_fileobj=readme_content.encode(), path_in_repo="README.md", repo_id=repo_id, repo_type="dataset", token=hf_token)
|
249 |
progress(1.0, "Done!")
|
|
|
178 |
stats_file_path = f.name
|
179 |
json.dump(stats_dict, f, indent=2)
|
180 |
|
181 |
+
# --- DEFINITIVE FIX: build the text line by line from a list ---
|
182 |
+
summary_lines = [
|
183 |
+
"#### ✅ Scoring Completed!",
|
184 |
+
f"- **Dataset:** `{model_id}`",
|
185 |
+
f"- **Processed Samples:** `{actual_samples:,}`",
|
186 |
+
f"- **Mean Score:** `{stats_dict['statistics']['mean']:.3f}`",
|
187 |
+
f"- **Median Score:** `{stats_dict['statistics']['median']:.3f}`"
|
188 |
+
]
|
189 |
+
summary_md = "\n".join(summary_lines)
|
|
|
|
|
190 |
|
191 |
yield update_log("Process finished successfully!")
|
192 |
|
|
|
220 |
if plot_file and os.path.exists(plot_file):
|
221 |
upload_file(path_or_fileobj=plot_file, path_in_repo="quality_distribution.png", repo_id=repo_id, repo_type="dataset", token=hf_token)
|
222 |
|
223 |
+
# --- DEFINITIVE FIX: build the README text line by line from a list as well ---
|
224 |
+
readme_lines = [
|
225 |
+
"---",
|
226 |
+
"license: apache-2.0",
|
227 |
+
"---",
|
228 |
+
f"# Quality-Scored Dataset: {repo_id.split('/')[-1]}",
|
229 |
+
"This dataset was scored for quality using the [Dataset Quality Scorer Space](https://huggingface.co/spaces/ggml-org/dataset-quality-scorer).",
|
230 |
+
"",
|
231 |
+
"## Usage",
|
232 |
+
"```python",
|
233 |
+
"from datasets import load_dataset",
|
234 |
+
f'dataset = load_dataset("{repo_id}", split="train")',
|
235 |
+
"```"
|
236 |
+
]
|
237 |
+
readme_content = "\n".join(readme_lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
239 |
upload_file(path_or_fileobj=readme_content.encode(), path_in_repo="README.md", repo_id=repo_id, repo_type="dataset", token=hf_token)
|
240 |
progress(1.0, "Done!")
|