Spaces:
Running
Running
Update src/aibom_generator/generator.py
Browse files- src/aibom_generator/generator.py +258 -198
src/aibom_generator/generator.py
CHANGED
@@ -78,35 +78,12 @@ class AIBOMGenerator:
|
|
78 |
# Calculate final score with industry-neutral approach if enabled
|
79 |
final_score = calculate_completeness_score(aibom, validate=True, use_best_practices=use_best_practices)
|
80 |
|
81 |
-
#
|
82 |
if "metadata" in aibom and "properties" not in aibom["metadata"]:
|
83 |
aibom["metadata"]["properties"] = []
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
# Add score information
|
88 |
-
aibom["metadata"]["properties"].append({"name": "aibom:quality-score", "value": str(final_score["total_score"])})
|
89 |
-
aibom["metadata"]["properties"].append({"name": "aibom:quality-breakdown", "value": json.dumps(final_score["section_scores"])})
|
90 |
-
aibom["metadata"]["properties"].append({"name": "aibom:max-scores", "value": json.dumps(final_score["max_scores"])})
|
91 |
-
|
92 |
-
# Add completeness profile information if available (from industry-neutral approach)
|
93 |
-
if use_best_practices and "completeness_profile" in final_score:
|
94 |
-
aibom["metadata"]["properties"].append({
|
95 |
-
"name": "aibom:completeness-profile",
|
96 |
-
"value": final_score["completeness_profile"]["name"]
|
97 |
-
})
|
98 |
-
aibom["metadata"]["properties"].append({
|
99 |
-
"name": "aibom:completeness-description",
|
100 |
-
"value": final_score["completeness_profile"]["description"]
|
101 |
-
})
|
102 |
-
|
103 |
-
# Add AI enhancement information
|
104 |
-
if ai_enhanced:
|
105 |
-
aibom["metadata"]["properties"].append({"name": "aibom:ai-enhanced", "value": "true"})
|
106 |
-
aibom["metadata"]["properties"].append({"name": "aibom:ai-model", "value": ai_model_name})
|
107 |
-
aibom["metadata"]["properties"].append({"name": "aibom:original-score", "value": str(original_score["total_score"])})
|
108 |
-
aibom["metadata"]["properties"].append({"name": "aibom:score-improvement",
|
109 |
-
"value": str(round(final_score["total_score"] - original_score["total_score"], 2))})
|
110 |
|
111 |
if output_file:
|
112 |
with open(output_file, 'w') as f:
|
@@ -137,27 +114,38 @@ class AIBOMGenerator:
|
|
137 |
"version": 1,
|
138 |
"metadata": {
|
139 |
"timestamp": datetime.datetime.utcnow().isoformat() + "Z",
|
140 |
-
"tools":
|
141 |
-
"
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
"component": {
|
146 |
-
"
|
|
|
147 |
"name": model_id.split("/")[-1],
|
148 |
-
"
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
},
|
154 |
"components": [{
|
|
|
155 |
"type": "machine-learning-model",
|
156 |
-
"bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
|
157 |
"name": model_id.split("/")[-1],
|
158 |
-
"
|
|
|
159 |
}],
|
160 |
-
"dependencies": [
|
|
|
|
|
|
|
161 |
}
|
162 |
|
163 |
def get_enhancement_report(self):
|
@@ -183,6 +171,14 @@ class AIBOMGenerator:
|
|
183 |
model_id: str,
|
184 |
metadata: Dict[str, Any],
|
185 |
) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
aibom = {
|
187 |
"bomFormat": "CycloneDX",
|
188 |
"specVersion": "1.6",
|
@@ -192,8 +188,8 @@ class AIBOMGenerator:
|
|
192 |
"components": [self._create_component_section(model_id, metadata)],
|
193 |
"dependencies": [
|
194 |
{
|
195 |
-
"ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
|
196 |
-
"dependsOn": ["pkg:
|
197 |
}
|
198 |
]
|
199 |
}
|
@@ -325,8 +321,6 @@ class AIBOMGenerator:
|
|
325 |
limitations_section = card_text.split("## Limitations")[1].split("##")[0].strip()
|
326 |
if limitations_section:
|
327 |
enhanced_metadata["limitations"] = limitations_section
|
328 |
-
# Map to industry-neutral field (silently aligned with SPDX)
|
329 |
-
enhanced_metadata["limitation"] = limitations_section
|
330 |
|
331 |
# Extract ethical considerations if present
|
332 |
if "ethical_considerations" not in enhanced_metadata:
|
@@ -335,8 +329,6 @@ class AIBOMGenerator:
|
|
335 |
section = card_text.split(heading)[1].split("##")[0].strip()
|
336 |
if section:
|
337 |
enhanced_metadata["ethical_considerations"] = section
|
338 |
-
# Map to industry-neutral field (silently aligned with SPDX)
|
339 |
-
enhanced_metadata["safetyRiskAssessment"] = section
|
340 |
break
|
341 |
|
342 |
# Extract risks if present
|
@@ -346,23 +338,29 @@ class AIBOMGenerator:
|
|
346 |
if risks_section:
|
347 |
enhanced_metadata["risks"] = risks_section
|
348 |
|
349 |
-
# Extract
|
350 |
-
if "
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
except Exception as e:
|
367 |
print(f"Error extracting unstructured metadata: {e}")
|
368 |
|
@@ -370,181 +368,243 @@ class AIBOMGenerator:
|
|
370 |
|
371 |
def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
372 |
timestamp = datetime.datetime.utcnow().isoformat() + "Z"
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
|
|
|
379 |
authors = []
|
380 |
-
if
|
381 |
-
# Use email instead of url to comply with CycloneDX schema
|
382 |
authors.append({
|
383 |
-
"name": metadata["author"]
|
384 |
-
"email": f"{metadata['author']}@huggingface.co"
|
385 |
})
|
386 |
|
|
|
387 |
component = {
|
388 |
-
"
|
389 |
-
"
|
390 |
-
"
|
|
|
|
|
|
|
391 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
|
|
|
393 |
properties = []
|
394 |
-
|
395 |
-
|
396 |
-
if
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
elif isinstance(value, datetime.datetime):
|
401 |
-
value = value.isoformat() + "Z"
|
402 |
-
properties.append({"name": key, "value": str(value)})
|
403 |
-
except Exception as e:
|
404 |
-
print(f"Error processing metadata property {key}: {e}")
|
405 |
|
|
|
406 |
metadata_section = {
|
407 |
"timestamp": timestamp,
|
408 |
"tools": tools,
|
409 |
"component": component
|
410 |
}
|
411 |
|
412 |
-
if authors:
|
413 |
-
metadata_section["authors"] = authors
|
414 |
if properties:
|
415 |
metadata_section["properties"] = properties
|
416 |
|
417 |
return metadata_section
|
418 |
|
419 |
def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
# Create PURL with version information if commit is available
|
421 |
purl = f"pkg:huggingface/{model_id.replace('/', '/')}"
|
422 |
-
if
|
423 |
purl = f"{purl}@{metadata['commit']}"
|
|
|
|
|
424 |
|
425 |
component = {
|
|
|
426 |
"type": "machine-learning-model",
|
427 |
-
"
|
428 |
-
"
|
|
|
429 |
"purl": purl
|
430 |
}
|
431 |
-
|
432 |
-
# Add
|
433 |
-
if
|
434 |
-
component["description"] = metadata["description"]
|
435 |
-
|
436 |
-
# Add license if available
|
437 |
-
if metadata and "license" in metadata and metadata["license"]:
|
438 |
-
license_id = metadata["license"]
|
439 |
component["licenses"] = [{
|
440 |
"license": {
|
441 |
-
"id":
|
|
|
442 |
}
|
443 |
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
|
445 |
-
# Add
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
|
448 |
-
# Add
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
449 |
model_parameters = {}
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
"name": metadata["datasets"]
|
472 |
-
})
|
473 |
-
except Exception as e:
|
474 |
-
print(f"Error processing datasets: {e}")
|
475 |
-
|
476 |
-
if model_parameters:
|
477 |
-
model_card["modelParameters"] = model_parameters
|
478 |
-
|
479 |
-
# Add quantitative analysis if available
|
480 |
-
if metadata and "eval_results" in metadata and metadata["eval_results"]:
|
481 |
-
try:
|
482 |
-
quantitative_analysis = {
|
483 |
-
"performanceMetrics": []
|
484 |
-
}
|
485 |
-
|
486 |
-
eval_results = metadata["eval_results"]
|
487 |
-
if isinstance(eval_results, dict):
|
488 |
-
for metric, value in eval_results.items():
|
489 |
-
quantitative_analysis["performanceMetrics"].append({
|
490 |
-
"type": metric,
|
491 |
-
"value": str(value)
|
492 |
})
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
|
|
|
|
505 |
|
506 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
considerations = {}
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
considerations["technicalLimitations"] = metadata["limitations"]
|
512 |
-
|
513 |
-
# Ethical considerations
|
514 |
-
if "ethical_considerations" in metadata and metadata["ethical_considerations"]:
|
515 |
-
considerations["ethicalConsiderations"] = metadata["ethical_considerations"]
|
516 |
-
|
517 |
-
# Risks
|
518 |
-
if "risks" in metadata and metadata["risks"]:
|
519 |
-
considerations["risks"] = metadata["risks"]
|
520 |
-
|
521 |
-
# Environmental considerations (for industry-neutral scoring)
|
522 |
-
if "energyConsumption" in metadata and metadata["energyConsumption"]:
|
523 |
-
considerations["environmentalConsiderations"] = metadata["energyConsumption"]
|
524 |
-
|
525 |
if considerations:
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
component["modelCard"] = model_card
|
530 |
-
|
531 |
-
# Add external references if available
|
532 |
-
external_references = []
|
533 |
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
"
|
538 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
|
540 |
-
|
541 |
-
if metadata and "commit_url" in metadata and metadata["commit_url"]:
|
542 |
-
external_references.append({
|
543 |
-
"type": "vcs",
|
544 |
-
"url": metadata["commit_url"]
|
545 |
-
})
|
546 |
-
|
547 |
-
if external_references:
|
548 |
-
component["externalReferences"] = external_references
|
549 |
-
|
550 |
-
return component
|
|
|
78 |
# Calculate final score with industry-neutral approach if enabled
|
79 |
final_score = calculate_completeness_score(aibom, validate=True, use_best_practices=use_best_practices)
|
80 |
|
81 |
+
# Ensure metadata.properties exists
|
82 |
if "metadata" in aibom and "properties" not in aibom["metadata"]:
|
83 |
aibom["metadata"]["properties"] = []
|
84 |
|
85 |
+
# Note: Quality score information is no longer added to the AIBOM metadata
|
86 |
+
# This was removed as requested by the user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
if output_file:
|
89 |
with open(output_file, 'w') as f:
|
|
|
114 |
"version": 1,
|
115 |
"metadata": {
|
116 |
"timestamp": datetime.datetime.utcnow().isoformat() + "Z",
|
117 |
+
"tools": {
|
118 |
+
"components": [{
|
119 |
+
"bom-ref": "pkg:generic/@cybeats/aetheris-aibom-generator@0.1.0",
|
120 |
+
"type": "application",
|
121 |
+
"name": "aetheris-aibom-generator",
|
122 |
+
"version": "0.1.0",
|
123 |
+
"manufacturer": {
|
124 |
+
"name": "Aetheris AI"
|
125 |
+
}
|
126 |
+
}]
|
127 |
+
},
|
128 |
"component": {
|
129 |
+
"bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}@1.0",
|
130 |
+
"type": "application",
|
131 |
"name": model_id.split("/")[-1],
|
132 |
+
"description": f"AI model {model_id}",
|
133 |
+
"version": "1.0",
|
134 |
+
"purl": f"pkg:generic/{model_id.replace('/', '%2F')}@1.0",
|
135 |
+
"copyright": "NOASSERTION"
|
136 |
+
}
|
137 |
},
|
138 |
"components": [{
|
139 |
+
"bom-ref": f"pkg:huggingface/{model_id.replace('/', '/')}@1.0",
|
140 |
"type": "machine-learning-model",
|
|
|
141 |
"name": model_id.split("/")[-1],
|
142 |
+
"version": "1.0",
|
143 |
+
"purl": f"pkg:huggingface/{model_id.replace('/', '/')}@1.0"
|
144 |
}],
|
145 |
+
"dependencies": [{
|
146 |
+
"ref": f"pkg:generic/{model_id.replace('/', '%2F')}@1.0",
|
147 |
+
"dependsOn": [f"pkg:huggingface/{model_id.replace('/', '/')}@1.0"]
|
148 |
+
}]
|
149 |
}
|
150 |
|
151 |
def get_enhancement_report(self):
|
|
|
171 |
model_id: str,
|
172 |
metadata: Dict[str, Any],
|
173 |
) -> Dict[str, Any]:
|
174 |
+
# Extract owner and model name from model_id
|
175 |
+
parts = model_id.split("/")
|
176 |
+
group = parts[0] if len(parts) > 1 else ""
|
177 |
+
name = parts[1] if len(parts) > 1 else parts[0]
|
178 |
+
|
179 |
+
# Get version from metadata or use default
|
180 |
+
version = metadata.get("commit", "1.0")
|
181 |
+
|
182 |
aibom = {
|
183 |
"bomFormat": "CycloneDX",
|
184 |
"specVersion": "1.6",
|
|
|
188 |
"components": [self._create_component_section(model_id, metadata)],
|
189 |
"dependencies": [
|
190 |
{
|
191 |
+
"ref": f"pkg:generic/{model_id.replace('/', '%2F')}@{version}",
|
192 |
+
"dependsOn": [f"pkg:huggingface/{model_id.replace('/', '/')}@{version}"]
|
193 |
}
|
194 |
]
|
195 |
}
|
|
|
321 |
limitations_section = card_text.split("## Limitations")[1].split("##")[0].strip()
|
322 |
if limitations_section:
|
323 |
enhanced_metadata["limitations"] = limitations_section
|
|
|
|
|
324 |
|
325 |
# Extract ethical considerations if present
|
326 |
if "ethical_considerations" not in enhanced_metadata:
|
|
|
329 |
section = card_text.split(heading)[1].split("##")[0].strip()
|
330 |
if section:
|
331 |
enhanced_metadata["ethical_considerations"] = section
|
|
|
|
|
332 |
break
|
333 |
|
334 |
# Extract risks if present
|
|
|
338 |
if risks_section:
|
339 |
enhanced_metadata["risks"] = risks_section
|
340 |
|
341 |
+
# Extract datasets if present
|
342 |
+
if "datasets" not in enhanced_metadata:
|
343 |
+
datasets = []
|
344 |
+
if "## Dataset" in card_text or "## Datasets" in card_text:
|
345 |
+
dataset_section = ""
|
346 |
+
if "## Dataset" in card_text:
|
347 |
+
dataset_section = card_text.split("## Dataset")[1].split("##")[0].strip()
|
348 |
+
elif "## Datasets" in card_text:
|
349 |
+
dataset_section = card_text.split("## Datasets")[1].split("##")[0].strip()
|
350 |
+
|
351 |
+
if dataset_section:
|
352 |
+
# Simple parsing to extract dataset names
|
353 |
+
lines = dataset_section.split("\n")
|
354 |
+
for line in lines:
|
355 |
+
if line.strip() and not line.startswith("#"):
|
356 |
+
datasets.append({
|
357 |
+
"type": "dataset",
|
358 |
+
"name": line.strip().split()[0] if line.strip().split() else "Unknown",
|
359 |
+
"description": line.strip()
|
360 |
+
})
|
361 |
+
|
362 |
+
if datasets:
|
363 |
+
enhanced_metadata["datasets"] = datasets
|
364 |
except Exception as e:
|
365 |
print(f"Error extracting unstructured metadata: {e}")
|
366 |
|
|
|
368 |
|
369 |
def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
370 |
timestamp = datetime.datetime.utcnow().isoformat() + "Z"
|
371 |
+
|
372 |
+
# Get version from metadata or use default
|
373 |
+
version = metadata.get("commit", "1.0")
|
374 |
+
|
375 |
+
# Create tools section with components array
|
376 |
+
tools = {
|
377 |
+
"components": [{
|
378 |
+
"bom-ref": "pkg:generic/@cybeats/aetheris-aibom-generator@0.1.0",
|
379 |
+
"type": "application",
|
380 |
+
"name": "aetheris-aibom-generator",
|
381 |
+
"version": "0.1.0",
|
382 |
+
"manufacturer": {
|
383 |
+
"name": "Aetheris AI"
|
384 |
+
}
|
385 |
+
}]
|
386 |
+
}
|
387 |
|
388 |
+
# Create authors array
|
389 |
authors = []
|
390 |
+
if "author" in metadata and metadata["author"]:
|
|
|
391 |
authors.append({
|
392 |
+
"name": metadata["author"]
|
|
|
393 |
})
|
394 |
|
395 |
+
# Create component section for metadata
|
396 |
component = {
|
397 |
+
"bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}@{version}",
|
398 |
+
"type": "application",
|
399 |
+
"name": metadata.get("name", model_id.split("/")[-1]),
|
400 |
+
"description": metadata.get("description", f"AI model {model_id}"),
|
401 |
+
"version": version,
|
402 |
+
"purl": f"pkg:generic/{model_id.replace('/', '%2F')}@{version}"
|
403 |
}
|
404 |
+
|
405 |
+
# Add authors to component if available
|
406 |
+
if authors:
|
407 |
+
component["authors"] = authors
|
408 |
+
|
409 |
+
# Add publisher and supplier if author is available
|
410 |
+
if "author" in metadata and metadata["author"]:
|
411 |
+
component["publisher"] = metadata["author"]
|
412 |
+
component["supplier"] = {
|
413 |
+
"name": metadata["author"]
|
414 |
+
}
|
415 |
+
component["manufacturer"] = {
|
416 |
+
"name": metadata["author"]
|
417 |
+
}
|
418 |
+
|
419 |
+
# Add copyright
|
420 |
+
component["copyright"] = "NOASSERTION"
|
421 |
|
422 |
+
# Create properties array for additional metadata
|
423 |
properties = []
|
424 |
+
for key, value in metadata.items():
|
425 |
+
if key not in ["name", "author", "license", "description", "commit"] and value is not None:
|
426 |
+
if isinstance(value, (list, dict)):
|
427 |
+
if not isinstance(value, str):
|
428 |
+
value = json.dumps(value)
|
429 |
+
properties.append({"name": key, "value": str(value)})
|
|
|
|
|
|
|
|
|
|
|
430 |
|
431 |
+
# Assemble metadata section
|
432 |
metadata_section = {
|
433 |
"timestamp": timestamp,
|
434 |
"tools": tools,
|
435 |
"component": component
|
436 |
}
|
437 |
|
|
|
|
|
438 |
if properties:
|
439 |
metadata_section["properties"] = properties
|
440 |
|
441 |
return metadata_section
|
442 |
|
443 |
def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
444 |
+
# Extract owner and model name from model_id
|
445 |
+
parts = model_id.split("/")
|
446 |
+
group = parts[0] if len(parts) > 1 else ""
|
447 |
+
name = parts[1] if len(parts) > 1 else parts[0]
|
448 |
+
|
449 |
+
# Get version from metadata or use default
|
450 |
+
version = metadata.get("commit", "1.0")
|
451 |
+
|
452 |
# Create PURL with version information if commit is available
|
453 |
purl = f"pkg:huggingface/{model_id.replace('/', '/')}"
|
454 |
+
if "commit" in metadata:
|
455 |
purl = f"{purl}@{metadata['commit']}"
|
456 |
+
else:
|
457 |
+
purl = f"{purl}@{version}"
|
458 |
|
459 |
component = {
|
460 |
+
"bom-ref": f"pkg:huggingface/{model_id.replace('/', '/')}@{version}",
|
461 |
"type": "machine-learning-model",
|
462 |
+
"group": group,
|
463 |
+
"name": name,
|
464 |
+
"version": version,
|
465 |
"purl": purl
|
466 |
}
|
467 |
+
|
468 |
+
# Add licenses if available
|
469 |
+
if "license" in metadata:
|
|
|
|
|
|
|
|
|
|
|
470 |
component["licenses"] = [{
|
471 |
"license": {
|
472 |
+
"id": metadata["license"],
|
473 |
+
"url": self._get_license_url(metadata["license"])
|
474 |
}
|
475 |
}]
|
476 |
+
|
477 |
+
# Add description if available
|
478 |
+
if "description" in metadata:
|
479 |
+
component["description"] = metadata["description"]
|
480 |
+
|
481 |
+
# Add external references
|
482 |
+
external_refs = [{
|
483 |
+
"type": "website",
|
484 |
+
"url": f"https://huggingface.co/{model_id}"
|
485 |
+
}]
|
486 |
+
if "commit_url" in metadata:
|
487 |
+
external_refs.append({
|
488 |
+
"type": "vcs",
|
489 |
+
"url": metadata["commit_url"]
|
490 |
+
})
|
491 |
+
component["externalReferences"] = external_refs
|
492 |
+
|
493 |
+
# Add authors, publisher, supplier, manufacturer
|
494 |
+
if "author" in metadata and metadata["author"]:
|
495 |
+
component["authors"] = [{"name": metadata["author"]}]
|
496 |
+
component["publisher"] = metadata["author"]
|
497 |
+
component["supplier"] = {
|
498 |
+
"name": metadata["author"],
|
499 |
+
"url": [f"https://huggingface.co/{metadata['author']}"]
|
500 |
+
}
|
501 |
+
component["manufacturer"] = {
|
502 |
+
"name": metadata["author"],
|
503 |
+
"url": [f"https://huggingface.co/{metadata['author']}"]
|
504 |
+
}
|
505 |
|
506 |
+
# Add copyright
|
507 |
+
component["copyright"] = "NOASSERTION"
|
508 |
+
|
509 |
+
# Add model card section
|
510 |
+
component["modelCard"] = self._create_model_card_section(metadata)
|
511 |
+
|
512 |
+
return component
|
513 |
+
|
514 |
+
def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
515 |
+
model_card_section = {}
|
516 |
+
|
517 |
+
# Add quantitative analysis section
|
518 |
+
if "eval_results" in metadata:
|
519 |
+
model_card_section["quantitativeAnalysis"] = {
|
520 |
+
"performanceMetrics": metadata["eval_results"],
|
521 |
+
"graphics": {} # Empty graphics object as in the example
|
522 |
+
}
|
523 |
+
else:
|
524 |
+
model_card_section["quantitativeAnalysis"] = {"graphics": {}}
|
525 |
|
526 |
+
# Add properties section
|
527 |
+
properties = []
|
528 |
+
for key, value in metadata.items():
|
529 |
+
if key in ["author", "library_name", "license", "downloads", "likes", "tags", "created_at", "last_modified"]:
|
530 |
+
properties.append({"name": key, "value": str(value)})
|
531 |
+
|
532 |
+
if properties:
|
533 |
+
model_card_section["properties"] = properties
|
534 |
+
|
535 |
+
# Create model parameters section
|
536 |
model_parameters = {}
|
537 |
+
|
538 |
+
# Add outputs array
|
539 |
+
model_parameters["outputs"] = [{"format": "generated-text"}]
|
540 |
+
|
541 |
+
# Add task
|
542 |
+
model_parameters["task"] = metadata.get("pipeline_tag", "text-generation")
|
543 |
+
|
544 |
+
# Add architecture information
|
545 |
+
model_parameters["architectureFamily"] = "llama" if "llama" in metadata.get("name", "").lower() else "transformer"
|
546 |
+
model_parameters["modelArchitecture"] = f"{metadata.get('name', 'Unknown')}ForCausalLM"
|
547 |
+
|
548 |
+
# Add datasets array with proper structure
|
549 |
+
if "datasets" in metadata:
|
550 |
+
datasets = []
|
551 |
+
if isinstance(metadata["datasets"], list):
|
552 |
+
for dataset in metadata["datasets"]:
|
553 |
+
if isinstance(dataset, str):
|
554 |
+
datasets.append({
|
555 |
+
"type": "dataset",
|
556 |
+
"name": dataset,
|
557 |
+
"description": f"Dataset used for training {metadata.get('name', 'the model')}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
})
|
559 |
+
elif isinstance(dataset, dict) and "name" in dataset:
|
560 |
+
# Ensure dataset has the required structure
|
561 |
+
dataset_entry = {
|
562 |
+
"type": dataset.get("type", "dataset"),
|
563 |
+
"name": dataset["name"],
|
564 |
+
"description": dataset.get("description", f"Dataset: {dataset['name']}")
|
565 |
+
}
|
566 |
+
datasets.append(dataset_entry)
|
567 |
+
elif isinstance(metadata["datasets"], str):
|
568 |
+
datasets.append({
|
569 |
+
"type": "dataset",
|
570 |
+
"name": metadata["datasets"],
|
571 |
+
"description": f"Dataset used for training {metadata.get('name', 'the model')}"
|
572 |
+
})
|
573 |
|
574 |
+
if datasets:
|
575 |
+
model_parameters["datasets"] = datasets
|
576 |
+
|
577 |
+
# Add inputs array
|
578 |
+
model_parameters["inputs"] = [{"format": "text"}]
|
579 |
+
|
580 |
+
# Add model parameters to model card section
|
581 |
+
model_card_section["modelParameters"] = model_parameters
|
582 |
+
|
583 |
+
# Add considerations section
|
584 |
considerations = {}
|
585 |
+
for k in ["limitations", "ethical_considerations", "bias", "risks"]:
|
586 |
+
if k in metadata:
|
587 |
+
considerations[k] = metadata[k]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
588 |
if considerations:
|
589 |
+
model_card_section["considerations"] = considerations
|
590 |
+
|
591 |
+
return model_card_section
|
|
|
|
|
|
|
|
|
592 |
|
593 |
+
def _get_license_url(self, license_id: str) -> str:
|
594 |
+
"""Get the URL for a license based on its SPDX ID."""
|
595 |
+
license_urls = {
|
596 |
+
"Apache-2.0": "https://www.apache.org/licenses/LICENSE-2.0",
|
597 |
+
"MIT": "https://opensource.org/licenses/MIT",
|
598 |
+
"BSD-3-Clause": "https://opensource.org/licenses/BSD-3-Clause",
|
599 |
+
"GPL-3.0": "https://www.gnu.org/licenses/gpl-3.0.en.html",
|
600 |
+
"CC-BY-4.0": "https://creativecommons.org/licenses/by/4.0/",
|
601 |
+
"CC-BY-SA-4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
|
602 |
+
"CC-BY-NC-4.0": "https://creativecommons.org/licenses/by-nc/4.0/",
|
603 |
+
"CC-BY-ND-4.0": "https://creativecommons.org/licenses/by-nd/4.0/",
|
604 |
+
"CC-BY-NC-SA-4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
|
605 |
+
"CC-BY-NC-ND-4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
|
606 |
+
"LGPL-3.0": "https://www.gnu.org/licenses/lgpl-3.0.en.html",
|
607 |
+
"MPL-2.0": "https://www.mozilla.org/en-US/MPL/2.0/",
|
608 |
+
}
|
609 |
|
610 |
+
return license_urls.get(license_id, "https://spdx.org/licenses/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|