a1c00l committed on
Commit
854b81e
·
verified ·
1 Parent(s): 25493c7

Update src/aibom_generator/generator.py

Browse files
Files changed (1) hide show
  1. src/aibom_generator/generator.py +258 -198
src/aibom_generator/generator.py CHANGED
@@ -78,35 +78,12 @@ class AIBOMGenerator:
78
  # Calculate final score with industry-neutral approach if enabled
79
  final_score = calculate_completeness_score(aibom, validate=True, use_best_practices=use_best_practices)
80
 
81
- # Add score and enhancement info to metadata properties
82
  if "metadata" in aibom and "properties" not in aibom["metadata"]:
83
  aibom["metadata"]["properties"] = []
84
 
85
-
86
- if "metadata" in aibom and "properties" in aibom["metadata"]:
87
- # Add score information
88
- aibom["metadata"]["properties"].append({"name": "aibom:quality-score", "value": str(final_score["total_score"])})
89
- aibom["metadata"]["properties"].append({"name": "aibom:quality-breakdown", "value": json.dumps(final_score["section_scores"])})
90
- aibom["metadata"]["properties"].append({"name": "aibom:max-scores", "value": json.dumps(final_score["max_scores"])})
91
-
92
- # Add completeness profile information if available (from industry-neutral approach)
93
- if use_best_practices and "completeness_profile" in final_score:
94
- aibom["metadata"]["properties"].append({
95
- "name": "aibom:completeness-profile",
96
- "value": final_score["completeness_profile"]["name"]
97
- })
98
- aibom["metadata"]["properties"].append({
99
- "name": "aibom:completeness-description",
100
- "value": final_score["completeness_profile"]["description"]
101
- })
102
-
103
- # Add AI enhancement information
104
- if ai_enhanced:
105
- aibom["metadata"]["properties"].append({"name": "aibom:ai-enhanced", "value": "true"})
106
- aibom["metadata"]["properties"].append({"name": "aibom:ai-model", "value": ai_model_name})
107
- aibom["metadata"]["properties"].append({"name": "aibom:original-score", "value": str(original_score["total_score"])})
108
- aibom["metadata"]["properties"].append({"name": "aibom:score-improvement",
109
- "value": str(round(final_score["total_score"] - original_score["total_score"], 2))})
110
 
111
  if output_file:
112
  with open(output_file, 'w') as f:
@@ -137,27 +114,38 @@ class AIBOMGenerator:
137
  "version": 1,
138
  "metadata": {
139
  "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
140
- "tools": [{
141
- "vendor": "Aetheris AI",
142
- "name": "aetheris-aibom-generator",
143
- "version": "0.1.0"
144
- }],
 
 
 
 
 
 
145
  "component": {
146
- "type": "machine-learning-model",
 
147
  "name": model_id.split("/")[-1],
148
- "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}"
149
- },
150
- "properties": [
151
- {"name": "aibom:error", "value": "Error generating complete AIBOM"}
152
- ]
153
  },
154
  "components": [{
 
155
  "type": "machine-learning-model",
156
- "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
157
  "name": model_id.split("/")[-1],
158
- "purl": f"pkg:huggingface/{model_id.replace('/', '/')}"
 
159
  }],
160
- "dependencies": []
 
 
 
161
  }
162
 
163
  def get_enhancement_report(self):
@@ -183,6 +171,14 @@ class AIBOMGenerator:
183
  model_id: str,
184
  metadata: Dict[str, Any],
185
  ) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
186
  aibom = {
187
  "bomFormat": "CycloneDX",
188
  "specVersion": "1.6",
@@ -192,8 +188,8 @@ class AIBOMGenerator:
192
  "components": [self._create_component_section(model_id, metadata)],
193
  "dependencies": [
194
  {
195
- "ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
196
- "dependsOn": ["pkg:pypi/torch@1.13.0"]
197
  }
198
  ]
199
  }
@@ -325,8 +321,6 @@ class AIBOMGenerator:
325
  limitations_section = card_text.split("## Limitations")[1].split("##")[0].strip()
326
  if limitations_section:
327
  enhanced_metadata["limitations"] = limitations_section
328
- # Map to industry-neutral field (silently aligned with SPDX)
329
- enhanced_metadata["limitation"] = limitations_section
330
 
331
  # Extract ethical considerations if present
332
  if "ethical_considerations" not in enhanced_metadata:
@@ -335,8 +329,6 @@ class AIBOMGenerator:
335
  section = card_text.split(heading)[1].split("##")[0].strip()
336
  if section:
337
  enhanced_metadata["ethical_considerations"] = section
338
- # Map to industry-neutral field (silently aligned with SPDX)
339
- enhanced_metadata["safetyRiskAssessment"] = section
340
  break
341
 
342
  # Extract risks if present
@@ -346,23 +338,29 @@ class AIBOMGenerator:
346
  if risks_section:
347
  enhanced_metadata["risks"] = risks_section
348
 
349
- # Extract energy consumption if present (for industry-neutral scoring)
350
- if "energy" not in enhanced_metadata:
351
- for heading in ["## Energy", "## Energy Consumption", "## Environmental Impact"]:
352
- if heading in card_text:
353
- section = card_text.split(heading)[1].split("##")[0].strip()
354
- if section:
355
- enhanced_metadata["energyConsumption"] = section
356
- break
357
-
358
- # Extract hyperparameters if present (for industry-neutral scoring)
359
- if "hyperparameters" not in enhanced_metadata:
360
- for heading in ["## Hyperparameters", "## Training Hyperparameters", "## Model Hyperparameters"]:
361
- if heading in card_text:
362
- section = card_text.split(heading)[1].split("##")[0].strip()
363
- if section:
364
- enhanced_metadata["hyperparameter"] = section
365
- break
 
 
 
 
 
 
366
  except Exception as e:
367
  print(f"Error extracting unstructured metadata: {e}")
368
 
@@ -370,181 +368,243 @@ class AIBOMGenerator:
370
 
371
  def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
372
  timestamp = datetime.datetime.utcnow().isoformat() + "Z"
373
- tools = [{
374
- "vendor": "Aetheris AI",
375
- "name": "aetheris-aibom-generator",
376
- "version": "0.1.0"
377
- }]
 
 
 
 
 
 
 
 
 
 
 
378
 
 
379
  authors = []
380
- if metadata and "author" in metadata and metadata["author"]:
381
- # Use email instead of url to comply with CycloneDX schema
382
  authors.append({
383
- "name": metadata["author"],
384
- "email": f"{metadata['author']}@huggingface.co"
385
  })
386
 
 
387
  component = {
388
- "type": "machine-learning-model",
389
- "name": metadata.get("name", model_id.split("/")[-1]) if metadata else model_id.split("/")[-1],
390
- "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}"
 
 
 
391
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
 
393
  properties = []
394
- if metadata:
395
- for key, value in metadata.items():
396
- if key not in ["name", "author", "license"] and value is not None:
397
- try:
398
- if isinstance(value, (list, dict)):
399
- value = json.dumps(value)
400
- elif isinstance(value, datetime.datetime):
401
- value = value.isoformat() + "Z"
402
- properties.append({"name": key, "value": str(value)})
403
- except Exception as e:
404
- print(f"Error processing metadata property {key}: {e}")
405
 
 
406
  metadata_section = {
407
  "timestamp": timestamp,
408
  "tools": tools,
409
  "component": component
410
  }
411
 
412
- if authors:
413
- metadata_section["authors"] = authors
414
  if properties:
415
  metadata_section["properties"] = properties
416
 
417
  return metadata_section
418
 
419
  def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
420
  # Create PURL with version information if commit is available
421
  purl = f"pkg:huggingface/{model_id.replace('/', '/')}"
422
- if metadata and "commit" in metadata:
423
  purl = f"{purl}@{metadata['commit']}"
 
 
424
 
425
  component = {
 
426
  "type": "machine-learning-model",
427
- "name": metadata.get("name", model_id.split("/")[-1]) if metadata else model_id.split("/")[-1],
428
- "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
 
429
  "purl": purl
430
  }
431
-
432
- # Add description if available
433
- if metadata and "description" in metadata and metadata["description"]:
434
- component["description"] = metadata["description"]
435
-
436
- # Add license if available
437
- if metadata and "license" in metadata and metadata["license"]:
438
- license_id = metadata["license"]
439
  component["licenses"] = [{
440
  "license": {
441
- "id": license_id
 
442
  }
443
  }]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
 
445
- # Add model card if available
446
- model_card = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
- # Add model parameters
 
 
 
 
 
 
 
 
 
449
  model_parameters = {}
450
- if metadata:
451
- for key in ["ai:type", "ai:task", "ai:framework", "base_model", "library_name"]:
452
- if key in metadata and metadata[key]:
453
- if "properties" not in model_parameters:
454
- model_parameters["properties"] = []
455
- model_parameters["properties"].append({
456
- "name": key,
457
- "value": metadata[key]
458
- })
459
-
460
- # Add datasets if available
461
- if "datasets" in metadata and metadata["datasets"]:
462
- model_parameters["datasets"] = []
463
- try:
464
- if isinstance(metadata["datasets"], list):
465
- for dataset in metadata["datasets"]:
466
- model_parameters["datasets"].append({
467
- "name": dataset
468
- })
469
- elif isinstance(metadata["datasets"], str):
470
- model_parameters["datasets"].append({
471
- "name": metadata["datasets"]
472
- })
473
- except Exception as e:
474
- print(f"Error processing datasets: {e}")
475
-
476
- if model_parameters:
477
- model_card["modelParameters"] = model_parameters
478
-
479
- # Add quantitative analysis if available
480
- if metadata and "eval_results" in metadata and metadata["eval_results"]:
481
- try:
482
- quantitative_analysis = {
483
- "performanceMetrics": []
484
- }
485
-
486
- eval_results = metadata["eval_results"]
487
- if isinstance(eval_results, dict):
488
- for metric, value in eval_results.items():
489
- quantitative_analysis["performanceMetrics"].append({
490
- "type": metric,
491
- "value": str(value)
492
  })
493
- elif isinstance(eval_results, list):
494
- for result in eval_results:
495
- if isinstance(result, dict) and "metric" in result and "value" in result:
496
- quantitative_analysis["performanceMetrics"].append({
497
- "type": result["metric"],
498
- "value": str(result["value"])
499
- })
500
-
501
- if quantitative_analysis["performanceMetrics"]:
502
- model_card["quantitativeAnalysis"] = quantitative_analysis
503
- except Exception as e:
504
- print(f"Error processing evaluation results: {e}")
 
 
505
 
506
- # Add considerations if available
 
 
 
 
 
 
 
 
 
507
  considerations = {}
508
- if metadata:
509
- # Technical limitations
510
- if "limitations" in metadata and metadata["limitations"]:
511
- considerations["technicalLimitations"] = metadata["limitations"]
512
-
513
- # Ethical considerations
514
- if "ethical_considerations" in metadata and metadata["ethical_considerations"]:
515
- considerations["ethicalConsiderations"] = metadata["ethical_considerations"]
516
-
517
- # Risks
518
- if "risks" in metadata and metadata["risks"]:
519
- considerations["risks"] = metadata["risks"]
520
-
521
- # Environmental considerations (for industry-neutral scoring)
522
- if "energyConsumption" in metadata and metadata["energyConsumption"]:
523
- considerations["environmentalConsiderations"] = metadata["energyConsumption"]
524
-
525
  if considerations:
526
- model_card["considerations"] = considerations
527
-
528
- if model_card:
529
- component["modelCard"] = model_card
530
-
531
- # Add external references if available
532
- external_references = []
533
 
534
- # Add model card URL
535
- external_references.append({
536
- "type": "documentation",
537
- "url": f"https://huggingface.co/{model_id}"
538
- })
 
 
 
 
 
 
 
 
 
 
 
539
 
540
- # Add commit URL if available
541
- if metadata and "commit_url" in metadata and metadata["commit_url"]:
542
- external_references.append({
543
- "type": "vcs",
544
- "url": metadata["commit_url"]
545
- })
546
-
547
- if external_references:
548
- component["externalReferences"] = external_references
549
-
550
- return component
 
78
  # Calculate final score with industry-neutral approach if enabled
79
  final_score = calculate_completeness_score(aibom, validate=True, use_best_practices=use_best_practices)
80
 
81
+ # Ensure metadata.properties exists
82
  if "metadata" in aibom and "properties" not in aibom["metadata"]:
83
  aibom["metadata"]["properties"] = []
84
 
85
+ # Note: Quality score information is no longer added to the AIBOM metadata
86
+ # This was removed as requested by the user
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  if output_file:
89
  with open(output_file, 'w') as f:
 
114
  "version": 1,
115
  "metadata": {
116
  "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
117
+ "tools": {
118
+ "components": [{
119
+ "bom-ref": "pkg:generic/@cybeats/aetheris-aibom-generator@0.1.0",
120
+ "type": "application",
121
+ "name": "aetheris-aibom-generator",
122
+ "version": "0.1.0",
123
+ "manufacturer": {
124
+ "name": "Aetheris AI"
125
+ }
126
+ }]
127
+ },
128
  "component": {
129
+ "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}@1.0",
130
+ "type": "application",
131
  "name": model_id.split("/")[-1],
132
+ "description": f"AI model {model_id}",
133
+ "version": "1.0",
134
+ "purl": f"pkg:generic/{model_id.replace('/', '%2F')}@1.0",
135
+ "copyright": "NOASSERTION"
136
+ }
137
  },
138
  "components": [{
139
+ "bom-ref": f"pkg:huggingface/{model_id.replace('/', '/')}@1.0",
140
  "type": "machine-learning-model",
 
141
  "name": model_id.split("/")[-1],
142
+ "version": "1.0",
143
+ "purl": f"pkg:huggingface/{model_id.replace('/', '/')}@1.0"
144
  }],
145
+ "dependencies": [{
146
+ "ref": f"pkg:generic/{model_id.replace('/', '%2F')}@1.0",
147
+ "dependsOn": [f"pkg:huggingface/{model_id.replace('/', '/')}@1.0"]
148
+ }]
149
  }
150
 
151
  def get_enhancement_report(self):
 
171
  model_id: str,
172
  metadata: Dict[str, Any],
173
  ) -> Dict[str, Any]:
174
+ # Extract owner and model name from model_id
175
+ parts = model_id.split("/")
176
+ group = parts[0] if len(parts) > 1 else ""
177
+ name = parts[1] if len(parts) > 1 else parts[0]
178
+
179
+ # Get version from metadata or use default
180
+ version = metadata.get("commit", "1.0")
181
+
182
  aibom = {
183
  "bomFormat": "CycloneDX",
184
  "specVersion": "1.6",
 
188
  "components": [self._create_component_section(model_id, metadata)],
189
  "dependencies": [
190
  {
191
+ "ref": f"pkg:generic/{model_id.replace('/', '%2F')}@{version}",
192
+ "dependsOn": [f"pkg:huggingface/{model_id.replace('/', '/')}@{version}"]
193
  }
194
  ]
195
  }
 
321
  limitations_section = card_text.split("## Limitations")[1].split("##")[0].strip()
322
  if limitations_section:
323
  enhanced_metadata["limitations"] = limitations_section
 
 
324
 
325
  # Extract ethical considerations if present
326
  if "ethical_considerations" not in enhanced_metadata:
 
329
  section = card_text.split(heading)[1].split("##")[0].strip()
330
  if section:
331
  enhanced_metadata["ethical_considerations"] = section
 
 
332
  break
333
 
334
  # Extract risks if present
 
338
  if risks_section:
339
  enhanced_metadata["risks"] = risks_section
340
 
341
+ # Extract datasets if present
342
+ if "datasets" not in enhanced_metadata:
343
+ datasets = []
344
+ if "## Dataset" in card_text or "## Datasets" in card_text:
345
+ dataset_section = ""
346
+ if "## Dataset" in card_text:
347
+ dataset_section = card_text.split("## Dataset")[1].split("##")[0].strip()
348
+ elif "## Datasets" in card_text:
349
+ dataset_section = card_text.split("## Datasets")[1].split("##")[0].strip()
350
+
351
+ if dataset_section:
352
+ # Simple parsing to extract dataset names
353
+ lines = dataset_section.split("\n")
354
+ for line in lines:
355
+ if line.strip() and not line.startswith("#"):
356
+ datasets.append({
357
+ "type": "dataset",
358
+ "name": line.strip().split()[0] if line.strip().split() else "Unknown",
359
+ "description": line.strip()
360
+ })
361
+
362
+ if datasets:
363
+ enhanced_metadata["datasets"] = datasets
364
  except Exception as e:
365
  print(f"Error extracting unstructured metadata: {e}")
366
 
 
368
 
369
  def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
370
  timestamp = datetime.datetime.utcnow().isoformat() + "Z"
371
+
372
+ # Get version from metadata or use default
373
+ version = metadata.get("commit", "1.0")
374
+
375
+ # Create tools section with components array
376
+ tools = {
377
+ "components": [{
378
+ "bom-ref": "pkg:generic/@cybeats/aetheris-aibom-generator@0.1.0",
379
+ "type": "application",
380
+ "name": "aetheris-aibom-generator",
381
+ "version": "0.1.0",
382
+ "manufacturer": {
383
+ "name": "Aetheris AI"
384
+ }
385
+ }]
386
+ }
387
 
388
+ # Create authors array
389
  authors = []
390
+ if "author" in metadata and metadata["author"]:
 
391
  authors.append({
392
+ "name": metadata["author"]
 
393
  })
394
 
395
+ # Create component section for metadata
396
  component = {
397
+ "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}@{version}",
398
+ "type": "application",
399
+ "name": metadata.get("name", model_id.split("/")[-1]),
400
+ "description": metadata.get("description", f"AI model {model_id}"),
401
+ "version": version,
402
+ "purl": f"pkg:generic/{model_id.replace('/', '%2F')}@{version}"
403
  }
404
+
405
+ # Add authors to component if available
406
+ if authors:
407
+ component["authors"] = authors
408
+
409
+ # Add publisher and supplier if author is available
410
+ if "author" in metadata and metadata["author"]:
411
+ component["publisher"] = metadata["author"]
412
+ component["supplier"] = {
413
+ "name": metadata["author"]
414
+ }
415
+ component["manufacturer"] = {
416
+ "name": metadata["author"]
417
+ }
418
+
419
+ # Add copyright
420
+ component["copyright"] = "NOASSERTION"
421
 
422
+ # Create properties array for additional metadata
423
  properties = []
424
+ for key, value in metadata.items():
425
+ if key not in ["name", "author", "license", "description", "commit"] and value is not None:
426
+ if isinstance(value, (list, dict)):
427
+ if not isinstance(value, str):
428
+ value = json.dumps(value)
429
+ properties.append({"name": key, "value": str(value)})
 
 
 
 
 
430
 
431
+ # Assemble metadata section
432
  metadata_section = {
433
  "timestamp": timestamp,
434
  "tools": tools,
435
  "component": component
436
  }
437
 
 
 
438
  if properties:
439
  metadata_section["properties"] = properties
440
 
441
  return metadata_section
442
 
443
  def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
444
+ # Extract owner and model name from model_id
445
+ parts = model_id.split("/")
446
+ group = parts[0] if len(parts) > 1 else ""
447
+ name = parts[1] if len(parts) > 1 else parts[0]
448
+
449
+ # Get version from metadata or use default
450
+ version = metadata.get("commit", "1.0")
451
+
452
  # Create PURL with version information if commit is available
453
  purl = f"pkg:huggingface/{model_id.replace('/', '/')}"
454
+ if "commit" in metadata:
455
  purl = f"{purl}@{metadata['commit']}"
456
+ else:
457
+ purl = f"{purl}@{version}"
458
 
459
  component = {
460
+ "bom-ref": f"pkg:huggingface/{model_id.replace('/', '/')}@{version}",
461
  "type": "machine-learning-model",
462
+ "group": group,
463
+ "name": name,
464
+ "version": version,
465
  "purl": purl
466
  }
467
+
468
+ # Add licenses if available
469
+ if "license" in metadata:
 
 
 
 
 
470
  component["licenses"] = [{
471
  "license": {
472
+ "id": metadata["license"],
473
+ "url": self._get_license_url(metadata["license"])
474
  }
475
  }]
476
+
477
+ # Add description if available
478
+ if "description" in metadata:
479
+ component["description"] = metadata["description"]
480
+
481
+ # Add external references
482
+ external_refs = [{
483
+ "type": "website",
484
+ "url": f"https://huggingface.co/{model_id}"
485
+ }]
486
+ if "commit_url" in metadata:
487
+ external_refs.append({
488
+ "type": "vcs",
489
+ "url": metadata["commit_url"]
490
+ })
491
+ component["externalReferences"] = external_refs
492
+
493
+ # Add authors, publisher, supplier, manufacturer
494
+ if "author" in metadata and metadata["author"]:
495
+ component["authors"] = [{"name": metadata["author"]}]
496
+ component["publisher"] = metadata["author"]
497
+ component["supplier"] = {
498
+ "name": metadata["author"],
499
+ "url": [f"https://huggingface.co/{metadata['author']}"]
500
+ }
501
+ component["manufacturer"] = {
502
+ "name": metadata["author"],
503
+ "url": [f"https://huggingface.co/{metadata['author']}"]
504
+ }
505
 
506
+ # Add copyright
507
+ component["copyright"] = "NOASSERTION"
508
+
509
+ # Add model card section
510
+ component["modelCard"] = self._create_model_card_section(metadata)
511
+
512
+ return component
513
+
514
+ def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
515
+ model_card_section = {}
516
+
517
+ # Add quantitative analysis section
518
+ if "eval_results" in metadata:
519
+ model_card_section["quantitativeAnalysis"] = {
520
+ "performanceMetrics": metadata["eval_results"],
521
+ "graphics": {} # Empty graphics object as in the example
522
+ }
523
+ else:
524
+ model_card_section["quantitativeAnalysis"] = {"graphics": {}}
525
 
526
+ # Add properties section
527
+ properties = []
528
+ for key, value in metadata.items():
529
+ if key in ["author", "library_name", "license", "downloads", "likes", "tags", "created_at", "last_modified"]:
530
+ properties.append({"name": key, "value": str(value)})
531
+
532
+ if properties:
533
+ model_card_section["properties"] = properties
534
+
535
+ # Create model parameters section
536
  model_parameters = {}
537
+
538
+ # Add outputs array
539
+ model_parameters["outputs"] = [{"format": "generated-text"}]
540
+
541
+ # Add task
542
+ model_parameters["task"] = metadata.get("pipeline_tag", "text-generation")
543
+
544
+ # Add architecture information
545
+ model_parameters["architectureFamily"] = "llama" if "llama" in metadata.get("name", "").lower() else "transformer"
546
+ model_parameters["modelArchitecture"] = f"{metadata.get('name', 'Unknown')}ForCausalLM"
547
+
548
+ # Add datasets array with proper structure
549
+ if "datasets" in metadata:
550
+ datasets = []
551
+ if isinstance(metadata["datasets"], list):
552
+ for dataset in metadata["datasets"]:
553
+ if isinstance(dataset, str):
554
+ datasets.append({
555
+ "type": "dataset",
556
+ "name": dataset,
557
+ "description": f"Dataset used for training {metadata.get('name', 'the model')}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  })
559
+ elif isinstance(dataset, dict) and "name" in dataset:
560
+ # Ensure dataset has the required structure
561
+ dataset_entry = {
562
+ "type": dataset.get("type", "dataset"),
563
+ "name": dataset["name"],
564
+ "description": dataset.get("description", f"Dataset: {dataset['name']}")
565
+ }
566
+ datasets.append(dataset_entry)
567
+ elif isinstance(metadata["datasets"], str):
568
+ datasets.append({
569
+ "type": "dataset",
570
+ "name": metadata["datasets"],
571
+ "description": f"Dataset used for training {metadata.get('name', 'the model')}"
572
+ })
573
 
574
+ if datasets:
575
+ model_parameters["datasets"] = datasets
576
+
577
+ # Add inputs array
578
+ model_parameters["inputs"] = [{"format": "text"}]
579
+
580
+ # Add model parameters to model card section
581
+ model_card_section["modelParameters"] = model_parameters
582
+
583
+ # Add considerations section
584
  considerations = {}
585
+ for k in ["limitations", "ethical_considerations", "bias", "risks"]:
586
+ if k in metadata:
587
+ considerations[k] = metadata[k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  if considerations:
589
+ model_card_section["considerations"] = considerations
590
+
591
+ return model_card_section
 
 
 
 
592
 
593
+ def _get_license_url(self, license_id: str) -> str:
594
+ """Get the URL for a license based on its SPDX ID."""
595
+ license_urls = {
596
+ "Apache-2.0": "https://www.apache.org/licenses/LICENSE-2.0",
597
+ "MIT": "https://opensource.org/licenses/MIT",
598
+ "BSD-3-Clause": "https://opensource.org/licenses/BSD-3-Clause",
599
+ "GPL-3.0": "https://www.gnu.org/licenses/gpl-3.0.en.html",
600
+ "CC-BY-4.0": "https://creativecommons.org/licenses/by/4.0/",
601
+ "CC-BY-SA-4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
602
+ "CC-BY-NC-4.0": "https://creativecommons.org/licenses/by-nc/4.0/",
603
+ "CC-BY-ND-4.0": "https://creativecommons.org/licenses/by-nd/4.0/",
604
+ "CC-BY-NC-SA-4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
605
+ "CC-BY-NC-ND-4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
606
+ "LGPL-3.0": "https://www.gnu.org/licenses/lgpl-3.0.en.html",
607
+ "MPL-2.0": "https://www.mozilla.org/en-US/MPL/2.0/",
608
+ }
609
 
610
+ return license_urls.get(license_id, "https://spdx.org/licenses/")