order button (#118)
Commit: d70bcf5dd0982c9071c03fd221222772a5f475ff
Co-authored-by: Leandro von Werra <[email protected]>
- convert_to_md.py +110 -0
- dist/index.html +7 -1
- dist/style.css +46 -0
- src/index.html +7 -1
- src/style.css +46 -0
- ultra_blog.md +0 -0
convert_to_md.py
ADDED
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""
+HTML to Markdown Converter
+
+This script converts HTML files to Markdown format.
+Usage: python convert_to_md.py input.html [output.md]
+If no output file is specified, it will use the input filename with .md extension.
+"""
+
+import sys
+import os
+import argparse
+import html2text
+import requests
+from urllib.parse import urlparse
+
+def is_url(path):
+    """Check if the given path is a URL."""
+    parsed = urlparse(path)
+    return parsed.scheme != '' and parsed.netloc != ''
+
+def convert_html_to_markdown(html_content, **options):
+    """Convert HTML content to Markdown."""
+    converter = html2text.HTML2Text()
+
+    # Configure converter options
+    converter.ignore_links = options.get('ignore_links', False)
+    converter.ignore_images = options.get('ignore_images', False)
+    converter.ignore_tables = options.get('ignore_tables', False)
+    converter.body_width = options.get('body_width', 0)  # 0 means no wrapping
+    converter.unicode_snob = options.get('unicode_snob', True)  # Use Unicode instead of ASCII
+    converter.wrap_links = options.get('wrap_links', False)
+    converter.inline_links = options.get('inline_links', True)
+
+    # Convert HTML to Markdown
+    return converter.handle(html_content)
+
+def main():
+    parser = argparse.ArgumentParser(description='Convert HTML to Markdown')
+    parser.add_argument('input', help='Input HTML file or URL')
+    parser.add_argument('output', nargs='?', help='Output Markdown file (optional)')
+    parser.add_argument('--ignore-links', action='store_true', help='Ignore links in the HTML')
+    parser.add_argument('--ignore-images', action='store_true', help='Ignore images in the HTML')
+    parser.add_argument('--ignore-tables', action='store_true', help='Ignore tables in the HTML')
+    parser.add_argument('--body-width', type=int, default=0, help='Wrap text at this width (0 for no wrapping)')
+    parser.add_argument('--unicode', action='store_true', help='Use Unicode characters instead of ASCII approximations')
+    parser.add_argument('--wrap-links', action='store_true', help='Wrap links in angle brackets')
+    parser.add_argument('--reference-links', action='store_true', help='Use reference style links instead of inline links')
+
+    args = parser.parse_args()
+
+    # Determine input
+    if is_url(args.input):
+        try:
+            response = requests.get(args.input)
+            response.raise_for_status()
+            html_content = response.text
+        except requests.exceptions.RequestException as e:
+            print(f"Error fetching URL: {e}", file=sys.stderr)
+            return 1
+    else:
+        try:
+            with open(args.input, 'r', encoding='utf-8') as f:
+                html_content = f.read()
+        except IOError as e:
+            print(f"Error reading file: {e}", file=sys.stderr)
+            return 1
+
+    # Configure conversion options
+    options = {
+        'ignore_links': args.ignore_links,
+        'ignore_images': args.ignore_images,
+        'ignore_tables': args.ignore_tables,
+        'body_width': args.body_width,
+        'unicode_snob': args.unicode,
+        'wrap_links': args.wrap_links,
+        'inline_links': not args.reference_links,
+    }
+
+    # Convert HTML to Markdown
+    markdown_content = convert_html_to_markdown(html_content, **options)
+
+    # Determine output
+    if args.output:
+        output_file = args.output
+    else:
+        if is_url(args.input):
+            # Generate a filename from the URL
+            url_parts = urlparse(args.input)
+            base_name = os.path.basename(url_parts.path) or 'index'
+            if not base_name.endswith('.html'):
+                base_name += '.html'
+            output_file = os.path.splitext(base_name)[0] + '.md'
+        else:
+            # Generate a filename from the input file
+            output_file = os.path.splitext(args.input)[0] + '.md'
+
+    # Write output
+    try:
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write(markdown_content)
+        print(f"Conversion successful! Output saved to: {output_file}")
+    except IOError as e:
+        print(f"Error writing file: {e}", file=sys.stderr)
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
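For context, a minimal sketch of how the conversion helper added above could be called from Python rather than through the CLI. The sample HTML string and option values are illustrative only and not part of this commit; it assumes convert_to_md.py is importable and html2text is installed.

# Hypothetical usage sketch, not part of this commit.
from convert_to_md import convert_html_to_markdown

sample_html = "<h1>Title</h1><p>See the <a href='https://example.com'>docs</a>.</p>"

# Drop images, keep inline links, and disable hard wrapping (body_width=0).
markdown = convert_html_to_markdown(sample_html, ignore_images=True, body_width=0)
print(markdown)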
dist/index.html
CHANGED
@@ -76,10 +76,16 @@
 Thousands of GPUs humming in perfect harmony. That's what it takes to train today's most powerful AI models – a symphony of computing power that until recently was the exclusive domain of elite research labs. Open source has transformed this landscape, but not completely. Yes, you can download the latest <a href="https://huggingface.co/meta-llama">Llama</a> or <a href="https://huggingface.co/deepseek-ai">DeepSeek</a> models. Yes, you can read their <a href="https://ai.meta.com/research/publications/the-llama-3-herd-of-models/">technical</a> and <a href="https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf">experiment</a> reports. But the most challenging part – the training code, the knowledge and techniques necessary to coordinate GPUs to train these massive systems – remains shrouded in complexity and spread around in a series of disconnected papers and often private codebases.
 </p>
 <aside>Reading time: 2-4 days. <br>For the best reading experience, we recommend not using a mobile phone.</aside>
-
+<div class="order-button-container">
+  <button class="order-button" onclick="window.open('https://www.lulu.com/shop/nouamane-tazi-and-ferdinand-mom-and-haojun-zhao-and-phuc-nguyen/the-ultra-scale-playbook/paperback/product-45yk9dj.html?page=1&pageSize=4', '_blank')">
+    Order Book Here
+  </button>
+</div>
+<p>
 This open source book is here to change that. Starting from the basics, we'll walk you through the knowledge necessary to scale the training of large language models (LLMs) from one GPU to tens, hundreds, and even thousands of GPUs, illustrating theory with practical code examples and reproducible benchmarks.
 </p>
 
+
 <p>As the size of the clusters used to train these models has grown, various techniques, such as data parallelism, tensor parallelism, pipeline parallelism, and context parallelism as well as ZeRO and kernel fusion, have been invented to make sure that GPUs are highly utilized at all times. This significantly reduces training time and makes the most efficient use of this expensive hardware. These distributed training techniques are not only important for building initial models but have also become essential for fine-tuning large models on specialized data, which often produces the best results. In this book, we'll progressively go over all of these techniques – from the simplest to the most refined ones – while maintaining a single story line to help you understand where each method comes from.</p>
 
 <aside>If you have questions or remarks, open a discussion on the <a href="https://huggingface.co/spaces/nanotron/ultrascale-playbook/discussions?status=open&type=discussion">Community tab</a>!</aside>
dist/style.css
CHANGED
@@ -597,3 +597,49 @@ select[name="presets"] {
   border-radius: 8px;
 }
 
+.order-button {
+  background: linear-gradient(135deg, #6DB4C4, #D4A5B8);
+  color: white;
+  font-size: 18px;
+  font-weight: 600;
+  padding: 16px 32px;
+  border: none;
+  border-radius: 12px;
+  cursor: pointer;
+  text-transform: uppercase;
+  letter-spacing: 1px;
+  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
+  transition: all 0.3s ease;
+  position: relative;
+  overflow: hidden;
+}
+
+.order-button:hover {
+  transform: translateY(-2px);
+  box-shadow: 0 6px 20px rgba(0, 0, 0, 0.25);
+}
+
+.order-button:active {
+  transform: translateY(0);
+  box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2);
+}
+
+.order-button::before {
+  content: '';
+  position: absolute;
+  top: 0;
+  left: -100%;
+  width: 100%;
+  height: 100%;
+  background: linear-gradient(135deg, rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0));
+  transition: left 0.5s ease;
+}
+
+.order-button:hover::before {
+  left: 100%;
+}
+.order-button-container {
+  display: flex;
+  justify-content: center;
+  margin: 40px 0;
+}
src/index.html
CHANGED
@@ -76,10 +76,16 @@
 Thousands of GPUs humming in perfect harmony. That's what it takes to train today's most powerful AI models – a symphony of computing power that until recently was the exclusive domain of elite research labs. Open source has transformed this landscape, but not completely. Yes, you can download the latest <a href="https://huggingface.co/meta-llama">Llama</a> or <a href="https://huggingface.co/deepseek-ai">DeepSeek</a> models. Yes, you can read their <a href="https://ai.meta.com/research/publications/the-llama-3-herd-of-models/">technical</a> and <a href="https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf">experiment</a> reports. But the most challenging part – the training code, the knowledge and techniques necessary to coordinate GPUs to train these massive systems – remains shrouded in complexity and spread around in a series of disconnected papers and often private codebases.
 </p>
 <aside>Reading time: 2-4 days. <br>For the best reading experience, we recommend not using a mobile phone.</aside>
-
+<div class="order-button-container">
+  <button class="order-button" onclick="window.open('https://www.lulu.com/shop/nouamane-tazi-and-ferdinand-mom-and-haojun-zhao-and-phuc-nguyen/the-ultra-scale-playbook/paperback/product-45yk9dj.html?page=1&pageSize=4', '_blank')">
+    Order Book Here
+  </button>
+</div>
+<p>
 This open source book is here to change that. Starting from the basics, we'll walk you through the knowledge necessary to scale the training of large language models (LLMs) from one GPU to tens, hundreds, and even thousands of GPUs, illustrating theory with practical code examples and reproducible benchmarks.
 </p>
 
+
 <p>As the size of the clusters used to train these models has grown, various techniques, such as data parallelism, tensor parallelism, pipeline parallelism, and context parallelism as well as ZeRO and kernel fusion, have been invented to make sure that GPUs are highly utilized at all times. This significantly reduces training time and makes the most efficient use of this expensive hardware. These distributed training techniques are not only important for building initial models but have also become essential for fine-tuning large models on specialized data, which often produces the best results. In this book, we'll progressively go over all of these techniques – from the simplest to the most refined ones – while maintaining a single story line to help you understand where each method comes from.</p>
 
 <aside>If you have questions or remarks, open a discussion on the <a href="https://huggingface.co/spaces/nanotron/ultrascale-playbook/discussions?status=open&type=discussion">Community tab</a>!</aside>
src/style.css
CHANGED
@@ -597,3 +597,49 @@ select[name="presets"] {
   border-radius: 8px;
 }
 
+.order-button {
+  background: linear-gradient(135deg, #6DB4C4, #D4A5B8);
+  color: white;
+  font-size: 18px;
+  font-weight: 600;
+  padding: 16px 32px;
+  border: none;
+  border-radius: 12px;
+  cursor: pointer;
+  text-transform: uppercase;
+  letter-spacing: 1px;
+  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
+  transition: all 0.3s ease;
+  position: relative;
+  overflow: hidden;
+}
+
+.order-button:hover {
+  transform: translateY(-2px);
+  box-shadow: 0 6px 20px rgba(0, 0, 0, 0.25);
+}
+
+.order-button:active {
+  transform: translateY(0);
+  box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2);
+}
+
+.order-button::before {
+  content: '';
+  position: absolute;
+  top: 0;
+  left: -100%;
+  width: 100%;
+  height: 100%;
+  background: linear-gradient(135deg, rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0));
+  transition: left 0.5s ease;
+}
+
+.order-button:hover::before {
+  left: 100%;
+}
+.order-button-container {
+  display: flex;
+  justify-content: center;
+  margin: 40px 0;
+}
ultra_blog.md
ADDED
The diff for this file is too large to render.