In [79]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Tools

## Wikipedia Tools

In [4]:
import re
import requests
from bs4 import BeautifulSoup, NavigableString, Tag
from markdownify import markdownify as md
from smolagents import tool

@tool
def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = False
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown.
    
    Args:
        title (str): Title of the Wikipedia page (e.g., "Python_(programming_language)").
        lang (str): Language code for the Wikipedia version (default 'en' for English).
        ignore_references (bool): If True, drop the "References" section entirely.
        ignore_links (bool): If True, strip out all <a> tags and keep only link text.
    
    Returns:
        str: The Markdown representation of the page's main content.
    """
    # 1. Fetch the raw HTML of the Wikipedia page
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    response = requests.get(url)
    response.raise_for_status()                                                    # :contentReference[oaicite:6]{index=6}
    html = response.text

    # 2. Parse with BeautifulSoup and isolate the div that holds article text
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")                       # :contentReference[oaicite:7]{index=7}
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 3. Remove unwanted elements: infoboxes, toc, navboxes, thumbnails, metadata, and images
    #    a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()                                                          # :contentReference[oaicite:8]{index=8}

    #    b) Table of Contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()                                                              # :contentReference[oaicite:9]{index=9}

    #    c) Navigation boxes and other Wikipedia templates
    for nav in content_div.find_all(["div", "table"], class_=re.compile(r"navbox|vertical-navbox|metadata")):
        nav.decompose()                                                               # :contentReference[oaicite:10]{index=10}

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()                                                             # :contentReference[oaicite:11]{index=11}

    #    e) Remove any raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()                                                               # :contentReference[oaicite:12]{index=12}

    # Helper to convert a BeautifulSoup <table> to markdown
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> tag into a Markdown-formatted table, preserving headers.
        """
        # Extract header names (if any)
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all(["th"]):
                headers.append(th.get_text(strip=True))
        # Build header line
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        
        # Extract all data rows
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        
        return md_table.rstrip()

    # 4. Convert each content table (e.g., wikitable, sortable) into Markdown and replace in-place
    for table in content_div.find_all("table"):
        # Skip any tables that were already removed (infobox, nav, etc.)
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        # Convert & replace
        markdown_table = table_to_markdown(table)                                     # :contentReference[oaicite:13]{index=13}
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Handle ignoring the References section if requested
    if ignore_references:
        # Find the heading that marks "References"
        ref_heading = None
        for span in content_div.find_all("span", class_="mw-headline"):
            if span.get_text(strip=True).lower() == "references":
                ref_heading = span.parent  # Usually the <h2> or <h3> containing the <span>
                break
        if ref_heading:
            # Remove the heading itself
            ref_heading.decompose()
            # Remove everything until the next same-level section
            for sibling in list(ref_heading.find_next_siblings()):
                # Stop if we reach another h2/h3 (i.e., new top-level section)
                if sibling.name and re.match(r"h[2-3]", sibling.name):
                    break
                sibling.decompose()                                                      # :contentReference[oaicite:14]{index=14}

    # 6. Convert remaining HTML in content_div to Markdown
    #    a) If ignoring links: strip <a> tags entirely
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]                                              # :contentReference[oaicite:15]{index=15}
    #    b) Otherwise, let markdownify convert <a> to [text](URL)
    #        We ensure that it does not convert other tags we might still have
    raw_html = "".join(str(child) for child in content_div.children)

    # Final Markdown conversion
    markdown_text = md(raw_html, **markdown_options)                                  # :contentReference[oaicite:16]{index=16}

    # Clean up possible multiple blank lines
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [None]:
import re
import requests
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify as md

def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = False
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown.
    
    Args:
        title (str): Title of the Wikipedia page (e.g., "Python_(programming_language)").
        lang (str): Language code for the Wikipedia version (default 'en').
        ignore_references (bool): If True, drop the entire "References" section.
        ignore_links (bool): If True, strip out all <a> tags entirely (only their inner text remains).
    
    Returns:
        str: The Markdown representation of the page's main content.
    """
    # 1. Fetch raw HTML of the Wikipedia page
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    response = requests.get(url)
    response.raise_for_status()  # :contentReference[oaicite:0]{index=0}
    html = response.text

    # 2. Parse with BeautifulSoup and isolate the div that holds article text
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")  # :contentReference[oaicite:1]{index=1}
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 2a. Remove all [edit] links by deleting <span class="mw-editsection"> nodes first
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()  # :contentReference[oaicite:2]{index=2}

    # 2b. Remove any superscripted footnote markers (<sup class="reference">)
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()  # :contentReference[oaicite:3]{index=3}

    # 3. Remove unwanted elements: infoboxes, toc, navboxes, thumbnails, images
    #    a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()  # :contentReference[oaicite:4]{index=4}

    #    b) Table of Contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()  # :contentReference[oaicite:5]{index=5}

    #    c) Navigation boxes and other Wikipedia templates
    for nav in content_div.find_all(["div", "table"], class_=re.compile(r"navbox|vertical-navbox|metadata")):
        nav.decompose()  # :contentReference[oaicite:6]{index=6}

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()  # :contentReference[oaicite:7]{index=7}

    #    e) Remove any raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()  # :contentReference[oaicite:8]{index=8}

    # 4. Convert <table> tags to Markdown in-place
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> tag into a Markdown-formatted table, preserving headers if present.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Data rows
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip tables that were already removed or are infobox/navigation
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)  # :contentReference[oaicite:9]{index=9}
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Handle ignoring the References section if requested
    if ignore_references:
        # 5a. Find the "References" heading (ignore case & trailing whitespace)
        ref_heading = None
        for span in content_div.find_all("span", class_="mw-headline"):
            heading_text = span.get_text(strip=True).lower()
            # After removing <sup> and <span class="mw-editsection">, this will be exactly "references"
            if heading_text == "references":
                ref_heading = span.parent  # the <h2> or <h3> containing the <span>
                break

        if ref_heading:
            # 5b. Gather siblings until the next same-level heading without decomposing immediately
            siblings_to_remove = []
            for sibling in ref_heading.find_next_siblings():
                if sibling.name and re.match(r"h[2-3]", sibling.name):
                    break
                siblings_to_remove.append(sibling)
            # Remove those siblings
            for node in siblings_to_remove:
                node.decompose()  # :contentReference[oaicite:10]{index=10}
            # Finally remove the "References" heading itself
            ref_heading.decompose()  # :contentReference[oaicite:11]{index=11}

    # 6. Convert remaining HTML in content_div to Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags :contentReference[oaicite:12]{index=12}

    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)  # :contentReference[oaicite:13]{index=13}

    # 7. Collapse multiple blank lines
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [10]:
from IPython.display import display, Markdown

md_page = get_wikipedia_markdown(
    'Mercedes_Sosa',
    lang='en',
    ignore_references= True,
    ignore_links= True)

display(Markdown(md_page)) 

Argentine singer (1935–2009)

Not to be confused with Mercedes Sola.

**Haydée Mercedes** "**La Negra**" **Sosa** (Latin American Spanish: [meɾˈseðes ˈsosa]; 9 July 1935 – 4 October 2009) was an Argentine singer who was popular throughout Latin America and many countries outside the region. With her roots in Argentine folk music, Sosa became one of the preeminent exponents of *El nuevo cancionero*. She gave voice to songs written by many Latin American songwriters. Her music made people hail her as the "voice of the voiceless ones". She was often called "the conscience of Latin America".

Sosa performed in venues such as the Lincoln Center in New York City, the Théâtre Mogador in Paris, the Sistine Chapel in Vatican City, as well as sold-out shows in New York's Carnegie Hall and the Roman Colosseum during her final decade of life. Her career spanned four decades and she was the recipient of six Latin Grammy awards (2000, 2003, 2004, 2006, 2009, 2011), including a Latin Grammy Lifetime Achievement Award in 2004 and two posthumous Latin Grammy Award for Best Folk Album in 2009 and 2011. She won the Premio Gardel in 2000, the main musical award in Argentina. She served as an ambassador for UNICEF.

Life
----

Sosa was born on 9 July 1935, in San Miguel de Tucumán, in the northwestern Argentine province of Tucumán, of mestizo ancestry. She was of French, Spanish and Diaguita descent. Her nickname "la negra", which is a common nickname in Argentina for people with darker complexion, is a reference to her indigenous heritage. Her parents, a day laborer and a washerwoman, were Peronists, although they never registered in the party, and she started her career as a singer for the Peronist Party in Tucuman under the name Gladys Osorio. In 1950, at age fifteen, she won a singing competition organized by a local radio station and was given a contract to perform for two months. She recorded her first album, *La Voz de la Zafra*, in 1959. A performance at the 1965 Cosquín National Folklore Festival—where she was introduced and brought to the stage while sitting in the audience by fellow folk singer Jorge Cafrune— brought her to the attention of the Argentine public. Sosa and her first husband, Manuel Oscar Matus, with whom she had one son, were key players in the mid-60s *nueva canción* movement (which was called *nuevo cancionero* in Argentina). Her second record was *Canciones con Fundamento*, a collection of Argentine folk songs.

Sosa with Félix Luna and Ariel Ramírez (at the piano)

Sosa "spent the late 1960s building her audience in Europe and among the cosmopolitan middle class in Buenos Aires, becoming in the process a much bigger star" than her contemporaries. In 1967, Sosa toured the United States and Europe with great success.[*citation needed*] In later years, she performed and recorded extensively, broadening her repertoire to include material from throughout Latin America.

In the early 1970s, Sosa released two concept albums in collaboration with composer Ariel Ramírez and lyricist Félix Luna: *Cantata Sudamericana* and *Mujeres Argentinas* (Argentine Women). She also recorded a tribute to Chilean musician Violeta Parra in 1971, including what was to become one of Sosa's signature songs, *Gracias a la vida*. She further popularized of songs written by Milton Nascimento of Brazil and Pablo Milanés and Silvio Rodríguez both from Cuba. Throughout the decade, she released albums such as *Hasta la Victoria* in 1972 and *Traigo un Pueblo* *en mi Voz* in 1973. They featured songs like "Cuando tenga la tierra", written by Ariel Petrocelli and Daniel Toro, which tackles political and social issues like wealth and land inequality. During the 1970s she was a part of two films by the director Leopoldo Torre Nilsson: *El Santo de la Espada* in 1970 and *Güemes, la tierra en armas* in 1971, in which she portrayed Juana Azurduy de Padilla, the guerrilla military leader who fought for Argentine independence.

Sosa in 1972

After the military junta of Jorge Videla came to power in 1976, the atmosphere in Argentina grew increasingly oppressive. Sosa faced death threats against both her and her family, but refused for many years to leave the country. At a concert in La Plata in 1979, Sosa was searched and arrested on stage, along with all those attending the concert. Their release came about through international intervention. Despite attempts to hold more concerts, she was officially barred from performing by the military regime. Banned in her own country, she moved to Paris and then to Madrid. She has spoken publicly about her artistic and emotional struggles during this period of her life. While in exile, she released the album *A Quien Doy* in 1981. The album included a recording of the song "Cuando Me Acuerdo de Mi Pais" which was originally written by the prolific Chilean singer/songwriter, Patricio Manns. The song, which he wrote while also in political exile, expresses the sorrow he felt from being separated from his homeland. She related to this feeling and struggled to continue recording and performing. In an interview with the New York Times, she said, “It was a mental problem, a problem of morale...It wasn’t my throat, or anything physical".

Sosa returned to Argentina from her exile in Europe in February 1982, several months before the military regime collapsed as a result of the Falklands War, and gave a series of concerts at the *Teatro Ópera* in Buenos Aires, where she invited many of her younger colleagues to share the stage. A double album of recordings from these performances became an instant best seller. She then traveled to perform in her home province of Tucuman. However, these performances were largely ignored by mainstream media in the country. In subsequent years, Sosa continued to tour both in Argentina and abroad, performing in such venues as the Lincoln Center in New York City and the *Théâtre Mogador* in Paris. In poor health for much of the 1990s, she performed a comeback show in Argentina in 1998. In 1994, she played in the Sistine Chapel in Vatican City. In 2002, she sold out both Carnegie Hall in New York and the Colosseum in Rome in the same year.

Sosa in 1973

A supporter of Perón, she favored leftist causes throughout her life. She supported President Raul Alfonsin in the election of 1983 which marked the return of democracy in Argentina following the dictatorship. She referred to this election as "Argentina's Spring" She opposed President Carlos Menem, who was in office from 1989 to 1999, and supported the election of Néstor Kirchner, who became president in 2003.
Sosa was a UNESCO Goodwill Ambassador for Latin America and the Caribbean.

Sosa disliked being identified as a protest singer. While she was outright in her political stances, Sosa said the following on the position of the artist:

> “An artist isn’t political in the party political sense – they have a constituency, which is their public – it is the poetry that matters most of all.”

In a career spanning four decades, she worked with performers across several genres and generations, folk, opera, pop, rock, including Martha Argerich, Andrea Bocelli, David Broza, Franco Battiato, Jaime Roos, Joan Baez, Francis Cabrel, Gal Costa, Luz Casal, Lila Downs, Lucio Dalla, Maria Farantouri, Lucecita Benitez, Nilda Fernández, Charly Garcia, León Gieco, Gian Marco, Nana Mouskouri, Pablo Milanés, Holly Near, Milton Nascimento, Pata Negra, Fito Páez, Franco De Vita, Lourdes Pérez, Luciano Pavarotti, Silvio Rodríguez, Ismael Serrano, Shakira, Sting, Caetano Veloso, Julieta Venegas, Gustavo Cerati and Konstantin Wecker

Sosa participated in a 1999 production of Ariel Ramírez's *Misa Criolla*. Her song *Balderrama* is featured in the 2008 movie *Che*, starring Benicio del Toro as the Argentine Marxist revolutionary Che Guevara.

Sosa was the co-chair of the Earth Charter International Commission.

Awards
------

Sosa won the Latin Grammy Award for Best Folk Album in 2000 (*Misa Criolla*), 2003 (*Acústico*), 2006 (*Corazón Libre*), 2009 (*Cantora 1*, which also won Best Recording Package and was nominated for Album of the Year), and 2011 (*Deja La Vida Volar*), as well as several international awards.

In 1995, Konex Foundation from Argentina granted her the Diamond Konex Award, one of the most prestigious awards in Argentina, as the most important personality in the popular music of her country in the last decade.

Death
-----

Mercedes Sosa lying in repose, with her family and President Cristina Fernández de Kirchner viewing

Suffering from recurrent endocrine and respiratory problems in later years, the 74-year-old Sosa was hospitalized in Buenos Aires on 18 September 2009. She died from multiple organ failure on 4 October 2009, at 5:15 am. She is survived by one son, Fabián Matus, born of her first marriage. He said: "She lived her 74 years to the fullest. She had done practically everything she wanted, she didn't have any type of barrier or any type of fear that limited her". The hospital expressed its sympathies to her relatives. Her website featured the following: "Her undisputed talent, her honesty and her profound convictions leave a great legacy to future generations".

Her body was placed on display at the National Congress building in Buenos Aires for the public to pay their respects, and President Fernández de Kirchner ordered three days of national mourning. Thousands had queued by the end of the day.

Sosa's obituary in *The Daily Telegraph* said she was "an unrivalled interpreter of works by her compatriot, the Argentine Atahualpa Yupanqui, and Chile's Violeta Parra". Helen Popper of Reuters reported her death by saying she "fought South America's dictators with her voice and became a giant of contemporary Latin American music". Sosa received three Latin Grammy nominations for her album, in 2009 . She went on to win Best Folk Album about a month after her death.

Tributes
--------

In 2019, Sosa was celebrated by a Google Doodle. The doodle was showcased in Argentina, Chile, Uruguay, Paraguay, Bolivia, Peru, Ecuador, Cuba, Iceland, Sweden, Serbia, Greece, Israel and Vietnam.

In 2023, *Rolling Stone* ranked Sosa at number 160 on its list of the 200 Greatest Singers of All Time.

Discography
-----------

Sosa in 2005, with Argentina's then-First Lady (later president from 2007 to 2015), Cristina Fernández de Kirchner

Sosa recorded forty albums.

### Studio albums

| Year | Album details |
| --- | --- |
| 1962 | La Voz De La ZafraLabel: RCA |
| 1965 | Canciones Con FundamentoLabel: El Grillo |
| 1966 | HermanoLabel: Philips |
| 1966 | Yo No Canto Por CantarLabel: Philips |
| 1967 | Para Cantarle A Mi GenteLabel: Philips |
| 1968 | Con Sabor A Mercedes SosaLabel: Philips |
| 1969 | Mujeres ArgentinasLabel: Philips |
| 1970 | El Grito De La TierraLabel: Philips |
| 1970 | Navidad Con Mercedes SosaLabel: Philips |
| 1971 | Homenaje a Violeta ParraLabel: Philips |
| 1972 | Hasta La VictoriaLabel: Philips |
| 1972 | Cantata SudamericanaLabel: Philips |
| 1973 | Traigo Un Pueblo En Mi VozLabel: Philips |
| 1975 | A Que Florezca Mi PuebloLabel: Philips |
| 1976 | En Dirección Del VientoLabel: Philips |
| 1977 | Mercedes Sosa Interpreta A Atahualpa YupanquiLabel: Philips |
| 1979 | Serenata Para La Tierra De UnoLabel: Philips |
| 1981 | A Quien Doy / Cuando Me Acuerdo de Mi PaísLabel: Philips |
| 1982 | Como Un Pájaro LibreLabel: Philips |
| 1983 | Mercedes SosaLabel: Philips |
| 1984 | ¿Será Posible El Sur?Label: Philips |
| 1985 | Vengo A Ofrecer Mi CorazónLabel: Philips |
| 1986 | Mercedes Sosa '86Label: Philips |
| 1987 | Mercedes Sosa '87Label: Philips |
| 1993 | SinoLabel: Philips/Polygram |
| 1994 | Gestos De AmorLabel: Polydor |
| 1996 | Escondido En Mi PaísLabel: Polydor |
| 1997 | Alta Fidelidad(w/Charly García)Label: Mercury |
| 1998 | Al DespertarLabel: Mercury |
| 1999 | Misa CriollaLabel: Mercury |
| 2005 | Corazón LibreLabel: Edge |
| 2009 | Cantora 1(w/various artists)Label: RCA |
| 2009 | Cantora 2(w/various artists)Label: RCA |
| 2011 | CensuradaLabel: Philips |
| 2015 | LuceritoLabel: RCA |

### EPs

| Year | EP details |
| --- | --- |
| 1975 | Niño De MañanaLabel: Philips |

### Live albums

| Year | Album details |
| --- | --- |
| 1973 | Si Se Calla El Cantor(with Gloria Martin)Label: Philips |
| 1980 | Gravado Ao Vivo No BrasilLabel: Philips |
| 1982 | Mercedes Sosa en ArgentinaLabel: Phonogram/Philips |
| 1985 | Corazón Americano(withMilton Nascimento&León Gieco)Label: Philips |
| 1989 | Live in EuropeLabel: Tropical Music/Polygram Argentina |
| 1991 | De MíLabel: Philips |
| 2002 | Acústico En VivoLabel: Sony Music Argentina |
| 2003 | Argentina Quiere Cantar(withVíctor Heredia&León Gieco)Label: Odeon/EMI |
| 2010 | Deja La Vida Volar (En Gira)Label: RCA |
| 2014 | AngelLabel: Universal Music |
| 2024 | En vivo en el Gran Rex 2006Label: INAMU Discos |
| Mercedes Sosa en Nueva York, 1974Label: Sony Music Argentina |

### Compilation albums

| Year | Album details |
| --- | --- |
| 1975 | Disco De OroLabel: Philips |
| 1983 | RecitalLabel: Philips |
| 1988 | Amigos MíosLabel: Philips |
| 1993 | 30 AñosLabel: Polygram Argentina |
| 1995 | OroLabel: Polygram |
| 1997 | The Best Of Mercedes SosaLabel: Mercury |
| 2013 | Siempre En TiLabel: Universal Music |

Filmography
-----------

* *Güemes, la tierra en armas* (1971)
* *Argentinísima* (1972)
* *Esta es mi Argentina* (1974)
* *Mercedes Sosa, como un pájaro libre* (1983)
* *Será possible el sur: Mercedes Sosa* (1985)
* *Historias de Argentina en vivo* (2001)

Further reading
---------------

* Christensen, Anette (2019). *Mercedes Sosa - The Voice of Hope*. Denmark: Tribute2life Publishing. ISBN 978-87-998216-5-5.
* Christensen, Anette (2019). *Mercedes Sosa - More Than a Song*. Denmark: Tribute2life Publishing. ISBN 978-87-998216-7-9. (Abridged version of Mercedes Sosa - The Voice of Hope)
* Braceli, Rodolfo (2010). *Mercedes Sosa. La Negra* (in Spanish). Italy: Perrone. ISBN 978-88-6004-347-4.
* Matus, Fabián (2016). *Mercedes Sosa. La Mami* (in Spanish). Argentina: Planeta. ISBN 978-950-49-5247-3.

References
----------

1. **^** Mercedes Sosa at BrainyHistory.com
2. **^** "Singer Mercedes Sosa: The voice of the 'voiceless ones' outlasts South American dictatorships".
3. ^ ***a*** ***b*** ***c*** Heckman, Don (29 October 1995). "POP MUSIC : The Voice Heard Round the World : Mercedes Sosa, a compelling figure in world music and a social activist, will make a rare L.A. appearance". *Los Angeles Times*. Retrieved 5 December 2023.
4. ^ ***a*** ***b*** ***c*** ***d*** ***e*** ***f*** ***g*** ***h*** "Legendary folk singer Mercedes Sosa dies at 74". France 24. 4 October 2009. Retrieved 5 October 2009.
5. ^ ***a*** ***b*** ***c*** ***d*** Bernstein, Adam (5 October 2009). "Argentine folk singer who championed social justice". *Los Angeles Times*. Retrieved 8 March 2025.
6. **^** *Mercedes Sosa: The Voice of Latin America*. Dir. Rodrigo H. Villa. First Run Features, 2013. Web.
7. ^ ***a*** ***b*** ***c*** ***d*** ***e*** ***f*** ***g*** ***h*** "Mercedes Sosa: Obituary". *The Daily Telegraph*. 4 October 2009. Retrieved 5 October 2009.
8. **^** The presentation by Jorge Cafrune and the song Mercedes Sosa sang on YouTube. Retrieved 3 March 2010.
9. ^ ***a*** ***b*** ***c*** ***d*** ***e*** ***f*** ***g*** ***h*** "Latin artist Mercedes Sosa dies". BBC. 4 October 2009. Retrieved 5 October 2009.
10. **^** Karush, Matthew (2017). *Musicians in Transit: Argentina and the Globalization of Popular Music*. Duke. p. 168. ISBN 978-0-8223-7377-3.
11. ^ ***a*** ***b*** Associated Press[*dead link*]
12. ^ ***a*** ***b*** "Biografía". *Fundación Mercedes Sosa* (in Spanish). Retrieved 8 March 2025.
13. **^** Argentina, Cadena 3. "El folclore argentino llora la muerte de Daniel Toro - Notas - Viva la Radio". *Cadena 3 Argentina* (in Spanish). Retrieved 14 March 2025.`{{cite web}}`: CS1 maint: numeric names: authors list (link)
14. **^** Nilsson, Leopoldo Torre (7 April 1971), *Güemes - la tierra en armas* (Drama, History), Alfredo Alcón, Norma Aleandro, Gabriela Gili, Producciones Cinematográficas Cerrillos, retrieved 8 March 2025
15. **^** Rodrigo (10 September 2020). "Patricio Manns: Cuando me acuerdo de mi país (1983) | PERRERAC: La canción, un arma de la revolución" (in Spanish). Retrieved 14 March 2025.
16. ^ ***a*** ***b*** Lopez, Vicente F. (18 January 1983). "ARTISTAS EXILIADOS HAN REGRESADO A ARGENTINA". *El Nuevo Herald*. p. 8. Retrieved 7 March 2025.
17. **^** Drosdoff, Daniel (30 October 1983). "ARGENTINIAN VOTE TO END DICTATORSHIP PERONIST AND RADICAL IN LEAD FOR PRESIDENCY". *Miami Herald*. pp. 16A. Retrieved 7 March 2025.
18. **^** Interview with Mercedes Sosa Archived 16 October 2009 at the Wayback Machine, *Magazin Berliner Zeitung*, 25 October 2003. (in German)
19. **^** Mercedes Sosa in concert Archived 4 January 2008 at the Wayback Machine
20. **^** Meyer, Bill (7 October 2009). "A U.S. musician pays tribute to Mercedes Sosa". *People's World*. Retrieved 5 December 2023.
21. **^** "In Profile: Mercedes Sosa". *soundsandcolours.com*. 26 August 2010. Retrieved 27 March 2018.
22. **^** *Balderrama* by Mercedes Sosa on YouTube – a tribute to Che Guevara
23. **^** "Latin Grammys: Ganadores – Años Anteriores (2000)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
24. **^** "Latin Grammys: Ganadores – Años Anteriores (2003)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
25. **^** "Latin Grammys: Ganadores – Años Anteriores (2006)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
26. **^** "Latin Grammys: Ganadores – Años Anteriores (2009)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
27. **^** "Latin Grammys: Ganadores – Años Anteriores (2011)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
28. **^** "Premios Konex 1995: Música Popular". *Fundación Konex* (in Spanish). Retrieved 7 July 2021.
29. **^** ""En ningún momento sufrió", dijo el hijo de Mercedes Sosa" (in Spanish). October 2009. Archived from the original on 4 October 2009. Retrieved 1 October 2009.
30. ^ ***a*** ***b*** ***c*** Javier Doberti (4 October 2009). "Argentine singer Mercedes Sosa, 'voice of Latin America,' dies at 74". CNN. Retrieved 5 October 2009.
31. **^** "Argentine folk legend Mercedes Sosa dead at 74". *Bangkok Post*. 4 October 2009. Retrieved 5 October 2009.
32. ^ ***a*** ***b*** "Argentine folk icon Sosa dies at 74". Al Jazeera. 4 October 2009. Retrieved 5 October 2009.
33. **^** "Continúa la procesión en el Congreso para despedir a Mercedes Sosa".
34. ^ ***a*** ***b*** Helen Popper (4 October 2009). "Argentine singer Mercedes Sosa dies at 74". *Reuters*. Archived from the original on 11 October 2009. Retrieved 5 October 2009.
35. **^** "Celebrating Mercedes Sosa". *Doodles Archive, Google*. 31 January 2019.
36. **^** "The 200 Greatest Singers of All Time". *Rolling Stone*. 1 January 2023. Retrieved 9 March 2023.

External links
--------------

Wikiquote has quotations related to ***Mercedes Sosa***.

Wikimedia Commons has media related to Mercedes Sosa.

* Tribute to Mercedes Sosa (in Portuguese BR)
* Mercedes Sosa's website (in Spanish)
* Mercedes Sosa's News (in Spanish)
* Mercedes Sosa at IMDb
* Mercedes Sosa's Discography on Discogs.com

NewPP limit report
Parsed by mw‐api‐ext.eqiad.main‐655cc685c8‐xdg6t
Cached time: 20250603141506
Cache expiry: 2592000
Reduced expiry: false
Complications: [vary‐revision‐sha1, show‐toc]
CPU time usage: 1.359 seconds
Real time usage: 1.721 seconds
Preprocessor visited node count: 6973/1000000
Revision size: 29435/2097152 bytes
Post‐expand include size: 288001/2097152 bytes
Template argument size: 6520/2097152 bytes
Highest expansion depth: 20/100
Expensive parser function count: 8/500
Unstrip recursion depth: 1/20
Unstrip post‐expand size: 166311/5000000 bytes
Lua time usage: 0.741/10.000 seconds
Lua memory usage: 16760866/52428800 bytes
Number of Wikibase entities loaded: 1/500
Transclusion expansion time report (%,ms,calls,template)
100.00% 1305.662 1 -total
29.52% 385.448 1 Template:Reflist
20.38% 266.073 1 Template:Infobox\_person
9.77% 127.503 20 Template:Cite\_web
8.75% 114.217 5 Template:Cite\_book
8.41% 109.751 1 Template:In\_lang
7.34% 95.784 1 Template:Infobox\_musical\_artist
7.02% 91.627 2 Template:Short\_description
6.76% 88.198 6 Template:Br\_separated\_entries
6.03% 78.712 1 Template:Mercedes\_Sosa
Saved in parser cache with key enwiki:pcache:476992:|#|:idhash:canonical and timestamp 20250603141506 and revision id 1293758111. Rendering was triggered because: api-parse

In [30]:
import re
import requests
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify as md

def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = False
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown,
    excluding infoboxes, sidebars, images, and optionally the References,
    Further reading, and External links sections.

    Args:
        title (str): Wikipedia page title (e.g., "Python_(programming_language)").
        lang (str): Language code (default 'en' for English).
        ignore_references (bool): If True, drop "References", "Further reading", and
                                  "External links" sections entirely.
        ignore_links (bool): If True, strip out all <a> tags (leaving plain text).

    Returns:
        str: Markdown-formatted content of the main article body.
    """
    # 1. Fetch raw HTML of the Wikipedia page
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    response = requests.get(url)
    response.raise_for_status()  # :contentReference[oaicite:6]{index=6}
    html = response.text

    # 2. Parse with BeautifulSoup and isolate the div containing article text
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")  # :contentReference[oaicite:7]{index=7}
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 2a. Remove all "[edit]" links by deleting <span class="mw-editsection">
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()  # :contentReference[oaicite:8]{index=8}

    # 2b. Remove any superscripted footnote markers (<sup class="reference">)
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()  # :contentReference[oaicite:9]{index=9}

    # 3. Remove unwanted elements: infoboxes, toc, navboxes, thumbnails, images
    #    a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()  # :contentReference[oaicite:10]{index=10}

    #    b) Table of Contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()  # :contentReference[oaicite:11]{index=11}

    #    c) Navigation boxes and Wikipedia templates (navbox/metadata)
    for nav in content_div.find_all(
        ["div", "table"],
        class_=re.compile(r"navbox|vertical-navbox|metadata")
    ):
        nav.decompose()  # :contentReference[oaicite:12]{index=12}

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()  # :contentReference[oaicite:13]{index=13}

    #    e) Remove any raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()  # :contentReference[oaicite:14]{index=14}

    # 4. Convert every remaining <table> into Markdown, in place
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> tag to a Markdown-formatted table, preserving any <th> headers.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Data rows (skip header row)
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip any tables already removed or used for navigation
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)  # :contentReference[oaicite:15]{index=15}
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Remove "References", "Further reading", and "External links" sections if requested
    if ignore_references:
        print("\nDEBUG: --- ignore_references is True ---") # DEBUG
        target_sections = {"references", "further reading", "external links"}
        print(f"DEBUG: Target section names: {target_sections}") # DEBUG
        
        all_headline_spans = list(content_div.find_all("span", class_="mw-headline"))
        print(f"DEBUG: Found {len(all_headline_spans)} 'mw-headline' spans in content_div.") # DEBUG
        
        for i, span in enumerate(all_headline_spans):
            if not span.parent:
                # This span's parent was already removed from the tree.
                # print(f"DEBUG: Span {i} ('{span.get_text(strip=True)[:30]}...') has no parent, skipping.") # DEBUG
                continue

            heading_text = span.get_text(strip=True).lower()
            # Optionally, print all heading texts found to check for subtle differences:
            # print(f"DEBUG: Checking span {i}: Extracted text: '{heading_text}' (Original ID: {span.get('id')})") # DEBUG

            if heading_text in target_sections:
                print(f"DEBUG: Matched target section: '{heading_text}' from span {i} (ID: {span.get('id')}).") # DEBUG
                section_heading_tag = span.find_parent(re.compile(r"^h[1-6]$"))

                if not section_heading_tag:
                    print(f"DEBUG: No H-tag ancestor found for '{heading_text}'. Span HTML: {str(span.parent)[:100]}") # DEBUG
                    continue
                
                if not section_heading_tag.parent:
                    print(f"DEBUG: H-tag <{section_heading_tag.name}> for '{heading_text}' (ID: {section_heading_tag.get('id')}) was already decomposed. Skipping.") # DEBUG
                    continue
                
                print(f"DEBUG: Found H-tag: <{section_heading_tag.name} id='{section_heading_tag.get('id')}'> for '{heading_text}'.") # DEBUG

                try:
                    current_section_level = int(section_heading_tag.name[1:])
                except (ValueError, IndexError):
                    print(f"DEBUG: Could not parse level from H-tag name '{section_heading_tag.name}'. Skipping.") # DEBUG
                    continue

                nodes_to_remove = []
                for sibling_node in section_heading_tag.find_next_siblings():
                    if sibling_node.name and sibling_node.name.startswith('h'):
                        try:
                            sibling_level = int(sibling_node.name[1:])
                            if sibling_level <= current_section_level:
                                print(f"DEBUG: Stopping content removal for '{heading_text}' at sibling <{sibling_node.name} id='{sibling_node.get('id')}'>.") # DEBUG
                                break 
                        except (ValueError, IndexError):
                            pass 
                    nodes_to_remove.append(sibling_node)
                
                print(f"DEBUG: For '{heading_text}', planning to remove {len(nodes_to_remove)} content nodes.") # DEBUG
                for node_idx, node in enumerate(nodes_to_remove):
                    if node.parent:
                        # print(f"DEBUG: Decomposing content node {node_idx} for '{heading_text}': <{node.name}>") # DEBUG
                        node.decompose()
                
                if section_heading_tag.parent:
                    print(f"DEBUG: Decomposing H-tag <{section_heading_tag.name} id='{section_heading_tag.get('id')}'> for '{heading_text}'.") # DEBUG
                    section_heading_tag.decompose()
            elif any(ts_part in heading_text for ts_part in ["reference", "external", "further", "see also", "notes"]): # DEBUG for near misses
                print(f"DEBUG: Near miss? Span text: '{heading_text}' (ID: {span.get('id')})") # DEBUG


    # 6. Convert remaining HTML in content_div to Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags :contentReference[oaicite:18]{index=18}
    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)  # :contentReference[oaicite:19]{index=19}

    # 7. Collapse runs of 3+ newlines into exactly two
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [31]:
from IPython.display import display, Markdown

md_page = get_wikipedia_markdown(
    'Mercedes_Sosa',
    lang='en',
    ignore_references= True,
    ignore_links= True)

display(Markdown(md_page)) 


DEBUG: --- ignore_references is True ---
DEBUG: Target section names: {'further reading', 'references', 'external links'}
DEBUG: Found 0 'mw-headline' spans in content_div.


Argentine singer (1935–2009)

Not to be confused with Mercedes Sola.

**Haydée Mercedes** "**La Negra**" **Sosa** (Latin American Spanish: [meɾˈseðes ˈsosa]; 9 July 1935 – 4 October 2009) was an Argentine singer who was popular throughout Latin America and many countries outside the region. With her roots in Argentine folk music, Sosa became one of the preeminent exponents of *El nuevo cancionero*. She gave voice to songs written by many Latin American songwriters. Her music made people hail her as the "voice of the voiceless ones". She was often called "the conscience of Latin America".

Sosa performed in venues such as the Lincoln Center in New York City, the Théâtre Mogador in Paris, the Sistine Chapel in Vatican City, as well as sold-out shows in New York's Carnegie Hall and the Roman Colosseum during her final decade of life. Her career spanned four decades and she was the recipient of six Latin Grammy awards (2000, 2003, 2004, 2006, 2009, 2011), including a Latin Grammy Lifetime Achievement Award in 2004 and two posthumous Latin Grammy Award for Best Folk Album in 2009 and 2011. She won the Premio Gardel in 2000, the main musical award in Argentina. She served as an ambassador for UNICEF.

Life
----

Sosa was born on 9 July 1935, in San Miguel de Tucumán, in the northwestern Argentine province of Tucumán, of mestizo ancestry. She was of French, Spanish and Diaguita descent. Her nickname "la negra", which is a common nickname in Argentina for people with darker complexion, is a reference to her indigenous heritage. Her parents, a day laborer and a washerwoman, were Peronists, although they never registered in the party, and she started her career as a singer for the Peronist Party in Tucuman under the name Gladys Osorio. In 1950, at age fifteen, she won a singing competition organized by a local radio station and was given a contract to perform for two months. She recorded her first album, *La Voz de la Zafra*, in 1959. A performance at the 1965 Cosquín National Folklore Festival—where she was introduced and brought to the stage while sitting in the audience by fellow folk singer Jorge Cafrune— brought her to the attention of the Argentine public. Sosa and her first husband, Manuel Oscar Matus, with whom she had one son, were key players in the mid-60s *nueva canción* movement (which was called *nuevo cancionero* in Argentina). Her second record was *Canciones con Fundamento*, a collection of Argentine folk songs.

Sosa with Félix Luna and Ariel Ramírez (at the piano)

Sosa "spent the late 1960s building her audience in Europe and among the cosmopolitan middle class in Buenos Aires, becoming in the process a much bigger star" than her contemporaries. In 1967, Sosa toured the United States and Europe with great success.[*citation needed*] In later years, she performed and recorded extensively, broadening her repertoire to include material from throughout Latin America.

In the early 1970s, Sosa released two concept albums in collaboration with composer Ariel Ramírez and lyricist Félix Luna: *Cantata Sudamericana* and *Mujeres Argentinas* (Argentine Women). She also recorded a tribute to Chilean musician Violeta Parra in 1971, including what was to become one of Sosa's signature songs, *Gracias a la vida*. She further popularized of songs written by Milton Nascimento of Brazil and Pablo Milanés and Silvio Rodríguez both from Cuba. Throughout the decade, she released albums such as *Hasta la Victoria* in 1972 and *Traigo un Pueblo* *en mi Voz* in 1973. They featured songs like "Cuando tenga la tierra", written by Ariel Petrocelli and Daniel Toro, which tackles political and social issues like wealth and land inequality. During the 1970s she was a part of two films by the director Leopoldo Torre Nilsson: *El Santo de la Espada* in 1970 and *Güemes, la tierra en armas* in 1971, in which she portrayed Juana Azurduy de Padilla, the guerrilla military leader who fought for Argentine independence.

Sosa in 1972

After the military junta of Jorge Videla came to power in 1976, the atmosphere in Argentina grew increasingly oppressive. Sosa faced death threats against both her and her family, but refused for many years to leave the country. At a concert in La Plata in 1979, Sosa was searched and arrested on stage, along with all those attending the concert. Their release came about through international intervention. Despite attempts to hold more concerts, she was officially barred from performing by the military regime. Banned in her own country, she moved to Paris and then to Madrid. She has spoken publicly about her artistic and emotional struggles during this period of her life. While in exile, she released the album *A Quien Doy* in 1981. The album included a recording of the song "Cuando Me Acuerdo de Mi Pais" which was originally written by the prolific Chilean singer/songwriter, Patricio Manns. The song, which he wrote while also in political exile, expresses the sorrow he felt from being separated from his homeland. She related to this feeling and struggled to continue recording and performing. In an interview with the New York Times, she said, “It was a mental problem, a problem of morale...It wasn’t my throat, or anything physical".

Sosa returned to Argentina from her exile in Europe in February 1982, several months before the military regime collapsed as a result of the Falklands War, and gave a series of concerts at the *Teatro Ópera* in Buenos Aires, where she invited many of her younger colleagues to share the stage. A double album of recordings from these performances became an instant best seller. She then traveled to perform in her home province of Tucuman. However, these performances were largely ignored by mainstream media in the country. In subsequent years, Sosa continued to tour both in Argentina and abroad, performing in such venues as the Lincoln Center in New York City and the *Théâtre Mogador* in Paris. In poor health for much of the 1990s, she performed a comeback show in Argentina in 1998. In 1994, she played in the Sistine Chapel in Vatican City. In 2002, she sold out both Carnegie Hall in New York and the Colosseum in Rome in the same year.

Sosa in 1973

A supporter of Perón, she favored leftist causes throughout her life. She supported President Raul Alfonsin in the election of 1983 which marked the return of democracy in Argentina following the dictatorship. She referred to this election as "Argentina's Spring" She opposed President Carlos Menem, who was in office from 1989 to 1999, and supported the election of Néstor Kirchner, who became president in 2003.
Sosa was a UNESCO Goodwill Ambassador for Latin America and the Caribbean.

Sosa disliked being identified as a protest singer. While she was outright in her political stances, Sosa said the following on the position of the artist:

> “An artist isn’t political in the party political sense – they have a constituency, which is their public – it is the poetry that matters most of all.”

In a career spanning four decades, she worked with performers across several genres and generations, folk, opera, pop, rock, including Martha Argerich, Andrea Bocelli, David Broza, Franco Battiato, Jaime Roos, Joan Baez, Francis Cabrel, Gal Costa, Luz Casal, Lila Downs, Lucio Dalla, Maria Farantouri, Lucecita Benitez, Nilda Fernández, Charly Garcia, León Gieco, Gian Marco, Nana Mouskouri, Pablo Milanés, Holly Near, Milton Nascimento, Pata Negra, Fito Páez, Franco De Vita, Lourdes Pérez, Luciano Pavarotti, Silvio Rodríguez, Ismael Serrano, Shakira, Sting, Caetano Veloso, Julieta Venegas, Gustavo Cerati and Konstantin Wecker

Sosa participated in a 1999 production of Ariel Ramírez's *Misa Criolla*. Her song *Balderrama* is featured in the 2008 movie *Che*, starring Benicio del Toro as the Argentine Marxist revolutionary Che Guevara.

Sosa was the co-chair of the Earth Charter International Commission.

Awards
------

Sosa won the Latin Grammy Award for Best Folk Album in 2000 (*Misa Criolla*), 2003 (*Acústico*), 2006 (*Corazón Libre*), 2009 (*Cantora 1*, which also won Best Recording Package and was nominated for Album of the Year), and 2011 (*Deja La Vida Volar*), as well as several international awards.

In 1995, Konex Foundation from Argentina granted her the Diamond Konex Award, one of the most prestigious awards in Argentina, as the most important personality in the popular music of her country in the last decade.

Death
-----

Mercedes Sosa lying in repose, with her family and President Cristina Fernández de Kirchner viewing

Suffering from recurrent endocrine and respiratory problems in later years, the 74-year-old Sosa was hospitalized in Buenos Aires on 18 September 2009. She died from multiple organ failure on 4 October 2009, at 5:15 am. She is survived by one son, Fabián Matus, born of her first marriage. He said: "She lived her 74 years to the fullest. She had done practically everything she wanted, she didn't have any type of barrier or any type of fear that limited her". The hospital expressed its sympathies to her relatives. Her website featured the following: "Her undisputed talent, her honesty and her profound convictions leave a great legacy to future generations".

Her body was placed on display at the National Congress building in Buenos Aires for the public to pay their respects, and President Fernández de Kirchner ordered three days of national mourning. Thousands had queued by the end of the day.

Sosa's obituary in *The Daily Telegraph* said she was "an unrivalled interpreter of works by her compatriot, the Argentine Atahualpa Yupanqui, and Chile's Violeta Parra". Helen Popper of Reuters reported her death by saying she "fought South America's dictators with her voice and became a giant of contemporary Latin American music". Sosa received three Latin Grammy nominations for her album, in 2009 . She went on to win Best Folk Album about a month after her death.

Tributes
--------

In 2019, Sosa was celebrated by a Google Doodle. The doodle was showcased in Argentina, Chile, Uruguay, Paraguay, Bolivia, Peru, Ecuador, Cuba, Iceland, Sweden, Serbia, Greece, Israel and Vietnam.

In 2023, *Rolling Stone* ranked Sosa at number 160 on its list of the 200 Greatest Singers of All Time.

Discography
-----------

Sosa in 2005, with Argentina's then-First Lady (later president from 2007 to 2015), Cristina Fernández de Kirchner

Sosa recorded forty albums.

### Studio albums

| Year | Album details |
| --- | --- |
| 1962 | La Voz De La ZafraLabel: RCA |
| 1965 | Canciones Con FundamentoLabel: El Grillo |
| 1966 | HermanoLabel: Philips |
| 1966 | Yo No Canto Por CantarLabel: Philips |
| 1967 | Para Cantarle A Mi GenteLabel: Philips |
| 1968 | Con Sabor A Mercedes SosaLabel: Philips |
| 1969 | Mujeres ArgentinasLabel: Philips |
| 1970 | El Grito De La TierraLabel: Philips |
| 1970 | Navidad Con Mercedes SosaLabel: Philips |
| 1971 | Homenaje a Violeta ParraLabel: Philips |
| 1972 | Hasta La VictoriaLabel: Philips |
| 1972 | Cantata SudamericanaLabel: Philips |
| 1973 | Traigo Un Pueblo En Mi VozLabel: Philips |
| 1975 | A Que Florezca Mi PuebloLabel: Philips |
| 1976 | En Dirección Del VientoLabel: Philips |
| 1977 | Mercedes Sosa Interpreta A Atahualpa YupanquiLabel: Philips |
| 1979 | Serenata Para La Tierra De UnoLabel: Philips |
| 1981 | A Quien Doy / Cuando Me Acuerdo de Mi PaísLabel: Philips |
| 1982 | Como Un Pájaro LibreLabel: Philips |
| 1983 | Mercedes SosaLabel: Philips |
| 1984 | ¿Será Posible El Sur?Label: Philips |
| 1985 | Vengo A Ofrecer Mi CorazónLabel: Philips |
| 1986 | Mercedes Sosa '86Label: Philips |
| 1987 | Mercedes Sosa '87Label: Philips |
| 1993 | SinoLabel: Philips/Polygram |
| 1994 | Gestos De AmorLabel: Polydor |
| 1996 | Escondido En Mi PaísLabel: Polydor |
| 1997 | Alta Fidelidad(w/Charly García)Label: Mercury |
| 1998 | Al DespertarLabel: Mercury |
| 1999 | Misa CriollaLabel: Mercury |
| 2005 | Corazón LibreLabel: Edge |
| 2009 | Cantora 1(w/various artists)Label: RCA |
| 2009 | Cantora 2(w/various artists)Label: RCA |
| 2011 | CensuradaLabel: Philips |
| 2015 | LuceritoLabel: RCA |

### EPs

| Year | EP details |
| --- | --- |
| 1975 | Niño De MañanaLabel: Philips |

### Live albums

| Year | Album details |
| --- | --- |
| 1973 | Si Se Calla El Cantor(with Gloria Martin)Label: Philips |
| 1980 | Gravado Ao Vivo No BrasilLabel: Philips |
| 1982 | Mercedes Sosa en ArgentinaLabel: Phonogram/Philips |
| 1985 | Corazón Americano(withMilton Nascimento&León Gieco)Label: Philips |
| 1989 | Live in EuropeLabel: Tropical Music/Polygram Argentina |
| 1991 | De MíLabel: Philips |
| 2002 | Acústico En VivoLabel: Sony Music Argentina |
| 2003 | Argentina Quiere Cantar(withVíctor Heredia&León Gieco)Label: Odeon/EMI |
| 2010 | Deja La Vida Volar (En Gira)Label: RCA |
| 2014 | AngelLabel: Universal Music |
| 2024 | En vivo en el Gran Rex 2006Label: INAMU Discos |
| Mercedes Sosa en Nueva York, 1974Label: Sony Music Argentina |

### Compilation albums

| Year | Album details |
| --- | --- |
| 1975 | Disco De OroLabel: Philips |
| 1983 | RecitalLabel: Philips |
| 1988 | Amigos MíosLabel: Philips |
| 1993 | 30 AñosLabel: Polygram Argentina |
| 1995 | OroLabel: Polygram |
| 1997 | The Best Of Mercedes SosaLabel: Mercury |
| 2013 | Siempre En TiLabel: Universal Music |

Filmography
-----------

* *Güemes, la tierra en armas* (1971)
* *Argentinísima* (1972)
* *Esta es mi Argentina* (1974)
* *Mercedes Sosa, como un pájaro libre* (1983)
* *Será possible el sur: Mercedes Sosa* (1985)
* *Historias de Argentina en vivo* (2001)

Further reading
---------------

* Christensen, Anette (2019). *Mercedes Sosa - The Voice of Hope*. Denmark: Tribute2life Publishing. ISBN 978-87-998216-5-5.
* Christensen, Anette (2019). *Mercedes Sosa - More Than a Song*. Denmark: Tribute2life Publishing. ISBN 978-87-998216-7-9. (Abridged version of Mercedes Sosa - The Voice of Hope)
* Braceli, Rodolfo (2010). *Mercedes Sosa. La Negra* (in Spanish). Italy: Perrone. ISBN 978-88-6004-347-4.
* Matus, Fabián (2016). *Mercedes Sosa. La Mami* (in Spanish). Argentina: Planeta. ISBN 978-950-49-5247-3.

References
----------

1. **^** Mercedes Sosa at BrainyHistory.com
2. **^** "Singer Mercedes Sosa: The voice of the 'voiceless ones' outlasts South American dictatorships".
3. ^ ***a*** ***b*** ***c*** Heckman, Don (29 October 1995). "POP MUSIC : The Voice Heard Round the World : Mercedes Sosa, a compelling figure in world music and a social activist, will make a rare L.A. appearance". *Los Angeles Times*. Retrieved 5 December 2023.
4. ^ ***a*** ***b*** ***c*** ***d*** ***e*** ***f*** ***g*** ***h*** "Legendary folk singer Mercedes Sosa dies at 74". France 24. 4 October 2009. Retrieved 5 October 2009.
5. ^ ***a*** ***b*** ***c*** ***d*** Bernstein, Adam (5 October 2009). "Argentine folk singer who championed social justice". *Los Angeles Times*. Retrieved 8 March 2025.
6. **^** *Mercedes Sosa: The Voice of Latin America*. Dir. Rodrigo H. Villa. First Run Features, 2013. Web.
7. ^ ***a*** ***b*** ***c*** ***d*** ***e*** ***f*** ***g*** ***h*** "Mercedes Sosa: Obituary". *The Daily Telegraph*. 4 October 2009. Retrieved 5 October 2009.
8. **^** The presentation by Jorge Cafrune and the song Mercedes Sosa sang on YouTube. Retrieved 3 March 2010.
9. ^ ***a*** ***b*** ***c*** ***d*** ***e*** ***f*** ***g*** ***h*** "Latin artist Mercedes Sosa dies". BBC. 4 October 2009. Retrieved 5 October 2009.
10. **^** Karush, Matthew (2017). *Musicians in Transit: Argentina and the Globalization of Popular Music*. Duke. p. 168. ISBN 978-0-8223-7377-3.
11. ^ ***a*** ***b*** Associated Press[*dead link*]
12. ^ ***a*** ***b*** "Biografía". *Fundación Mercedes Sosa* (in Spanish). Retrieved 8 March 2025.
13. **^** Argentina, Cadena 3. "El folclore argentino llora la muerte de Daniel Toro - Notas - Viva la Radio". *Cadena 3 Argentina* (in Spanish). Retrieved 14 March 2025.`{{cite web}}`: CS1 maint: numeric names: authors list (link)
14. **^** Nilsson, Leopoldo Torre (7 April 1971), *Güemes - la tierra en armas* (Drama, History), Alfredo Alcón, Norma Aleandro, Gabriela Gili, Producciones Cinematográficas Cerrillos, retrieved 8 March 2025
15. **^** Rodrigo (10 September 2020). "Patricio Manns: Cuando me acuerdo de mi país (1983) | PERRERAC: La canción, un arma de la revolución" (in Spanish). Retrieved 14 March 2025.
16. ^ ***a*** ***b*** Lopez, Vicente F. (18 January 1983). "ARTISTAS EXILIADOS HAN REGRESADO A ARGENTINA". *El Nuevo Herald*. p. 8. Retrieved 7 March 2025.
17. **^** Drosdoff, Daniel (30 October 1983). "ARGENTINIAN VOTE TO END DICTATORSHIP PERONIST AND RADICAL IN LEAD FOR PRESIDENCY". *Miami Herald*. pp. 16A. Retrieved 7 March 2025.
18. **^** Interview with Mercedes Sosa Archived 16 October 2009 at the Wayback Machine, *Magazin Berliner Zeitung*, 25 October 2003. (in German)
19. **^** Mercedes Sosa in concert Archived 4 January 2008 at the Wayback Machine
20. **^** Meyer, Bill (7 October 2009). "A U.S. musician pays tribute to Mercedes Sosa". *People's World*. Retrieved 5 December 2023.
21. **^** "In Profile: Mercedes Sosa". *soundsandcolours.com*. 26 August 2010. Retrieved 27 March 2018.
22. **^** *Balderrama* by Mercedes Sosa on YouTube – a tribute to Che Guevara
23. **^** "Latin Grammys: Ganadores – Años Anteriores (2000)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
24. **^** "Latin Grammys: Ganadores – Años Anteriores (2003)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
25. **^** "Latin Grammys: Ganadores – Años Anteriores (2006)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
26. **^** "Latin Grammys: Ganadores – Años Anteriores (2009)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
27. **^** "Latin Grammys: Ganadores – Años Anteriores (2011)". *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
28. **^** "Premios Konex 1995: Música Popular". *Fundación Konex* (in Spanish). Retrieved 7 July 2021.
29. **^** ""En ningún momento sufrió", dijo el hijo de Mercedes Sosa" (in Spanish). October 2009. Archived from the original on 4 October 2009. Retrieved 1 October 2009.
30. ^ ***a*** ***b*** ***c*** Javier Doberti (4 October 2009). "Argentine singer Mercedes Sosa, 'voice of Latin America,' dies at 74". CNN. Retrieved 5 October 2009.
31. **^** "Argentine folk legend Mercedes Sosa dead at 74". *Bangkok Post*. 4 October 2009. Retrieved 5 October 2009.
32. ^ ***a*** ***b*** "Argentine folk icon Sosa dies at 74". Al Jazeera. 4 October 2009. Retrieved 5 October 2009.
33. **^** "Continúa la procesión en el Congreso para despedir a Mercedes Sosa".
34. ^ ***a*** ***b*** Helen Popper (4 October 2009). "Argentine singer Mercedes Sosa dies at 74". *Reuters*. Archived from the original on 11 October 2009. Retrieved 5 October 2009.
35. **^** "Celebrating Mercedes Sosa". *Doodles Archive, Google*. 31 January 2019.
36. **^** "The 200 Greatest Singers of All Time". *Rolling Stone*. 1 January 2023. Retrieved 9 March 2023.

External links
--------------

Wikiquote has quotations related to ***Mercedes Sosa***.

Wikimedia Commons has media related to Mercedes Sosa.

* Tribute to Mercedes Sosa (in Portuguese BR)
* Mercedes Sosa's website (in Spanish)
* Mercedes Sosa's News (in Spanish)
* Mercedes Sosa at IMDb
* Mercedes Sosa's Discography on Discogs.com

NewPP limit report
Parsed by mw‐api‐ext.eqiad.main‐655cc685c8‐xdg6t
Cached time: 20250603141506
Cache expiry: 2592000
Reduced expiry: false
Complications: [vary‐revision‐sha1, show‐toc]
CPU time usage: 1.359 seconds
Real time usage: 1.721 seconds
Preprocessor visited node count: 6973/1000000
Revision size: 29435/2097152 bytes
Post‐expand include size: 288001/2097152 bytes
Template argument size: 6520/2097152 bytes
Highest expansion depth: 20/100
Expensive parser function count: 8/500
Unstrip recursion depth: 1/20
Unstrip post‐expand size: 166311/5000000 bytes
Lua time usage: 0.741/10.000 seconds
Lua memory usage: 16760866/52428800 bytes
Number of Wikibase entities loaded: 1/500
Transclusion expansion time report (%,ms,calls,template)
100.00% 1305.662 1 -total
29.52% 385.448 1 Template:Reflist
20.38% 266.073 1 Template:Infobox\_person
9.77% 127.503 20 Template:Cite\_web
8.75% 114.217 5 Template:Cite\_book
8.41% 109.751 1 Template:In\_lang
7.34% 95.784 1 Template:Infobox\_musical\_artist
7.02% 91.627 2 Template:Short\_description
6.76% 88.198 6 Template:Br\_separated\_entries
6.03% 78.712 1 Template:Mercedes\_Sosa
Saved in parser cache with key enwiki:pcache:476992:|#|:idhash:canonical and timestamp 20250603141506 and revision id 1293758111. Rendering was triggered because: api-parse

In [46]:
import re
import requests
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify as md

def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = False
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown,
    excluding infoboxes, sidebars, images, and optionally the References,
    Further reading, and External links sections.

    Args:
        title (str): Wikipedia page title (e.g., "Python_(programming_language)").
        lang (str): Language code (default 'en' for English).
        ignore_references (bool): If True, drop "References", "Further reading", and
                                  "External links" sections entirely.
        ignore_links (bool): If True, strip out all <a> tags (leaving plain text).

    Returns:
        str: Markdown-formatted content of the main article body.
    """
    # 1. Fetch raw HTML
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    print(f"DEBUG: Fetching URL: {url}")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    html = response.text
    print(f"DEBUG: Fetched HTML, length: {len(html)}")

    # Save the fetched HTML to a file for inspection
    with open(f"fetched_wikipedia_page_{title}.html", "w", encoding="utf-8") as f:
        f.write(html)
    print(f"DEBUG: Saved fetched HTML to fetched_wikipedia_page_{title}.html")


    # DEBUG: Check raw HTML for a known headline span
    known_headline_html = '<span class="mw-headline" id="References">'
    if known_headline_html in html:
        print(f"DEBUG: Raw HTML CONTAINS the string for '{known_headline_html}'.")
    else:
        print(f"DEBUG: Raw HTML DOES NOT CONTAIN the string for '{known_headline_html}'.")

    # 2. Parse and get content_div
    print("DEBUG: Parsing HTML with BeautifulSoup (lxml)...")
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")
    
    if content_div is None:
        print("DEBUG: 'mw-parser-output' div NOT FOUND in parsed soup.")
        raise ValueError(f"Could not find main content for page '{title}'")
    else:
        print(f"DEBUG: 'mw-parser-output' div FOUND. Object type: {type(content_div)}")

        # DEBUG: Intensive check for 'mw-headline' spans within the initial content_div
        print("\nDEBUG: --- Intensive check on initial content_div ---")
        
        # Method 1: Direct find_all for the specific class
        initial_headline_spans = content_div.find_all("span", class_="mw-headline")
        print(f"DEBUG: Method 1: content_div.find_all('span', class_='mw-headline') found {len(initial_headline_spans)} spans.")
        if initial_headline_spans:
            print(f"DEBUG: First few found by Method 1 (IDs): {[s.get('id') for s in initial_headline_spans[:5]]}")
        
        # Method 2: Find by a known ID, then check class
        references_span_by_id = content_div.find("span", id="References")
        if references_span_by_id:
            print(f"DEBUG: Method 2: Found span with id='References'. Its classes: {references_span_by_id.get('class')}")
            if "mw-headline" in references_span_by_id.get("class", []):
                print("DEBUG: Method 2: Span with id='References' HAS 'mw-headline' class.")
            else:
                print("DEBUG: Method 2: Span with id='References' DOES NOT HAVE 'mw-headline' class.")
        else:
            print("DEBUG: Method 2: Could not find span with id='References'.")

        biography_span_by_id = content_div.find("span", id="Biography")
        if biography_span_by_id:
            print(f"DEBUG: Method 2: Found span with id='Biography'. Its classes: {biography_span_by_id.get('class')}")
            if "mw-headline" in biography_span_by_id.get("class", []):
                print("DEBUG: Method 2: Span with id='Biography' HAS 'mw-headline' class.")
            else:
                print("DEBUG: Method 2: Span with id='Biography' DOES NOT HAVE 'mw-headline' class.")
        else:
            print("DEBUG: Method 2: Could not find span with id='Biography'.")

        # Method 3: Iterate through H2/H3 tags and check their children
        print("DEBUG: Method 3: Checking H-tags (h2-h6) for 'mw-headline' spans...")
        count_method_3 = 0
        found_ids_method_3 = []
        for h_tag in content_div.find_all(["h2", "h3", "h4", "h5", "h6"]):
            span = h_tag.find("span", class_="mw-headline")
            if span:
                count_method_3 += 1
                found_ids_method_3.append(span.get("id", "N/A"))
        print(f"DEBUG: Method 3: Found {count_method_3} 'mw-headline' spans within H-tags. IDs (first 5): {found_ids_method_3[:5]}")

        if not initial_headline_spans and not (references_span_by_id and "mw-headline" in references_span_by_id.get("class",[])) and count_method_3 == 0:
            print("DEBUG: ALL CHECKS FAILED to find 'mw-headline' spans in initial content_div.")
            # Consider printing a larger portion of content_div if issues persist
            # print("DEBUG: Printing a larger portion of initial content_div (first 10000 chars) for manual inspection:")
            # print(str(content_div.prettify())[:10000]) 
        print("DEBUG: --- End of intensive check on initial content_div ---\n")
    


    # 2a. Remove "[edit]" links
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()

    # 2b. Remove superscripted footnote markers
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()

    # 3. Remove unwanted elements
    #    a) Infoboxes
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()

    #    b) Table of Contents
    toc_element = content_div.find("div", id="toc") # Renamed to avoid conflict with variable 'toc' in your original code
    if toc_element:
        toc_element.decompose()

    #    c) Navigation boxes and templates
    for nav in content_div.find_all(["div", "table"], class_=re.compile(r"navbox|vertical-navbox|metadata")):
        nav.decompose()

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()

    #    e) Remove raw <img> tags
    for img_tag in content_div.find_all("img"): # Renamed to avoid conflict
        img_tag.decompose()

    # 4. Convert every remaining <table> into Markdown, in place
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> tag to a Markdown-formatted table, preserving any <th> headers.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Data rows (skip header row)
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip any tables already removed or used for navigation
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)  # :contentReference[oaicite:15]{index=15}
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Remove "References", "Further reading", and "External links" sections if requested
    if ignore_references:
        print("\nDEBUG: --- ignore_references is True ---") # DEBUG
        target_sections = {"references", "further reading", "external links"}
        print(f"DEBUG: Target section names: {target_sections}") # DEBUG
        
        all_headline_spans = list(content_div.find_all("span", class_="mw-headline"))
        print(f"DEBUG: Found {len(all_headline_spans)} 'mw-headline' spans in content_div.") # DEBUG
        
        for i, span in enumerate(all_headline_spans):
            if not span.parent:
                # This span's parent was already removed from the tree.
                # print(f"DEBUG: Span {i} ('{span.get_text(strip=True)[:30]}...') has no parent, skipping.") # DEBUG
                continue

            heading_text = span.get_text(strip=True).lower()
            # Optionally, print all heading texts found to check for subtle differences:
            # print(f"DEBUG: Checking span {i}: Extracted text: '{heading_text}' (Original ID: {span.get('id')})") # DEBUG

            if heading_text in target_sections:
                print(f"DEBUG: Matched target section: '{heading_text}' from span {i} (ID: {span.get('id')}).") # DEBUG
                section_heading_tag = span.find_parent(re.compile(r"^h[1-6]$"))

                if not section_heading_tag:
                    print(f"DEBUG: No H-tag ancestor found for '{heading_text}'. Span HTML: {str(span.parent)[:100]}") # DEBUG
                    continue
                
                if not section_heading_tag.parent:
                    print(f"DEBUG: H-tag <{section_heading_tag.name}> for '{heading_text}' (ID: {section_heading_tag.get('id')}) was already decomposed. Skipping.") # DEBUG
                    continue
                
                print(f"DEBUG: Found H-tag: <{section_heading_tag.name} id='{section_heading_tag.get('id')}'> for '{heading_text}'.") # DEBUG

                try:
                    current_section_level = int(section_heading_tag.name[1:])
                except (ValueError, IndexError):
                    print(f"DEBUG: Could not parse level from H-tag name '{section_heading_tag.name}'. Skipping.") # DEBUG
                    continue

                nodes_to_remove = []
                for sibling_node in section_heading_tag.find_next_siblings():
                    if sibling_node.name and sibling_node.name.startswith('h'):
                        try:
                            sibling_level = int(sibling_node.name[1:])
                            if sibling_level <= current_section_level:
                                print(f"DEBUG: Stopping content removal for '{heading_text}' at sibling <{sibling_node.name} id='{sibling_node.get('id')}'>.") # DEBUG
                                break 
                        except (ValueError, IndexError):
                            pass 
                    nodes_to_remove.append(sibling_node)
                
                print(f"DEBUG: For '{heading_text}', planning to remove {len(nodes_to_remove)} content nodes.") # DEBUG
                for node_idx, node in enumerate(nodes_to_remove):
                    if node.parent:
                        # print(f"DEBUG: Decomposing content node {node_idx} for '{heading_text}': <{node.name}>") # DEBUG
                        node.decompose()
                
                if section_heading_tag.parent:
                    print(f"DEBUG: Decomposing H-tag <{section_heading_tag.name} id='{section_heading_tag.get('id')}'> for '{heading_text}'.") # DEBUG
                    section_heading_tag.decompose()
            elif any(ts_part in heading_text for ts_part in ["reference", "external", "further", "see also", "notes"]): # DEBUG for near misses
                print(f"DEBUG: Near miss? Span text: '{heading_text}' (ID: {span.get('id')})") # DEBUG


    # 6. Convert remaining HTML in content_div to Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags :contentReference[oaicite:18]{index=18}
    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)  # :contentReference[oaicite:19]{index=19}

    # 7. Collapse runs of 3+ newlines into exactly two
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [47]:
md_page = get_wikipedia_markdown(
    'Mercedes_Sosa',
    lang='en',
    ignore_references= True,
    ignore_links= True)

DEBUG: Fetching URL: https://en.wikipedia.org/wiki/Mercedes_Sosa
DEBUG: Fetched HTML, length: 369275
DEBUG: Saved fetched HTML to fetched_wikipedia_page_Mercedes_Sosa.html
DEBUG: Raw HTML DOES NOT CONTAIN the string for '<span class="mw-headline" id="References">'.
DEBUG: Parsing HTML with BeautifulSoup (lxml)...
DEBUG: 'mw-parser-output' div FOUND. Object type: <class 'bs4.element.Tag'>

DEBUG: --- Intensive check on initial content_div ---
DEBUG: Method 1: content_div.find_all('span', class_='mw-headline') found 0 spans.
DEBUG: Method 2: Could not find span with id='References'.
DEBUG: Method 2: Could not find span with id='Biography'.
DEBUG: Method 3: Checking H-tags (h2-h6) for 'mw-headline' spans...
DEBUG: Method 3: Found 0 'mw-headline' spans within H-tags. IDs (first 5): []
DEBUG: ALL CHECKS FAILED to find 'mw-headline' spans in initial content_div.
DEBUG: --- End of intensive check on initial content_div ---


DEBUG: --- ignore_references is True ---
DEBUG: Target section names

In [51]:
import re
import requests
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify as md

def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = False
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown,
    excluding infoboxes, navigation templates, images, and—if requested—the
    References, Further reading, and External links sections.

    Args:
        title (str): Wikipedia page title (e.g., "Python_(programming_language)").
        lang (str): Language code (default 'en').
        ignore_references (bool): If True, drop "References", "Further reading",
                                  and "External links" sections entirely.
        ignore_links (bool): If True, strip out all <a> tags entirely.

    Returns:
        str: Markdown-formatted content of the main article body.
    """
    # 1. Fetch raw HTML
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    response = requests.get(url)
    response.raise_for_status()                                                       # 
    html = response.text

    # 2. Parse with BeautifulSoup and isolate the article’s main <div>
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")                         # 
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 2a. Remove all “[edit]” links (<span class="mw-editsection">)
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()                                                          # 

    # 2b. Remove any superscripted footnote markers (<sup class="reference">)
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()                                                                # 

    # 3. Remove unwanted “boilerplate” elements:
    #    a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()                                                            # 

    #    b) Table of Contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()                                                                # 

    #    c) Navigation templates (navbox/vertical-navbox/metadata)
    for nav in content_div.find_all(
        ["div", "table"],
        class_=re.compile(r"navbox|vertical-navbox|metadata")
    ):
        nav.decompose()                                                                # 

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()                                                              # 

    #    e) Raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()                                                                # 

    # 4. Convert any remaining <table> into a Markdown table **in-place**
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> into a Markdown-formatted table, preserving <th> headers.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Now process data rows (skip first <tr> if it was headers)
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip infobox/navigation tables (already removed above)
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)                                      # 
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Remove “References”, “Further reading” & “External links” sections if requested
    if ignore_references:
        # We look for the wrapper <div class="mw-heading mw-heading2"> or mw-heading3
        # that **contains** an <h2 id="References"> or <h2 id="Further_reading">, etc.
        section_ids = {"references", "further_reading", "external_links"}

        # Find all wrapper divs whose class is “mw-heading mw-heading2” or “mw-heading mw-heading3”
        # Inside each wrapper, there’s typically an <h2 id="…"> or <h3 id="…">.
        for wrapper in content_div.find_all("div", class_=re.compile(r"mw-heading mw-heading[23]")):
            heading_tag = wrapper.find(re.compile(r"^h[2-3]$"))
            if heading_tag and heading_tag.get("id", "").strip().lower() in section_ids:
                # Collect all siblings up to the next wrapper of the same form
                siblings_to_remove = []
                for sib in wrapper.find_next_siblings():
                    # Stop once we hit the next section wrapper (another mw-heading mw-heading2/3)
                    if (
                        sib.name == "div"
                        and "mw-heading" in (sib.get("class") or [])
                        and re.match(r"mw-heading mw-heading[23]", " ".join(sib.get("class")))
                    ):
                        break
                    siblings_to_remove.append(sib)

                # Delete those siblings first (everything under “Further reading” or “References”)
                for node in siblings_to_remove:
                    node.decompose()                                                      # 

                # Finally delete the heading wrapper itself
                wrapper.decompose()                                                        # 

    # 6. Convert the cleaned HTML into Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags (keep only their text) 

    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)                                 # 

    # 7. Collapse 3+ blank lines into exactly two
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [52]:
from IPython.display import display, Markdown

md_page = get_wikipedia_markdown(
    'Mercedes_Sosa',
    lang='en',
    ignore_references= True,
    ignore_links= True)

display(Markdown(md_page)) 

Argentine singer (1935–2009)

Not to be confused with Mercedes Sola.

**Haydée Mercedes** "**La Negra**" **Sosa** (Latin American Spanish: [meɾˈseðes ˈsosa]; 9 July 1935 – 4 October 2009) was an Argentine singer who was popular throughout Latin America and many countries outside the region. With her roots in Argentine folk music, Sosa became one of the preeminent exponents of *El nuevo cancionero*. She gave voice to songs written by many Latin American songwriters. Her music made people hail her as the "voice of the voiceless ones". She was often called "the conscience of Latin America".

Sosa performed in venues such as the Lincoln Center in New York City, the Théâtre Mogador in Paris, the Sistine Chapel in Vatican City, as well as sold-out shows in New York's Carnegie Hall and the Roman Colosseum during her final decade of life. Her career spanned four decades and she was the recipient of six Latin Grammy awards (2000, 2003, 2004, 2006, 2009, 2011), including a Latin Grammy Lifetime Achievement Award in 2004 and two posthumous Latin Grammy Award for Best Folk Album in 2009 and 2011. She won the Premio Gardel in 2000, the main musical award in Argentina. She served as an ambassador for UNICEF.

Life
----

Sosa was born on 9 July 1935, in San Miguel de Tucumán, in the northwestern Argentine province of Tucumán, of mestizo ancestry. She was of French, Spanish and Diaguita descent. Her nickname "la negra", which is a common nickname in Argentina for people with darker complexion, is a reference to her indigenous heritage. Her parents, a day laborer and a washerwoman, were Peronists, although they never registered in the party, and she started her career as a singer for the Peronist Party in Tucuman under the name Gladys Osorio. In 1950, at age fifteen, she won a singing competition organized by a local radio station and was given a contract to perform for two months. She recorded her first album, *La Voz de la Zafra*, in 1959. A performance at the 1965 Cosquín National Folklore Festival—where she was introduced and brought to the stage while sitting in the audience by fellow folk singer Jorge Cafrune— brought her to the attention of the Argentine public. Sosa and her first husband, Manuel Oscar Matus, with whom she had one son, were key players in the mid-60s *nueva canción* movement (which was called *nuevo cancionero* in Argentina). Her second record was *Canciones con Fundamento*, a collection of Argentine folk songs.

Sosa with Félix Luna and Ariel Ramírez (at the piano)

Sosa "spent the late 1960s building her audience in Europe and among the cosmopolitan middle class in Buenos Aires, becoming in the process a much bigger star" than her contemporaries. In 1967, Sosa toured the United States and Europe with great success.[*citation needed*] In later years, she performed and recorded extensively, broadening her repertoire to include material from throughout Latin America.

In the early 1970s, Sosa released two concept albums in collaboration with composer Ariel Ramírez and lyricist Félix Luna: *Cantata Sudamericana* and *Mujeres Argentinas* (Argentine Women). She also recorded a tribute to Chilean musician Violeta Parra in 1971, including what was to become one of Sosa's signature songs, *Gracias a la vida*. She further popularized of songs written by Milton Nascimento of Brazil and Pablo Milanés and Silvio Rodríguez both from Cuba. Throughout the decade, she released albums such as *Hasta la Victoria* in 1972 and *Traigo un Pueblo* *en mi Voz* in 1973. They featured songs like "Cuando tenga la tierra", written by Ariel Petrocelli and Daniel Toro, which tackles political and social issues like wealth and land inequality. During the 1970s she was a part of two films by the director Leopoldo Torre Nilsson: *El Santo de la Espada* in 1970 and *Güemes, la tierra en armas* in 1971, in which she portrayed Juana Azurduy de Padilla, the guerrilla military leader who fought for Argentine independence.

Sosa in 1972

After the military junta of Jorge Videla came to power in 1976, the atmosphere in Argentina grew increasingly oppressive. Sosa faced death threats against both her and her family, but refused for many years to leave the country. At a concert in La Plata in 1979, Sosa was searched and arrested on stage, along with all those attending the concert. Their release came about through international intervention. Despite attempts to hold more concerts, she was officially barred from performing by the military regime. Banned in her own country, she moved to Paris and then to Madrid. She has spoken publicly about her artistic and emotional struggles during this period of her life. While in exile, she released the album *A Quien Doy* in 1981. The album included a recording of the song "Cuando Me Acuerdo de Mi Pais" which was originally written by the prolific Chilean singer/songwriter, Patricio Manns. The song, which he wrote while also in political exile, expresses the sorrow he felt from being separated from his homeland. She related to this feeling and struggled to continue recording and performing. In an interview with the New York Times, she said, “It was a mental problem, a problem of morale...It wasn’t my throat, or anything physical".

Sosa returned to Argentina from her exile in Europe in February 1982, several months before the military regime collapsed as a result of the Falklands War, and gave a series of concerts at the *Teatro Ópera* in Buenos Aires, where she invited many of her younger colleagues to share the stage. A double album of recordings from these performances became an instant best seller. She then traveled to perform in her home province of Tucuman. However, these performances were largely ignored by mainstream media in the country. In subsequent years, Sosa continued to tour both in Argentina and abroad, performing in such venues as the Lincoln Center in New York City and the *Théâtre Mogador* in Paris. In poor health for much of the 1990s, she performed a comeback show in Argentina in 1998. In 1994, she played in the Sistine Chapel in Vatican City. In 2002, she sold out both Carnegie Hall in New York and the Colosseum in Rome in the same year.

Sosa in 1973

A supporter of Perón, she favored leftist causes throughout her life. She supported President Raul Alfonsin in the election of 1983 which marked the return of democracy in Argentina following the dictatorship. She referred to this election as "Argentina's Spring" She opposed President Carlos Menem, who was in office from 1989 to 1999, and supported the election of Néstor Kirchner, who became president in 2003.
Sosa was a UNESCO Goodwill Ambassador for Latin America and the Caribbean.

Sosa disliked being identified as a protest singer. While she was outright in her political stances, Sosa said the following on the position of the artist:

> “An artist isn’t political in the party political sense – they have a constituency, which is their public – it is the poetry that matters most of all.”

In a career spanning four decades, she worked with performers across several genres and generations, folk, opera, pop, rock, including Martha Argerich, Andrea Bocelli, David Broza, Franco Battiato, Jaime Roos, Joan Baez, Francis Cabrel, Gal Costa, Luz Casal, Lila Downs, Lucio Dalla, Maria Farantouri, Lucecita Benitez, Nilda Fernández, Charly Garcia, León Gieco, Gian Marco, Nana Mouskouri, Pablo Milanés, Holly Near, Milton Nascimento, Pata Negra, Fito Páez, Franco De Vita, Lourdes Pérez, Luciano Pavarotti, Silvio Rodríguez, Ismael Serrano, Shakira, Sting, Caetano Veloso, Julieta Venegas, Gustavo Cerati and Konstantin Wecker

Sosa participated in a 1999 production of Ariel Ramírez's *Misa Criolla*. Her song *Balderrama* is featured in the 2008 movie *Che*, starring Benicio del Toro as the Argentine Marxist revolutionary Che Guevara.

Sosa was the co-chair of the Earth Charter International Commission.

Awards
------

Sosa won the Latin Grammy Award for Best Folk Album in 2000 (*Misa Criolla*), 2003 (*Acústico*), 2006 (*Corazón Libre*), 2009 (*Cantora 1*, which also won Best Recording Package and was nominated for Album of the Year), and 2011 (*Deja La Vida Volar*), as well as several international awards.

In 1995, Konex Foundation from Argentina granted her the Diamond Konex Award, one of the most prestigious awards in Argentina, as the most important personality in the popular music of her country in the last decade.

Death
-----

Mercedes Sosa lying in repose, with her family and President Cristina Fernández de Kirchner viewing

Suffering from recurrent endocrine and respiratory problems in later years, the 74-year-old Sosa was hospitalized in Buenos Aires on 18 September 2009. She died from multiple organ failure on 4 October 2009, at 5:15 am. She is survived by one son, Fabián Matus, born of her first marriage. He said: "She lived her 74 years to the fullest. She had done practically everything she wanted, she didn't have any type of barrier or any type of fear that limited her". The hospital expressed its sympathies to her relatives. Her website featured the following: "Her undisputed talent, her honesty and her profound convictions leave a great legacy to future generations".

Her body was placed on display at the National Congress building in Buenos Aires for the public to pay their respects, and President Fernández de Kirchner ordered three days of national mourning. Thousands had queued by the end of the day.

Sosa's obituary in *The Daily Telegraph* said she was "an unrivalled interpreter of works by her compatriot, the Argentine Atahualpa Yupanqui, and Chile's Violeta Parra". Helen Popper of Reuters reported her death by saying she "fought South America's dictators with her voice and became a giant of contemporary Latin American music". Sosa received three Latin Grammy nominations for her album, in 2009 . She went on to win Best Folk Album about a month after her death.

Tributes
--------

In 2019, Sosa was celebrated by a Google Doodle. The doodle was showcased in Argentina, Chile, Uruguay, Paraguay, Bolivia, Peru, Ecuador, Cuba, Iceland, Sweden, Serbia, Greece, Israel and Vietnam.

In 2023, *Rolling Stone* ranked Sosa at number 160 on its list of the 200 Greatest Singers of All Time.

Discography
-----------

Sosa in 2005, with Argentina's then-First Lady (later president from 2007 to 2015), Cristina Fernández de Kirchner

Sosa recorded forty albums.

### Studio albums

| Year | Album details |
| --- | --- |
| 1962 | La Voz De La ZafraLabel: RCA |
| 1965 | Canciones Con FundamentoLabel: El Grillo |
| 1966 | HermanoLabel: Philips |
| 1966 | Yo No Canto Por CantarLabel: Philips |
| 1967 | Para Cantarle A Mi GenteLabel: Philips |
| 1968 | Con Sabor A Mercedes SosaLabel: Philips |
| 1969 | Mujeres ArgentinasLabel: Philips |
| 1970 | El Grito De La TierraLabel: Philips |
| 1970 | Navidad Con Mercedes SosaLabel: Philips |
| 1971 | Homenaje a Violeta ParraLabel: Philips |
| 1972 | Hasta La VictoriaLabel: Philips |
| 1972 | Cantata SudamericanaLabel: Philips |
| 1973 | Traigo Un Pueblo En Mi VozLabel: Philips |
| 1975 | A Que Florezca Mi PuebloLabel: Philips |
| 1976 | En Dirección Del VientoLabel: Philips |
| 1977 | Mercedes Sosa Interpreta A Atahualpa YupanquiLabel: Philips |
| 1979 | Serenata Para La Tierra De UnoLabel: Philips |
| 1981 | A Quien Doy / Cuando Me Acuerdo de Mi PaísLabel: Philips |
| 1982 | Como Un Pájaro LibreLabel: Philips |
| 1983 | Mercedes SosaLabel: Philips |
| 1984 | ¿Será Posible El Sur?Label: Philips |
| 1985 | Vengo A Ofrecer Mi CorazónLabel: Philips |
| 1986 | Mercedes Sosa '86Label: Philips |
| 1987 | Mercedes Sosa '87Label: Philips |
| 1993 | SinoLabel: Philips/Polygram |
| 1994 | Gestos De AmorLabel: Polydor |
| 1996 | Escondido En Mi PaísLabel: Polydor |
| 1997 | Alta Fidelidad(w/Charly García)Label: Mercury |
| 1998 | Al DespertarLabel: Mercury |
| 1999 | Misa CriollaLabel: Mercury |
| 2005 | Corazón LibreLabel: Edge |
| 2009 | Cantora 1(w/various artists)Label: RCA |
| 2009 | Cantora 2(w/various artists)Label: RCA |
| 2011 | CensuradaLabel: Philips |
| 2015 | LuceritoLabel: RCA |

### EPs

| Year | EP details |
| --- | --- |
| 1975 | Niño De MañanaLabel: Philips |

### Live albums

| Year | Album details |
| --- | --- |
| 1973 | Si Se Calla El Cantor(with Gloria Martin)Label: Philips |
| 1980 | Gravado Ao Vivo No BrasilLabel: Philips |
| 1982 | Mercedes Sosa en ArgentinaLabel: Phonogram/Philips |
| 1985 | Corazón Americano(withMilton Nascimento&León Gieco)Label: Philips |
| 1989 | Live in EuropeLabel: Tropical Music/Polygram Argentina |
| 1991 | De MíLabel: Philips |
| 2002 | Acústico En VivoLabel: Sony Music Argentina |
| 2003 | Argentina Quiere Cantar(withVíctor Heredia&León Gieco)Label: Odeon/EMI |
| 2010 | Deja La Vida Volar (En Gira)Label: RCA |
| 2014 | AngelLabel: Universal Music |
| 2024 | En vivo en el Gran Rex 2006Label: INAMU Discos |
| Mercedes Sosa en Nueva York, 1974Label: Sony Music Argentina |

### Compilation albums

| Year | Album details |
| --- | --- |
| 1975 | Disco De OroLabel: Philips |
| 1983 | RecitalLabel: Philips |
| 1988 | Amigos MíosLabel: Philips |
| 1993 | 30 AñosLabel: Polygram Argentina |
| 1995 | OroLabel: Polygram |
| 1997 | The Best Of Mercedes SosaLabel: Mercury |
| 2013 | Siempre En TiLabel: Universal Music |

Filmography
-----------

* *Güemes, la tierra en armas* (1971)
* *Argentinísima* (1972)
* *Esta es mi Argentina* (1974)
* *Mercedes Sosa, como un pájaro libre* (1983)
* *Será possible el sur: Mercedes Sosa* (1985)
* *Historias de Argentina en vivo* (2001)

NewPP limit report
Parsed by mw‐api‐ext.eqiad.main‐655cc685c8‐xdg6t
Cached time: 20250603141506
Cache expiry: 2592000
Reduced expiry: false
Complications: [vary‐revision‐sha1, show‐toc]
CPU time usage: 1.359 seconds
Real time usage: 1.721 seconds
Preprocessor visited node count: 6973/1000000
Revision size: 29435/2097152 bytes
Post‐expand include size: 288001/2097152 bytes
Template argument size: 6520/2097152 bytes
Highest expansion depth: 20/100
Expensive parser function count: 8/500
Unstrip recursion depth: 1/20
Unstrip post‐expand size: 166311/5000000 bytes
Lua time usage: 0.741/10.000 seconds
Lua memory usage: 16760866/52428800 bytes
Number of Wikibase entities loaded: 1/500
Transclusion expansion time report (%,ms,calls,template)
100.00% 1305.662 1 -total
29.52% 385.448 1 Template:Reflist
20.38% 266.073 1 Template:Infobox\_person
9.77% 127.503 20 Template:Cite\_web
8.75% 114.217 5 Template:Cite\_book
8.41% 109.751 1 Template:In\_lang
7.34% 95.784 1 Template:Infobox\_musical\_artist
7.02% 91.627 2 Template:Short\_description
6.76% 88.198 6 Template:Br\_separated\_entries
6.03% 78.712 1 Template:Mercedes\_Sosa
Saved in parser cache with key enwiki:pcache:476992:|#|:idhash:canonical and timestamp 20250603141506 and revision id 1293758111. Rendering was triggered because: api-parse

In [53]:
import re
import requests
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify as md

def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = False
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown,
    excluding infoboxes, navigation templates, images, and—if requested—the
    References, Further reading, and External links sections. Additionally,
    removes any parser‐debug footer (e.g., “NewPP limit report …”) that may
    appear at the end.

    Args:
        title (str): Wikipedia page title (e.g., "Mercedes_Sosa").
        lang (str): Language code (default 'en').
        ignore_references (bool): If True, drop "References", "Further reading",
                                  and "External links" sections entirely.
        ignore_links (bool): If True, strip out all <a> tags entirely.

    Returns:
        str: Markdown-formatted content of the main article body.
    """
    # 1. Fetch raw HTML
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    response = requests.get(url)
    response.raise_for_status()                                                        # 
    html = response.text

    # 2. Parse with BeautifulSoup and isolate the article’s main <div>
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")                          # 
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 2a. Remove all “[edit]” links (<span class="mw-editsection">…)
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()                                                           # 

    # 2b. Remove any superscript footnote markers (<sup class="reference">…)
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()                                                                 # 

    # 2c. Remove any parser‐debug footer text (e.g., “NewPP limit report …”)
    #     This often appears as a single NavigableString inside content_div at the end.
    for debug_text in content_div.find_all(text=lambda t: t and "NewPP limit report" in t):
        debug_text.extract()                                                            # 

    # 3. Remove unwanted “boilerplate” elements:
    #    a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()                                                             # 

    #    b) Table of Contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()                                                                 # 

    #    c) Navigation templates (navbox/vertical-navbox/metadata)
    for nav in content_div.find_all(
        ["div", "table"],
        class_=re.compile(r"navbox|vertical-navbox|metadata")
    ):
        nav.decompose()                                                                 # 

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()                                                               # 

    #    e) Raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()                                                                 # 

    # 4. Convert any remaining <table> into a Markdown table **in-place**
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> into a Markdown-formatted table, preserving <th> headers.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Now process data rows (skip the first <tr> if it was header row)
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip infobox/navigation tables (already removed above)
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)                                       # 
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Remove “References”, “Further reading” & “External links” sections if requested
    if ignore_references:
        section_ids = {"references", "further_reading", "external_links"}
        # We look for wrapper <div class="mw-heading mw-heading2"> or mw-heading3
        for wrapper in content_div.find_all("div", class_=re.compile(r"mw-heading mw-heading[23]")):
            heading_tag = wrapper.find(re.compile(r"^h[2-3]$"))
            if heading_tag and heading_tag.get("id", "").strip().lower() in section_ids:
                # Collect every sibling until the next wrapper of the same form
                siblings_to_remove = []
                for sib in wrapper.find_next_siblings():
                    if (
                        sib.name == "div"
                        and "mw-heading" in (sib.get("class") or [])
                        and re.match(r"mw-heading mw-heading[23]", " ".join(sib.get("class") or []))
                    ):
                        break
                    siblings_to_remove.append(sib)
                # First delete those siblings
                for node in siblings_to_remove:
                    node.decompose()                                                         # 
                # Finally delete the wrapper itself
                wrapper.decompose()                                                           # 

    # 6. Convert the cleaned HTML into Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags (keep only their text) 

    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)                                  # 

    # 7. Collapse 3+ blank lines into exactly two
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [54]:
from IPython.display import display, Markdown

md_page = get_wikipedia_markdown(
    'Mercedes_Sosa',
    lang='en',
    ignore_references= True,
    ignore_links= True)

display(Markdown(md_page)) 

  for debug_text in content_div.find_all(text=lambda t: t and "NewPP limit report" in t):


Argentine singer (1935–2009)

Not to be confused with Mercedes Sola.

**Haydée Mercedes** "**La Negra**" **Sosa** (Latin American Spanish: [meɾˈseðes ˈsosa]; 9 July 1935 – 4 October 2009) was an Argentine singer who was popular throughout Latin America and many countries outside the region. With her roots in Argentine folk music, Sosa became one of the preeminent exponents of *El nuevo cancionero*. She gave voice to songs written by many Latin American songwriters. Her music made people hail her as the "voice of the voiceless ones". She was often called "the conscience of Latin America".

Sosa performed in venues such as the Lincoln Center in New York City, the Théâtre Mogador in Paris, the Sistine Chapel in Vatican City, as well as sold-out shows in New York's Carnegie Hall and the Roman Colosseum during her final decade of life. Her career spanned four decades and she was the recipient of six Latin Grammy awards (2000, 2003, 2004, 2006, 2009, 2011), including a Latin Grammy Lifetime Achievement Award in 2004 and two posthumous Latin Grammy Award for Best Folk Album in 2009 and 2011. She won the Premio Gardel in 2000, the main musical award in Argentina. She served as an ambassador for UNICEF.

Life
----

Sosa was born on 9 July 1935, in San Miguel de Tucumán, in the northwestern Argentine province of Tucumán, of mestizo ancestry. She was of French, Spanish and Diaguita descent. Her nickname "la negra", which is a common nickname in Argentina for people with darker complexion, is a reference to her indigenous heritage. Her parents, a day laborer and a washerwoman, were Peronists, although they never registered in the party, and she started her career as a singer for the Peronist Party in Tucuman under the name Gladys Osorio. In 1950, at age fifteen, she won a singing competition organized by a local radio station and was given a contract to perform for two months. She recorded her first album, *La Voz de la Zafra*, in 1959. A performance at the 1965 Cosquín National Folklore Festival—where she was introduced and brought to the stage while sitting in the audience by fellow folk singer Jorge Cafrune— brought her to the attention of the Argentine public. Sosa and her first husband, Manuel Oscar Matus, with whom she had one son, were key players in the mid-60s *nueva canción* movement (which was called *nuevo cancionero* in Argentina). Her second record was *Canciones con Fundamento*, a collection of Argentine folk songs.

Sosa with Félix Luna and Ariel Ramírez (at the piano)

Sosa "spent the late 1960s building her audience in Europe and among the cosmopolitan middle class in Buenos Aires, becoming in the process a much bigger star" than her contemporaries. In 1967, Sosa toured the United States and Europe with great success.[*citation needed*] In later years, she performed and recorded extensively, broadening her repertoire to include material from throughout Latin America.

In the early 1970s, Sosa released two concept albums in collaboration with composer Ariel Ramírez and lyricist Félix Luna: *Cantata Sudamericana* and *Mujeres Argentinas* (Argentine Women). She also recorded a tribute to Chilean musician Violeta Parra in 1971, including what was to become one of Sosa's signature songs, *Gracias a la vida*. She further popularized of songs written by Milton Nascimento of Brazil and Pablo Milanés and Silvio Rodríguez both from Cuba. Throughout the decade, she released albums such as *Hasta la Victoria* in 1972 and *Traigo un Pueblo* *en mi Voz* in 1973. They featured songs like "Cuando tenga la tierra", written by Ariel Petrocelli and Daniel Toro, which tackles political and social issues like wealth and land inequality. During the 1970s she was a part of two films by the director Leopoldo Torre Nilsson: *El Santo de la Espada* in 1970 and *Güemes, la tierra en armas* in 1971, in which she portrayed Juana Azurduy de Padilla, the guerrilla military leader who fought for Argentine independence.

Sosa in 1972

After the military junta of Jorge Videla came to power in 1976, the atmosphere in Argentina grew increasingly oppressive. Sosa faced death threats against both her and her family, but refused for many years to leave the country. At a concert in La Plata in 1979, Sosa was searched and arrested on stage, along with all those attending the concert. Their release came about through international intervention. Despite attempts to hold more concerts, she was officially barred from performing by the military regime. Banned in her own country, she moved to Paris and then to Madrid. She has spoken publicly about her artistic and emotional struggles during this period of her life. While in exile, she released the album *A Quien Doy* in 1981. The album included a recording of the song "Cuando Me Acuerdo de Mi Pais" which was originally written by the prolific Chilean singer/songwriter, Patricio Manns. The song, which he wrote while also in political exile, expresses the sorrow he felt from being separated from his homeland. She related to this feeling and struggled to continue recording and performing. In an interview with the New York Times, she said, “It was a mental problem, a problem of morale...It wasn’t my throat, or anything physical".

Sosa returned to Argentina from her exile in Europe in February 1982, several months before the military regime collapsed as a result of the Falklands War, and gave a series of concerts at the *Teatro Ópera* in Buenos Aires, where she invited many of her younger colleagues to share the stage. A double album of recordings from these performances became an instant best seller. She then traveled to perform in her home province of Tucuman. However, these performances were largely ignored by mainstream media in the country. In subsequent years, Sosa continued to tour both in Argentina and abroad, performing in such venues as the Lincoln Center in New York City and the *Théâtre Mogador* in Paris. In poor health for much of the 1990s, she performed a comeback show in Argentina in 1998. In 1994, she played in the Sistine Chapel in Vatican City. In 2002, she sold out both Carnegie Hall in New York and the Colosseum in Rome in the same year.

Sosa in 1973

A supporter of Perón, she favored leftist causes throughout her life. She supported President Raul Alfonsin in the election of 1983 which marked the return of democracy in Argentina following the dictatorship. She referred to this election as "Argentina's Spring" She opposed President Carlos Menem, who was in office from 1989 to 1999, and supported the election of Néstor Kirchner, who became president in 2003.
Sosa was a UNESCO Goodwill Ambassador for Latin America and the Caribbean.

Sosa disliked being identified as a protest singer. While she was outright in her political stances, Sosa said the following on the position of the artist:

> “An artist isn’t political in the party political sense – they have a constituency, which is their public – it is the poetry that matters most of all.”

In a career spanning four decades, she worked with performers across several genres and generations, folk, opera, pop, rock, including Martha Argerich, Andrea Bocelli, David Broza, Franco Battiato, Jaime Roos, Joan Baez, Francis Cabrel, Gal Costa, Luz Casal, Lila Downs, Lucio Dalla, Maria Farantouri, Lucecita Benitez, Nilda Fernández, Charly Garcia, León Gieco, Gian Marco, Nana Mouskouri, Pablo Milanés, Holly Near, Milton Nascimento, Pata Negra, Fito Páez, Franco De Vita, Lourdes Pérez, Luciano Pavarotti, Silvio Rodríguez, Ismael Serrano, Shakira, Sting, Caetano Veloso, Julieta Venegas, Gustavo Cerati and Konstantin Wecker

Sosa participated in a 1999 production of Ariel Ramírez's *Misa Criolla*. Her song *Balderrama* is featured in the 2008 movie *Che*, starring Benicio del Toro as the Argentine Marxist revolutionary Che Guevara.

Sosa was the co-chair of the Earth Charter International Commission.

Awards
------

Sosa won the Latin Grammy Award for Best Folk Album in 2000 (*Misa Criolla*), 2003 (*Acústico*), 2006 (*Corazón Libre*), 2009 (*Cantora 1*, which also won Best Recording Package and was nominated for Album of the Year), and 2011 (*Deja La Vida Volar*), as well as several international awards.

In 1995, Konex Foundation from Argentina granted her the Diamond Konex Award, one of the most prestigious awards in Argentina, as the most important personality in the popular music of her country in the last decade.

Death
-----

Mercedes Sosa lying in repose, with her family and President Cristina Fernández de Kirchner viewing

Suffering from recurrent endocrine and respiratory problems in later years, the 74-year-old Sosa was hospitalized in Buenos Aires on 18 September 2009. She died from multiple organ failure on 4 October 2009, at 5:15 am. She is survived by one son, Fabián Matus, born of her first marriage. He said: "She lived her 74 years to the fullest. She had done practically everything she wanted, she didn't have any type of barrier or any type of fear that limited her". The hospital expressed its sympathies to her relatives. Her website featured the following: "Her undisputed talent, her honesty and her profound convictions leave a great legacy to future generations".

Her body was placed on display at the National Congress building in Buenos Aires for the public to pay their respects, and President Fernández de Kirchner ordered three days of national mourning. Thousands had queued by the end of the day.

Sosa's obituary in *The Daily Telegraph* said she was "an unrivalled interpreter of works by her compatriot, the Argentine Atahualpa Yupanqui, and Chile's Violeta Parra". Helen Popper of Reuters reported her death by saying she "fought South America's dictators with her voice and became a giant of contemporary Latin American music". Sosa received three Latin Grammy nominations for her album, in 2009 . She went on to win Best Folk Album about a month after her death.

Tributes
--------

In 2019, Sosa was celebrated by a Google Doodle. The doodle was showcased in Argentina, Chile, Uruguay, Paraguay, Bolivia, Peru, Ecuador, Cuba, Iceland, Sweden, Serbia, Greece, Israel and Vietnam.

In 2023, *Rolling Stone* ranked Sosa at number 160 on its list of the 200 Greatest Singers of All Time.

Discography
-----------

Sosa in 2005, with Argentina's then-First Lady (later president from 2007 to 2015), Cristina Fernández de Kirchner

Sosa recorded forty albums.

### Studio albums

| Year | Album details |
| --- | --- |
| 1962 | La Voz De La ZafraLabel: RCA |
| 1965 | Canciones Con FundamentoLabel: El Grillo |
| 1966 | HermanoLabel: Philips |
| 1966 | Yo No Canto Por CantarLabel: Philips |
| 1967 | Para Cantarle A Mi GenteLabel: Philips |
| 1968 | Con Sabor A Mercedes SosaLabel: Philips |
| 1969 | Mujeres ArgentinasLabel: Philips |
| 1970 | El Grito De La TierraLabel: Philips |
| 1970 | Navidad Con Mercedes SosaLabel: Philips |
| 1971 | Homenaje a Violeta ParraLabel: Philips |
| 1972 | Hasta La VictoriaLabel: Philips |
| 1972 | Cantata SudamericanaLabel: Philips |
| 1973 | Traigo Un Pueblo En Mi VozLabel: Philips |
| 1975 | A Que Florezca Mi PuebloLabel: Philips |
| 1976 | En Dirección Del VientoLabel: Philips |
| 1977 | Mercedes Sosa Interpreta A Atahualpa YupanquiLabel: Philips |
| 1979 | Serenata Para La Tierra De UnoLabel: Philips |
| 1981 | A Quien Doy / Cuando Me Acuerdo de Mi PaísLabel: Philips |
| 1982 | Como Un Pájaro LibreLabel: Philips |
| 1983 | Mercedes SosaLabel: Philips |
| 1984 | ¿Será Posible El Sur?Label: Philips |
| 1985 | Vengo A Ofrecer Mi CorazónLabel: Philips |
| 1986 | Mercedes Sosa '86Label: Philips |
| 1987 | Mercedes Sosa '87Label: Philips |
| 1993 | SinoLabel: Philips/Polygram |
| 1994 | Gestos De AmorLabel: Polydor |
| 1996 | Escondido En Mi PaísLabel: Polydor |
| 1997 | Alta Fidelidad(w/Charly García)Label: Mercury |
| 1998 | Al DespertarLabel: Mercury |
| 1999 | Misa CriollaLabel: Mercury |
| 2005 | Corazón LibreLabel: Edge |
| 2009 | Cantora 1(w/various artists)Label: RCA |
| 2009 | Cantora 2(w/various artists)Label: RCA |
| 2011 | CensuradaLabel: Philips |
| 2015 | LuceritoLabel: RCA |

### EPs

| Year | EP details |
| --- | --- |
| 1975 | Niño De MañanaLabel: Philips |

### Live albums

| Year | Album details |
| --- | --- |
| 1973 | Si Se Calla El Cantor(with Gloria Martin)Label: Philips |
| 1980 | Gravado Ao Vivo No BrasilLabel: Philips |
| 1982 | Mercedes Sosa en ArgentinaLabel: Phonogram/Philips |
| 1985 | Corazón Americano(withMilton Nascimento&León Gieco)Label: Philips |
| 1989 | Live in EuropeLabel: Tropical Music/Polygram Argentina |
| 1991 | De MíLabel: Philips |
| 2002 | Acústico En VivoLabel: Sony Music Argentina |
| 2003 | Argentina Quiere Cantar(withVíctor Heredia&León Gieco)Label: Odeon/EMI |
| 2010 | Deja La Vida Volar (En Gira)Label: RCA |
| 2014 | AngelLabel: Universal Music |
| 2024 | En vivo en el Gran Rex 2006Label: INAMU Discos |
| Mercedes Sosa en Nueva York, 1974Label: Sony Music Argentina |

### Compilation albums

| Year | Album details |
| --- | --- |
| 1975 | Disco De OroLabel: Philips |
| 1983 | RecitalLabel: Philips |
| 1988 | Amigos MíosLabel: Philips |
| 1993 | 30 AñosLabel: Polygram Argentina |
| 1995 | OroLabel: Polygram |
| 1997 | The Best Of Mercedes SosaLabel: Mercury |
| 2013 | Siempre En TiLabel: Universal Music |

Filmography
-----------

* *Güemes, la tierra en armas* (1971)
* *Argentinísima* (1972)
* *Esta es mi Argentina* (1974)
* *Mercedes Sosa, como un pájaro libre* (1983)
* *Será possible el sur: Mercedes Sosa* (1985)
* *Historias de Argentina en vivo* (2001)

Transclusion expansion time report (%,ms,calls,template)
100.00% 1305.662 1 -total
29.52% 385.448 1 Template:Reflist
20.38% 266.073 1 Template:Infobox\_person
9.77% 127.503 20 Template:Cite\_web
8.75% 114.217 5 Template:Cite\_book
8.41% 109.751 1 Template:In\_lang
7.34% 95.784 1 Template:Infobox\_musical\_artist
7.02% 91.627 2 Template:Short\_description
6.76% 88.198 6 Template:Br\_separated\_entries
6.03% 78.712 1 Template:Mercedes\_Sosa
Saved in parser cache with key enwiki:pcache:476992:|#|:idhash:canonical and timestamp 20250603141506 and revision id 1293758111. Rendering was triggered because: api-parse

In [95]:
import re
import requests
from bs4 import BeautifulSoup, Tag, Comment
from markdownify import markdownify as md

@tool
def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = True
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown,
    excluding infoboxes, navigation templates, images, and—if requested—the
    References, Further reading, and External links sections. It's recommended
    to start with ignore_references=True and ignore_links=True 
    to reduce the amount of output to the pure infomation.

    Args:
        title (str): Wikipedia page title (e.g., "Mercedes_Sosa").
        lang (str): Language code (default 'en').
        ignore_references (bool): If True, drop "References", "Further reading",
                                  and "External links" sections entirely.
        ignore_links (bool): If True, strip out all <a> tags entirely.

    Returns:
        str: Markdown-formatted content of the main article body.
    """
    # 1. Fetch raw HTML
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    try:
        response = requests.get(url)
        response.raise_for_status()     
    except requests.exceptions.HTTPError as e:

        # use wikipedia's API to check if the page exists
        api_url = f"https://{lang}.wikipedia.org/w/api.php"
        search_params = {
            'list': 'search',
            'srprop': '',
            'srlimit': 10,
            'limit': 10,
            'srsearch': title.replace("_", " "),
            'srinfo': 'suggestion',
            'format': 'json',
            'action': 'query'
        }

        headers = {
            'User-Agent': "mozilla /5.0 (Windows NT 10.0; Win64; x64)"
        }

        r = requests.get(api_url, params=search_params, headers=headers)

        raw_results = r.json()
        search_results = [d['title'].replace(" ", "_") for d in raw_results['query']['search']]
        if ('searchinfo' in raw_results['query']) and ('suggestion' in raw_results['query']['searchinfo']):
            search_results.insert(0, raw_results['query']['searchinfo']['suggestion'].replace(" ", "_"))

        errorMsg = f"Could not fetch page '{title}' for language '{lang}' (HTTP {response.status_code})."
        if search_results:
            errorMsg += f" Did you mean one of these pages? {', '.join(search_results)}"

        raise ValueError(errorMsg) from e

    html = response.text

    # 2. Parse with BeautifulSoup and isolate the article’s main <div>
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")                          # 
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 2a. Remove all “[edit]” links (<span class="mw-editsection">…)
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()                                                           # 

    # 2b. Remove any superscript footnote markers (<sup class="reference">…)
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()                                                                 # 

    # 2c. Remove any parser‐debug comments (e.g., “NewPP limit report…”, “Transclusion expansion time report…”)
    for comment in content_div.find_all(string=lambda text: isinstance(text, Comment)):
        comment_text = str(comment)
        # If the comment contains debug keywords, extract it
        if (
            "NewPP limit report" in comment_text
            or "Transclusion expansion time report" in comment_text
            or "Saved in parser cache" in comment_text
        ):
            comment.extract()  # 

    # 3. Remove unwanted “boilerplate” elements:
    #    a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()                                                             # 

    #    b) Table of Contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()                                                                 # 

    #    c) Navigation templates (navbox/vertical-navbox/metadata)
    for nav in content_div.find_all(
        ["div", "table"],
        class_=re.compile(r"navbox|vertical-navbox|metadata")
    ):
        nav.decompose()                                                                 # 

    #    d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()                                                               # 

    #    e) Raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()                                                                 # 

    # 4. Convert any remaining <table> into a Markdown table **in-place**
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> into a Markdown-formatted table, preserving <th> headers.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Now process data rows (skip the first <tr> if it was header row)
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip infobox/navigation tables (already removed above)
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)                                        # 
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Remove “References”, “Further reading” & “External links” sections if requested
    if ignore_references:
        section_ids = {"references", "further_reading", "external_links"}
        # We look for wrapper <div class="mw-heading mw-heading2"> or mw-heading3
        for wrapper in content_div.find_all("div", class_=re.compile(r"mw-heading mw-heading[23]")):
            heading_tag = wrapper.find(re.compile(r"^h[2-3]$"))
            if heading_tag and heading_tag.get("id", "").strip().lower() in section_ids:
                # Collect every sibling until the next wrapper of the same form
                siblings_to_remove = []
                for sib in wrapper.find_next_siblings():
                    if (
                        sib.name == "div"
                        and "mw-heading" in (sib.get("class") or [])
                        and re.match(r"mw-heading mw-heading[23]", " ".join(sib.get("class") or []))
                    ):
                        break
                    siblings_to_remove.append(sib)
                # First delete those siblings
                for node in siblings_to_remove:
                    node.decompose()                                                        # 
                # Finally delete the wrapper itself
                wrapper.decompose()                                                          # 

    # 6. Convert the cleaned HTML into Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags (keep only their text) 

    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)                                   # 

    # 7. Collapse 3+ blank lines into exactly two
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()

    return markdown_text


In [97]:
md_page = get_wikipedia_markdown(
    'USA_Obama',
    lang='en',
    ignore_references= False,
    ignore_links= False)


ValueError: Could not fetch page 'USA_Obama' for language 'en' (HTTP 404). Did you mean one of these pages? Barack_Obama, United_States_presidential_approval_rating, Family_of_Barack_Obama, Michelle_Obama, List_of_people_granted_executive_clemency_by_Barack_Obama, Presidency_of_Barack_Obama, Barack_Obama_citizenship_conspiracy_theories, Barack_Obama_2008_presidential_campaign, Renegades:_Born_in_the_USA, Barack_Obama_Presidential_Center

In [98]:
from IPython.display import display, Markdown

md_page = get_wikipedia_markdown(
    'Mercedes_Sosa',
    lang='en',
    ignore_references= False,
    ignore_links= False)

display(Markdown(md_page)) 

Argentine singer (1935–2009)

Not to be confused with [Mercedes Sola](/wiki/Mercedes_Sola "Mercedes Sola").

**Haydée Mercedes** "**La Negra**" **Sosa** (Latin American Spanish: [[meɾˈseðes ˈsosa]](/wiki/Help:IPA/Spanish "Help:IPA/Spanish"); 9 July 1935 – 4 October 2009) was an [Argentine](/wiki/Argentines "Argentines") singer who was popular throughout [Latin America](/wiki/Latin_America "Latin America") and many countries outside the region. With her roots in [Argentine folk music](/wiki/Folk_music_of_Argentina "Folk music of Argentina"), Sosa became one of the preeminent exponents of *El nuevo cancionero*. She gave voice to songs written by many Latin American songwriters. Her music made people hail her as the "voice of the voiceless ones". She was often called "the conscience of Latin America".

Sosa performed in venues such as the [Lincoln Center](/wiki/Lincoln_Center "Lincoln Center") in New York City, the [Théâtre Mogador](/wiki/Th%C3%A9%C3%A2tre_Mogador "Théâtre Mogador") in Paris, the [Sistine Chapel](/wiki/Sistine_Chapel "Sistine Chapel") in Vatican City, as well as sold-out shows in New York's [Carnegie Hall](/wiki/Carnegie_Hall "Carnegie Hall") and the Roman [Colosseum](/wiki/Colosseum "Colosseum") during her final decade of life. Her career spanned four decades and she was the recipient of six [Latin Grammy](/wiki/Latin_Grammy "Latin Grammy") awards (2000, 2003, 2004, 2006, 2009, 2011), including a [Latin Grammy Lifetime Achievement Award](/wiki/Latin_Grammy_Lifetime_Achievement_Award "Latin Grammy Lifetime Achievement Award") in 2004 and two posthumous [Latin Grammy Award for Best Folk Album](/wiki/Latin_Grammy_Award_for_Best_Folk_Album "Latin Grammy Award for Best Folk Album") in 2009 and 2011. She won the [Premio Gardel](/wiki/Premios_Gardel "Premios Gardel") in 2000, the main musical award in Argentina. She served as an ambassador for [UNICEF](/wiki/UNICEF "UNICEF").

Life
----

Sosa was born on 9 July 1935, in [San Miguel de Tucumán](/wiki/San_Miguel_de_Tucum%C3%A1n "San Miguel de Tucumán"), in the [northwestern Argentine](/wiki/Argentine_Northwest "Argentine Northwest") province of [Tucumán](/wiki/Tucum%C3%A1n_Province "Tucumán Province"), of [mestizo](/wiki/Mestizo "Mestizo") ancestry. She was of French, Spanish and [Diaguita](/wiki/Diaguita "Diaguita") descent. Her nickname "la negra", which is a common nickname in Argentina for people with darker complexion, is a reference to her indigenous heritage. Her parents, a day laborer and a washerwoman, were [Peronists](/wiki/Peronism "Peronism"), although they never registered in the party, and she started her career as a singer for the [Peronist Party](/wiki/Peronist_Party "Peronist Party") in [Tucuman](/wiki/Tucum%C3%A1n_Province "Tucumán Province") under the name Gladys Osorio. In 1950, at age fifteen, she won a singing competition organized by a local radio station and was given a contract to perform for two months. She recorded her first album, *[La Voz de la Zafra](/wiki/La_voz_de_la_zafra "La voz de la zafra")*, in 1959. A performance at the 1965 [Cosquín National Folklore Festival](/wiki/Cosqu%C3%ADn_Festival "Cosquín Festival")—where she was introduced and brought to the stage while sitting in the audience by fellow folk singer [Jorge Cafrune](/wiki/Jorge_Cafrune "Jorge Cafrune")— brought her to the attention of the Argentine public. Sosa and her first husband, [Manuel Oscar Matus](/w/index.php?title=Manuel_Oscar_Matus&action=edit&redlink=1 "Manuel Oscar Matus (page does not exist)"), with whom she had one son, were key players in the mid-60s *[nueva canción](/wiki/Nueva_canci%C3%B3n "Nueva canción")* movement (which was called *nuevo cancionero* in Argentina). Her second record was *Canciones con Fundamento*, a collection of Argentine folk songs.

Sosa with [Félix Luna](/wiki/F%C3%A9lix_Luna "Félix Luna") and [Ariel Ramírez](/wiki/Ariel_Ram%C3%ADrez "Ariel Ramírez") (at the piano)

Sosa "spent the late 1960s building her audience in Europe and among the cosmopolitan middle class in Buenos Aires, becoming in the process a much bigger star" than her contemporaries. In 1967, Sosa toured the United States and Europe with great success.[*[citation needed](/wiki/Wikipedia:Citation_needed "Wikipedia:Citation needed")*] In later years, she performed and recorded extensively, broadening her repertoire to include material from throughout Latin America.

In the early 1970s, Sosa released two concept albums in collaboration with composer [Ariel Ramírez](/wiki/Ariel_Ram%C3%ADrez "Ariel Ramírez") and lyricist [Félix Luna](/wiki/F%C3%A9lix_Luna "Félix Luna"): *Cantata Sudamericana* and *Mujeres Argentinas* (Argentine Women). She also recorded a tribute to Chilean musician [Violeta Parra](/wiki/Violeta_Parra "Violeta Parra") in 1971, including what was to become one of Sosa's signature songs, *[Gracias a la vida](/wiki/Gracias_a_la_vida "Gracias a la vida")*. She further popularized of songs written by [Milton Nascimento](/wiki/Milton_Nascimento "Milton Nascimento") of Brazil and [Pablo Milanés](/wiki/Pablo_Milan%C3%A9s "Pablo Milanés") and [Silvio Rodríguez](/wiki/Silvio_Rodr%C3%ADguez "Silvio Rodríguez") both from [Cuba](/wiki/Cuba "Cuba"). Throughout the decade, she released albums such as *Hasta la Victoria* in 1972 and *Traigo un Pueblo* *en mi Voz* in 1973. They featured songs like "Cuando tenga la tierra", written by [Ariel Petrocelli](/w/index.php?title=Ariel_Petrocelli&action=edit&redlink=1 "Ariel Petrocelli (page does not exist)") and Daniel Toro, which tackles political and social issues like wealth and land inequality. During the 1970s she was a part of two films by the director [Leopoldo Torre Nilsson](/wiki/Leopoldo_Torre_Nilsson "Leopoldo Torre Nilsson"): *[El Santo de la Espada](/wiki/El_Santo_de_la_Espada "El Santo de la Espada")* in 1970 and *[Güemes, la tierra en armas](/wiki/G%C3%BCemes:_la_tierra_en_armas "Güemes: la tierra en armas")* in 1971, in which she portrayed [Juana Azurduy de Padilla](/wiki/Juana_Azurduy_de_Padilla "Juana Azurduy de Padilla"), the guerrilla military leader who fought for Argentine independence.

Sosa in 1972

After the [military junta](/wiki/National_Reorganization_Process "National Reorganization Process") of [Jorge Videla](/wiki/Jorge_Videla "Jorge Videla") came to power in 1976, the atmosphere in Argentina grew increasingly oppressive. Sosa faced death threats against both her and her family, but refused for many years to leave the country. At a concert in [La Plata](/wiki/La_Plata "La Plata") in 1979, Sosa was searched and arrested on stage, along with all those attending the concert. Their release came about through international intervention. Despite attempts to hold more concerts, she was officially barred from performing by the military regime. Banned in her own country, she moved to Paris and then to [Madrid](/wiki/Madrid "Madrid"). She has spoken publicly about her artistic and emotional struggles during this period of her life. While in exile, she released the album *A Quien Doy* in 1981. The album included a recording of the song "Cuando Me Acuerdo de Mi Pais" which was originally written by the prolific Chilean singer/songwriter, [Patricio Manns](/wiki/Patricio_Manns "Patricio Manns"). The song, which he wrote while also in political exile, expresses the sorrow he felt from being separated from his homeland. She related to this feeling and struggled to continue recording and performing. In an interview with the New York Times, she said, “It was a mental problem, a problem of morale...It wasn’t my throat, or anything physical".

Sosa returned to Argentina from her exile in Europe in February 1982, several months before the military regime collapsed as a result of the [Falklands War](/wiki/Falklands_War "Falklands War"), and gave a series of concerts at the *[Teatro Ópera](/wiki/Teatro_Opera "Teatro Opera")* in Buenos Aires, where she invited many of her younger colleagues to share the stage. A double album of recordings from these performances became an instant best seller. She then traveled to perform in her home province of Tucuman. However, these performances were largely ignored by mainstream media in the country. In subsequent years, Sosa continued to tour both in Argentina and abroad, performing in such venues as the [Lincoln Center](/wiki/Lincoln_Center "Lincoln Center") in New York City and the *[Théâtre Mogador](/wiki/Th%C3%A9%C3%A2tre_Mogador "Théâtre Mogador")* in Paris. In poor health for much of the 1990s, she performed a comeback show in Argentina in 1998. In 1994, she played in the [Sistine Chapel](/wiki/Sistine_Chapel "Sistine Chapel") in Vatican City. In 2002, she sold out both [Carnegie Hall](/wiki/Carnegie_Hall "Carnegie Hall") in New York and the [Colosseum](/wiki/Colosseum "Colosseum") in Rome in the same year.

Sosa in 1973

A supporter of [Perón](/wiki/Juan_Per%C3%B3n "Juan Perón"), she favored leftist causes throughout her life. She supported President [Raul Alfonsin](/wiki/Ra%C3%BAl_Alfons%C3%ADn "Raúl Alfonsín") in the election of 1983 which marked the return of democracy in Argentina following the dictatorship. She referred to this election as "Argentina's Spring" She opposed President [Carlos Menem](/wiki/Carlos_Menem "Carlos Menem"), who was in office from 1989 to 1999, and supported the election of [Néstor Kirchner](/wiki/N%C3%A9stor_Kirchner "Néstor Kirchner"), who became president in 2003.
Sosa was a [UNESCO Goodwill Ambassador](/wiki/UNESCO_Goodwill_Ambassador "UNESCO Goodwill Ambassador") for Latin America and the Caribbean.

Sosa disliked being identified as a protest singer. While she was outright in her political stances, Sosa said the following on the position of the artist:

> “An artist isn’t political in the party political sense – they have a constituency, which is their public – it is the poetry that matters most of all.”

In a career spanning four decades, she worked with performers across several genres and generations, folk, opera, pop, rock, including [Martha Argerich](/wiki/Martha_Argerich "Martha Argerich"), [Andrea Bocelli](/wiki/Andrea_Bocelli "Andrea Bocelli"), [David Broza](/wiki/David_Broza "David Broza"), [Franco Battiato](/wiki/Franco_Battiato "Franco Battiato"), [Jaime Roos](/wiki/Jaime_Roos "Jaime Roos"), [Joan Baez](/wiki/Joan_Baez "Joan Baez"), [Francis Cabrel](/wiki/Francis_Cabrel "Francis Cabrel"), [Gal Costa](/wiki/Gal_Costa "Gal Costa"), [Luz Casal](/wiki/Luz_Casal "Luz Casal"), [Lila Downs](/wiki/Lila_Downs "Lila Downs"), [Lucio Dalla](/wiki/Lucio_Dalla "Lucio Dalla"), [Maria Farantouri](/wiki/Maria_Farantouri "Maria Farantouri"), [Lucecita Benitez](/wiki/Lucecita_Benitez "Lucecita Benitez"), [Nilda Fernández](/wiki/Nilda_Fern%C3%A1ndez "Nilda Fernández"), [Charly Garcia](/wiki/Charly_Garcia "Charly Garcia"), [León Gieco](/wiki/Le%C3%B3n_Gieco "León Gieco"), [Gian Marco](/wiki/Gian_Marco "Gian Marco"), [Nana Mouskouri](/wiki/Nana_Mouskouri "Nana Mouskouri"), [Pablo Milanés](/wiki/Pablo_Milan%C3%A9s "Pablo Milanés"), [Holly Near](/wiki/Holly_Near "Holly Near"), [Milton Nascimento](/wiki/Milton_Nascimento "Milton Nascimento"), [Pata Negra](/wiki/Pata_Negra "Pata Negra"), [Fito Páez](/wiki/Fito_P%C3%A1ez "Fito Páez"), [Franco De Vita](/wiki/Franco_De_Vita "Franco De Vita"), [Lourdes Pérez](/wiki/Lourdes_P%C3%A9rez "Lourdes Pérez"), [Luciano Pavarotti](/wiki/Luciano_Pavarotti "Luciano Pavarotti"), [Silvio Rodríguez](/wiki/Silvio_Rodr%C3%ADguez "Silvio Rodríguez"), [Ismael Serrano](/wiki/Ismael_Serrano "Ismael Serrano"), [Shakira](/wiki/Shakira "Shakira"), [Sting](/wiki/Sting_(musician) "Sting (musician)"), [Caetano Veloso](/wiki/Caetano_Veloso "Caetano Veloso"), [Julieta Venegas](/wiki/Julieta_Venegas "Julieta Venegas"), [Gustavo Cerati](/wiki/Gustavo_Cerati "Gustavo Cerati") and [Konstantin Wecker](/wiki/Konstantin_Wecker "Konstantin Wecker")

Sosa participated in a 1999 production of [Ariel Ramírez](/wiki/Ariel_Ram%C3%ADrez "Ariel Ramírez")'s *Misa Criolla*. Her song *Balderrama* is featured in the 2008 movie *[Che](/wiki/Che_(2008_film) "Che (2008 film)")*, starring [Benicio del Toro](/wiki/Benicio_del_Toro "Benicio del Toro") as the Argentine [Marxist](/wiki/Marxist "Marxist") revolutionary [Che Guevara](/wiki/Che_Guevara "Che Guevara").

Sosa was the co-chair of the [Earth Charter](/wiki/Earth_Charter "Earth Charter") International Commission.

Awards
------

Sosa won the [Latin Grammy Award for Best Folk Album](/wiki/Latin_Grammy_Award_for_Best_Folk_Album "Latin Grammy Award for Best Folk Album") in 2000 (*Misa Criolla*), 2003 (*Acústico*), 2006 (*Corazón Libre*), 2009 (*[Cantora 1](/wiki/Cantora,_un_Viaje_%C3%8Dntimo "Cantora, un Viaje Íntimo")*, which also won [Best Recording Package](/wiki/Latin_Grammy_Award_for_Best_Recording_Package "Latin Grammy Award for Best Recording Package") and was nominated for [Album of the Year](/wiki/Latin_Grammy_Award_for_Album_of_the_Year "Latin Grammy Award for Album of the Year")), and 2011 (*Deja La Vida Volar*), as well as several international awards.

In 1995, [Konex Foundation](/wiki/Konex_Foundation "Konex Foundation") from Argentina granted her the Diamond [Konex Award](/wiki/Konex_Award "Konex Award"), one of the most prestigious awards in Argentina, as the most important personality in the popular music of her country in the last decade.

Death
-----

Mercedes Sosa lying in repose, with her family and President [Cristina Fernández de Kirchner](/wiki/Cristina_Fern%C3%A1ndez_de_Kirchner "Cristina Fernández de Kirchner") viewing

Suffering from recurrent [endocrine](/wiki/Endocrine_system "Endocrine system") and [respiratory](/wiki/Respiratory_system "Respiratory system") problems in later years, the 74-year-old Sosa was hospitalized in Buenos Aires on 18 September 2009. She died from [multiple organ failure](/wiki/Multiple_organ_failure "Multiple organ failure") on 4 October 2009, at 5:15 am. She is survived by one son, Fabián Matus, born of her first marriage. He said: "She lived her 74 years to the fullest. She had done practically everything she wanted, she didn't have any type of barrier or any type of fear that limited her". The hospital expressed its sympathies to her relatives. Her website featured the following: "Her undisputed talent, her honesty and her profound convictions leave a great legacy to future generations".

Her body was placed on display at the [National Congress](/wiki/Argentine_National_Congress "Argentine National Congress") building in Buenos Aires for the public to pay their respects, and President Fernández de Kirchner ordered three days of national mourning. Thousands had queued by the end of the day.

Sosa's obituary in *[The Daily Telegraph](/wiki/The_Daily_Telegraph "The Daily Telegraph")* said she was "an unrivalled interpreter of works by her compatriot, the Argentine [Atahualpa Yupanqui](/wiki/Atahualpa_Yupanqui "Atahualpa Yupanqui"), and Chile's [Violeta Parra](/wiki/Violeta_Parra "Violeta Parra")". Helen Popper of [Reuters](/wiki/Reuters "Reuters") reported her death by saying she "fought South America's dictators with her voice and became a giant of contemporary Latin American music". Sosa received three [Latin Grammy](/wiki/Latin_Grammy "Latin Grammy") nominations for her album, in 2009 . She went on to win Best Folk Album about a month after her death.

Tributes
--------

In 2019, Sosa was celebrated by a [Google Doodle](/wiki/Google_Doodle "Google Doodle"). The doodle was showcased in [Argentina](/wiki/Argentina "Argentina"), [Chile](/wiki/Chile "Chile"), [Uruguay](/wiki/Uruguay "Uruguay"), [Paraguay](/wiki/Paraguay "Paraguay"), [Bolivia](/wiki/Bolivia "Bolivia"), [Peru](/wiki/Peru "Peru"), [Ecuador](/wiki/Ecuador "Ecuador"), [Cuba](/wiki/Cuba "Cuba"), [Iceland](/wiki/Iceland "Iceland"), Sweden, [Serbia](/wiki/Serbia "Serbia"), [Greece](/wiki/Greece "Greece"), [Israel](/wiki/Israel "Israel") and [Vietnam](/wiki/Vietnam "Vietnam").

In 2023, *[Rolling Stone](/wiki/Rolling_Stone "Rolling Stone")* ranked Sosa at number 160 on its list of the 200 Greatest Singers of All Time.

Discography
-----------

Sosa in 2005, with Argentina's then-First Lady (later president from 2007 to 2015), Cristina Fernández de Kirchner

Sosa recorded forty albums.

### Studio albums

| Year | Album details |
| --- | --- |
| 1962 | La Voz De La ZafraLabel: RCA |
| 1965 | Canciones Con FundamentoLabel: El Grillo |
| 1966 | HermanoLabel: Philips |
| 1966 | Yo No Canto Por CantarLabel: Philips |
| 1967 | Para Cantarle A Mi GenteLabel: Philips |
| 1968 | Con Sabor A Mercedes SosaLabel: Philips |
| 1969 | Mujeres ArgentinasLabel: Philips |
| 1970 | El Grito De La TierraLabel: Philips |
| 1970 | Navidad Con Mercedes SosaLabel: Philips |
| 1971 | Homenaje a Violeta ParraLabel: Philips |
| 1972 | Hasta La VictoriaLabel: Philips |
| 1972 | Cantata SudamericanaLabel: Philips |
| 1973 | Traigo Un Pueblo En Mi VozLabel: Philips |
| 1975 | A Que Florezca Mi PuebloLabel: Philips |
| 1976 | En Dirección Del VientoLabel: Philips |
| 1977 | Mercedes Sosa Interpreta A Atahualpa YupanquiLabel: Philips |
| 1979 | Serenata Para La Tierra De UnoLabel: Philips |
| 1981 | A Quien Doy / Cuando Me Acuerdo de Mi PaísLabel: Philips |
| 1982 | Como Un Pájaro LibreLabel: Philips |
| 1983 | Mercedes SosaLabel: Philips |
| 1984 | ¿Será Posible El Sur?Label: Philips |
| 1985 | Vengo A Ofrecer Mi CorazónLabel: Philips |
| 1986 | Mercedes Sosa '86Label: Philips |
| 1987 | Mercedes Sosa '87Label: Philips |
| 1993 | SinoLabel: Philips/Polygram |
| 1994 | Gestos De AmorLabel: Polydor |
| 1996 | Escondido En Mi PaísLabel: Polydor |
| 1997 | Alta Fidelidad(w/Charly García)Label: Mercury |
| 1998 | Al DespertarLabel: Mercury |
| 1999 | Misa CriollaLabel: Mercury |
| 2005 | Corazón LibreLabel: Edge |
| 2009 | Cantora 1(w/various artists)Label: RCA |
| 2009 | Cantora 2(w/various artists)Label: RCA |
| 2011 | CensuradaLabel: Philips |
| 2015 | LuceritoLabel: RCA |

### EPs

| Year | EP details |
| --- | --- |
| 1975 | Niño De MañanaLabel: Philips |

### Live albums

| Year | Album details |
| --- | --- |
| 1973 | Si Se Calla El Cantor(with Gloria Martin)Label: Philips |
| 1980 | Gravado Ao Vivo No BrasilLabel: Philips |
| 1982 | Mercedes Sosa en ArgentinaLabel: Phonogram/Philips |
| 1985 | Corazón Americano(withMilton Nascimento&León Gieco)Label: Philips |
| 1989 | Live in EuropeLabel: Tropical Music/Polygram Argentina |
| 1991 | De MíLabel: Philips |
| 2002 | Acústico En VivoLabel: Sony Music Argentina |
| 2003 | Argentina Quiere Cantar(withVíctor Heredia&León Gieco)Label: Odeon/EMI |
| 2010 | Deja La Vida Volar (En Gira)Label: RCA |
| 2014 | AngelLabel: Universal Music |
| 2024 | En vivo en el Gran Rex 2006Label: INAMU Discos |
| Mercedes Sosa en Nueva York, 1974Label: Sony Music Argentina |

### Compilation albums

| Year | Album details |
| --- | --- |
| 1975 | Disco De OroLabel: Philips |
| 1983 | RecitalLabel: Philips |
| 1988 | Amigos MíosLabel: Philips |
| 1993 | 30 AñosLabel: Polygram Argentina |
| 1995 | OroLabel: Polygram |
| 1997 | The Best Of Mercedes SosaLabel: Mercury |
| 2013 | Siempre En TiLabel: Universal Music |

Filmography
-----------

* *[Güemes, la tierra en armas](/wiki/G%C3%BCemes:_la_tierra_en_armas "Güemes: la tierra en armas")* (1971)
* *[Argentinísima](/wiki/Argentin%C3%ADsima "Argentinísima")* (1972)
* *[Esta es mi Argentina](/w/index.php?title=Esta_es_mi_Argentina&action=edit&redlink=1 "Esta es mi Argentina (page does not exist)")* (1974)
* *[Mercedes Sosa, como un pájaro libre](/wiki/Mercedes_Sosa:_como_un_pajaro_libre "Mercedes Sosa: como un pajaro libre")* (1983)
* *[Será possible el sur: Mercedes Sosa](/w/index.php?title=Ser%C3%A1_possible_el_sur:_Mercedes_Sosa&action=edit&redlink=1 "Será possible el sur: Mercedes Sosa (page does not exist)")* (1985)
* *[Historias de Argentina en vivo](/w/index.php?title=Historias_de_Argentina_en_vivo&action=edit&redlink=1 "Historias de Argentina en vivo (page does not exist)")* (2001)

Further reading
---------------

* Christensen, Anette (2019). *Mercedes Sosa - The Voice of Hope*. Denmark: Tribute2life Publishing. [ISBN](/wiki/ISBN_(identifier) "ISBN (identifier)") [978-87-998216-5-5](/wiki/Special:BookSources/978-87-998216-5-5 "Special:BookSources/978-87-998216-5-5").
* Christensen, Anette (2019). *Mercedes Sosa - More Than a Song*. Denmark: Tribute2life Publishing. [ISBN](/wiki/ISBN_(identifier) "ISBN (identifier)") [978-87-998216-7-9](/wiki/Special:BookSources/978-87-998216-7-9 "Special:BookSources/978-87-998216-7-9"). (Abridged version of Mercedes Sosa - The Voice of Hope)
* Braceli, Rodolfo (2010). *Mercedes Sosa. La Negra* (in Spanish). Italy: Perrone. [ISBN](/wiki/ISBN_(identifier) "ISBN (identifier)") [978-88-6004-347-4](/wiki/Special:BookSources/978-88-6004-347-4 "Special:BookSources/978-88-6004-347-4").
* Matus, Fabián (2016). *Mercedes Sosa. La Mami* (in Spanish). Argentina: Planeta. [ISBN](/wiki/ISBN_(identifier) "ISBN (identifier)") [978-950-49-5247-3](/wiki/Special:BookSources/978-950-49-5247-3 "Special:BookSources/978-950-49-5247-3").

References
----------

1. **[^](#cite_ref-birth_1-0)** [Mercedes Sosa](http://www.brainyhistory.com/events/1935/july_9_1935_93941.html) at BrainyHistory.com
2. **[^](#cite_ref-tmc_2-0)** ["Singer Mercedes Sosa: The voice of the 'voiceless ones' outlasts South American dictatorships"](http://www.tmcnet.com/usubmit/2007/12/08/3150199.htm).
3. ^ [***a***](#cite_ref-Heckman_3-0) [***b***](#cite_ref-Heckman_3-1) [***c***](#cite_ref-Heckman_3-2) Heckman, Don (29 October 1995). ["POP MUSIC : The Voice Heard Round the World : Mercedes Sosa, a compelling figure in world music and a social activist, will make a rare L.A. appearance"](https://www.latimes.com/archives/la-xpm-1995-10-29-ca-62462-story.html). *Los Angeles Times*. Retrieved 5 December 2023.
4. ^ [***a***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-0) [***b***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-1) [***c***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-2) [***d***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-3) [***e***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-4) [***f***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-5) [***g***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-6) [***h***](#cite_ref-Legendary_folk_singer_Mercedes_Sosa_dies_at_74_4-7) ["Legendary folk singer Mercedes Sosa dies at 74"](http://www.france24.com/en/20091004-legendary-folk-singer-mercedes-sosa-dies-74-argentina-entertainment-music). [France 24](/wiki/France_24 "France 24"). 4 October 2009. Retrieved 5 October 2009.
5. ^ [***a***](#cite_ref-:0_5-0) [***b***](#cite_ref-:0_5-1) [***c***](#cite_ref-:0_5-2) [***d***](#cite_ref-:0_5-3) Bernstein, Adam (5 October 2009). ["Argentine folk singer who championed social justice"](https://www.latimes.com/archives/la-xpm-2009-oct-05-me-mercedes-sosa5-story.html). *Los Angeles Times*. Retrieved 8 March 2025.
6. **[^](#cite_ref-6)** *Mercedes Sosa: The Voice of Latin America*. Dir. Rodrigo H. Villa. First Run Features, 2013. Web.
7. ^ [***a***](#cite_ref-Mercedes_Sosa:_Obituary_7-0) [***b***](#cite_ref-Mercedes_Sosa:_Obituary_7-1) [***c***](#cite_ref-Mercedes_Sosa:_Obituary_7-2) [***d***](#cite_ref-Mercedes_Sosa:_Obituary_7-3) [***e***](#cite_ref-Mercedes_Sosa:_Obituary_7-4) [***f***](#cite_ref-Mercedes_Sosa:_Obituary_7-5) [***g***](#cite_ref-Mercedes_Sosa:_Obituary_7-6) [***h***](#cite_ref-Mercedes_Sosa:_Obituary_7-7) ["Mercedes Sosa: Obituary"](https://www.telegraph.co.uk/news/obituaries/culture-obituaries/music-obituaries/6259898/Mercedes-Sosa.html). *[The Daily Telegraph](/wiki/The_Daily_Telegraph "The Daily Telegraph")*. 4 October 2009. Retrieved 5 October 2009.
8. **[^](#cite_ref-test_8-0)** [The presentation by Jorge Cafrune and the song Mercedes Sosa sang](https://www.youtube.com/watch?v=QzwL8C2hE6c) on [YouTube](/wiki/YouTube_video_(identifier) "YouTube video (identifier)"). Retrieved 3 March 2010.
9. ^ [***a***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-0) [***b***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-1) [***c***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-2) [***d***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-3) [***e***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-4) [***f***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-5) [***g***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-6) [***h***](#cite_ref-Latin_artist_Mercedes_Sosa_dies_9-7) ["Latin artist Mercedes Sosa dies"](http://news.bbc.co.uk/2/hi/entertainment/8289370.stm). [BBC](/wiki/BBC "BBC"). 4 October 2009. Retrieved 5 October 2009.
10. **[^](#cite_ref-10)** Karush, Matthew (2017). *Musicians in Transit: Argentina and the Globalization of Popular Music*. Duke. p. 168. [ISBN](/wiki/ISBN_(identifier) "ISBN (identifier)") [978-0-8223-7377-3](/wiki/Special:BookSources/978-0-8223-7377-3 "Special:BookSources/978-0-8223-7377-3").
11. ^ [***a***](#cite_ref-ap_11-0) [***b***](#cite_ref-ap_11-1) [Associated Press](https://www.google.com/hostednews/ap/article/ALeqM5iBSb5ZdEJ8MI6_qzWccZNZQITnMAD9B47Q8G1)[*[dead link](/wiki/Wikipedia:Link_rot "Wikipedia:Link rot")*]
12. ^ [***a***](#cite_ref-:2_12-0) [***b***](#cite_ref-:2_12-1) ["Biografía"](https://www.mercedessosa.org/biografia/). *Fundación Mercedes Sosa* (in Spanish). Retrieved 8 March 2025.
13. **[^](#cite_ref-13)** Argentina, Cadena 3. ["El folclore argentino llora la muerte de Daniel Toro - Notas - Viva la Radio"](https://www.cadena3.com/noticia/viva-la-radio/murio-daniel-toro-icono-del-folclore-argentino_359018). *Cadena 3 Argentina* (in Spanish). Retrieved 14 March 2025.`{{cite web}}`: CS1 maint: numeric names: authors list ([link](/wiki/Category:CS1_maint:_numeric_names:_authors_list "Category:CS1 maint: numeric names: authors list"))
14. **[^](#cite_ref-14)** Nilsson, Leopoldo Torre (7 April 1971), [*Güemes - la tierra en armas*](https://www.imdb.com/title/tt0067165/?ref_=nm_knf_t_2) (Drama, History), Alfredo Alcón, Norma Aleandro, Gabriela Gili, Producciones Cinematográficas Cerrillos, retrieved 8 March 2025
15. **[^](#cite_ref-15)** Rodrigo (10 September 2020). ["Patricio Manns: Cuando me acuerdo de mi país (1983) | PERRERAC: La canción, un arma de la revolución"](https://perrerac.org/album/patricio-manns-cuando-me-acuerdo-de-mi-pais-1983/13772/) (in Spanish). Retrieved 14 March 2025.
16. ^ [***a***](#cite_ref-:1_16-0) [***b***](#cite_ref-:1_16-1) Lopez, Vicente F. (18 January 1983). ["ARTISTAS EXILIADOS HAN REGRESADO A ARGENTINA"](https://infoweb.newsbank.com/apps/news/openurl?ctx_ver=z39.88-2004&rft_id=info%3Asid/infoweb.newsbank.com&svc_dat=WORLDNEWS&req_dat=0D1C15DBC6335C60&rft_val_format=info%3Aofi/fmt%3Akev%3Amtx%3Actx&rft_dat=document_id%3Anews%252F0EB960FAFAAF7A0F). *El Nuevo Herald*. p. 8. Retrieved 7 March 2025.
17. **[^](#cite_ref-17)** Drosdoff, Daniel (30 October 1983). ["ARGENTINIAN VOTE TO END DICTATORSHIP PERONIST AND RADICAL IN LEAD FOR PRESIDENCY"](https://infoweb.newsbank.com/apps/news/openurl?ctx_ver=z39.88-2004&rft_id=info%3Asid/infoweb.newsbank.com&svc_dat=WORLDNEWS&req_dat=0D1C15DBC6335C60&rft_val_format=info%3Aofi/fmt%3Akev%3Amtx%3Actx&rft_dat=document_id%3Anews%252F0EB35DEF2EAA1BC5). *Miami Herald*. pp. 16A. Retrieved 7 March 2025.
18. **[^](#cite_ref-18)** [Interview with Mercedes Sosa](http://www.berlinonline.de/berliner-zeitung/archiv/.bin/dump.fcgi/2003/1025/magazin/0001/) [Archived](https://web.archive.org/web/20091016201953/http://www.berlinonline.de/berliner-zeitung/archiv/.bin/dump.fcgi/2003/1025/magazin/0001/) 16 October 2009 at the [Wayback Machine](/wiki/Wayback_Machine "Wayback Machine"), *Magazin Berliner Zeitung*, 25 October 2003. (in German)
19. **[^](#cite_ref-19)** [Mercedes Sosa in concert](http://chicago.cervantes.es/Cultura/Fichas/Ficha41728_47_2.htm) [Archived](https://web.archive.org/web/20080104200631/http://chicago.cervantes.es/Cultura/Fichas/Ficha41728_47_2.htm) 4 January 2008 at the [Wayback Machine](/wiki/Wayback_Machine "Wayback Machine")
20. **[^](#cite_ref-20)** Meyer, Bill (7 October 2009). ["A U.S. musician pays tribute to Mercedes Sosa"](https://www.peoplesworld.org/article/a-u-s-musician-pays-tribute-to-mercedes-sosa/). *People's World*. Retrieved 5 December 2023.
21. **[^](#cite_ref-21)** ["In Profile: Mercedes Sosa"](https://soundsandcolours.com/articles/argentina/in-profile-mercedes-sosa-1761/). *soundsandcolours.com*. 26 August 2010. Retrieved 27 March 2018.
22. **[^](#cite_ref-22)** [*Balderrama* by Mercedes Sosa](https://www.youtube.com/watch?v=k4LJDTlviKw) on [YouTube](/wiki/YouTube_video_(identifier) "YouTube video (identifier)") – a tribute to [Che Guevara](/wiki/Che_Guevara "Che Guevara")
23. **[^](#cite_ref-23)** ["Latin Grammys: Ganadores – Años Anteriores (2000)"](https://www.latingrammy.com/es/nominees/search?artist=&field_nominee_work_value=&year=2000&genre=All). *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
24. **[^](#cite_ref-24)** ["Latin Grammys: Ganadores – Años Anteriores (2003)"](https://www.latingrammy.com/es/nominees/search?artist=&field_nominee_work_value=&year=2003&genre=All). *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
25. **[^](#cite_ref-25)** ["Latin Grammys: Ganadores – Años Anteriores (2006)"](https://www.latingrammy.com/es/nominees/search?artist=&field_nominee_work_value=&year=2006&genre=All). *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
26. **[^](#cite_ref-26)** ["Latin Grammys: Ganadores – Años Anteriores (2009)"](https://www.latingrammy.com/es/nominees/search?artist=&field_nominee_work_value=&year=2009&genre=All). *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
27. **[^](#cite_ref-27)** ["Latin Grammys: Ganadores – Años Anteriores (2011)"](https://www.latingrammy.com/es/nominees/search?artist=&field_nominee_work_value=&year=2011&genre=All). *Latin Grammys* (in Spanish). The Latin Recording Academy. Retrieved 7 July 2021.
28. **[^](#cite_ref-28)** ["Premios Konex 1995: Música Popular"](https://www.fundacionkonex.org/premios1995-musica-popular). *Fundación Konex* (in Spanish). Retrieved 7 July 2021.
29. **[^](#cite_ref-29)** [""En ningún momento sufrió", dijo el hijo de Mercedes Sosa"](https://web.archive.org/web/20091004144610/http://www.clarin.com/diario/2009/10/01/um/m-02010016.htm) (in Spanish). October 2009. Archived from [the original](http://www.clarin.com/diario/2009/10/01/um/m-02010016.htm) on 4 October 2009. Retrieved 1 October 2009.
30. ^ [***a***](#cite_ref-Argentine_singer_Mercedes_Sosa,_'voice_of_Latin_America,'_dies_at_74_30-0) [***b***](#cite_ref-Argentine_singer_Mercedes_Sosa,_'voice_of_Latin_America,'_dies_at_74_30-1) [***c***](#cite_ref-Argentine_singer_Mercedes_Sosa,_'voice_of_Latin_America,'_dies_at_74_30-2) Javier Doberti (4 October 2009). ["Argentine singer Mercedes Sosa, 'voice of Latin America,' dies at 74"](http://edition.cnn.com/2009/SHOWBIZ/Music/10/04/obit.mercedes.sosa/). [CNN](/wiki/CNN "CNN"). Retrieved 5 October 2009.
31. **[^](#cite_ref-Argentine_folk_legend_Mercedes_Sosa_dead_at_74_31-0)** ["Argentine folk legend Mercedes Sosa dead at 74"](http://www.bangkokpost.com/news/world/156066/argentine-folk-legend-mercedes-sosa-dead-at-74). *[Bangkok Post](/wiki/Bangkok_Post "Bangkok Post")*. 4 October 2009. Retrieved 5 October 2009.
32. ^ [***a***](#cite_ref-Argentine_folk_icon_Sosa_dies_at_74_32-0) [***b***](#cite_ref-Argentine_folk_icon_Sosa_dies_at_74_32-1) ["Argentine folk icon Sosa dies at 74"](http://english.aljazeera.net/news/americas/2009/10/200910421057755554.html#). [Al Jazeera](/wiki/Al_Jazeera_Arabic "Al Jazeera Arabic"). 4 October 2009. Retrieved 5 October 2009.
33. **[^](#cite_ref-33)** ["Continúa la procesión en el Congreso para despedir a Mercedes Sosa"](http://www.lanacion.com.ar/nota.asp?nota_id=1182377&pid=7460859&toi=6255).
34. ^ [***a***](#cite_ref-Argentine_singer_Mercedes_Sosa_dies_at_74_34-0) [***b***](#cite_ref-Argentine_singer_Mercedes_Sosa_dies_at_74_34-1) Helen Popper (4 October 2009). ["Argentine singer Mercedes Sosa dies at 74"](https://web.archive.org/web/20091011000305/http://in.reuters.com/article/hollywood/idINTRE5931S220091004). *[Reuters](/wiki/Reuters "Reuters")*. Archived from [the original](http://in.reuters.com/article/hollywood/idINTRE5931S220091004) on 11 October 2009. Retrieved 5 October 2009.
35. **[^](#cite_ref-35)** ["Celebrating Mercedes Sosa"](https://doodles.google/doodle/celebrating-mercedes-sosa/). *Doodles Archive, Google*. 31 January 2019.
36. **[^](#cite_ref-36)** ["The 200 Greatest Singers of All Time"](https://www.rollingstone.com/music/music-lists/best-singers-all-time-1234642307/mercedes-sosa-1234642917/). *[Rolling Stone](/wiki/Rolling_Stone "Rolling Stone")*. 1 January 2023. Retrieved 9 March 2023.

External links
--------------

Wikiquote has quotations related to ***[Mercedes Sosa](https://en.wikiquote.org/wiki/Special:Search/Mercedes_Sosa "q:Special:Search/Mercedes Sosa")***.

Wikimedia Commons has media related to [Mercedes Sosa](https://commons.wikimedia.org/wiki/Category:Mercedes_Sosa "commons:Category:Mercedes Sosa").

* [Tribute to Mercedes Sosa](https://web.archive.org/web/20120913073615/http://tragaseushow.com.br/8-tributo-a-mercedes-sosa-em-porto-alegre) (in Portuguese BR)
* [Mercedes Sosa's website](http://www.mercedessosa.com.ar/) (in Spanish)
* [Mercedes Sosa's News](http://noticiasmercedessosa.blogspot.com/) (in Spanish)
* [Mercedes Sosa](https://www.imdb.com/name/nm0815302/) at [IMDb](/wiki/IMDb_(identifier) "IMDb (identifier)")
* [Mercedes Sosa's Discography](https://www.discogs.com/artist/333361-Mercedes-Sosa) on [Discogs.com](https://www.discogs.com/)

In [100]:
md_page = get_wikipedia_markdown(
    "Malko_Competition",
    lang='en',
    ignore_references= False,
    ignore_links= False)


display(Markdown(md_page)) 

International competition for young conductors

The **Malko Competition** is an international competition for young [conductors](/wiki/Conducting "Conducting"). It is held every three years by the [Danish Radio Symphony Orchestra](/wiki/Danish_Radio_Symphony_Orchestra "Danish Radio Symphony Orchestra"), to commemorate its founding conductor, [Nicolai Malko](/wiki/Nicolai_Malko "Nicolai Malko").

Recipients
----------

| Year | Recipient | Lifetime | Nationality | Notes |
| --- | --- | --- | --- | --- |
| 1965 | Ralf Weikert | b. 1940 | Austria | |
| 1968 | Avi Ostrowsky | b. 1939 | Israel | |
| 1971 | Winston Dan Vogel | b. 1943 | United States | |
| 1974 | Gotthard Lienicke | | | |
| 1977 | Philip Barry Greenberg | | United States | |
| 1980 | Maximiano Valdés | b. 1949 | Chile | |
| 1983 | Claus Peter Flor | b. 1953 | East Germany | |
| 1986 | Kazufumi Yamashita | b. 1961 | Japan | |
| 1989 | Fabio Mechetti | b. 1957 | Brazil | |
| 1992 | Jin Wang | b. 1960 | Austria | |
| 1995 | Jan Wagner | | Venezuela | |
| 1998 | Seikyo Kim | b. 1970 | Japan | |
| 2001 | Josep Caballé Domenech | b. 1973 | Spain | |
| 2005 | Mei-Ann Chen | b. 1973 | United States | |
| 2009 | Joshua Weilerstein | b. 1987 | United States | |
| 2012 | Rafael Payare | b. 1980 | Venezuela | |
| 2015 | Tung-Chieh Chuang | b. 1982 | Taiwan | |
| 2018 | Ryan Bancroft | b. 1989 | United States | |
| 2021 | Dmitry Matvienko | b. 1990 | Belarus | |
| 2024 | Samuel Seungwon Lee | b. 1990 | South Korea | |

Notes
-----

1. **[^](#cite_ref-8)** No first prize was awarded in 2001, and Caballé-Domenech was appointed the highest (2nd) prize.

References
----------

1. **[^](#cite_ref-1)** ["Denmark's top orchestra plays"](https://www.newspapers.com/image/857915887/). *[Columbus Ledger-Enquirer](/wiki/Columbus_Ledger-Enquirer "Columbus Ledger-Enquirer")*. Vol. 165, no. 313 (Final ed.). April 9, 1993. p. B-1.
2. **[^](#cite_ref-2)**  Written at [Copenhagen](/wiki/Copenhagen "Copenhagen"). ["Award to Greenberg"](https://www.newspapers.com/image/98892785/). *[Detroit Free Press](/wiki/Detroit_Free_Press "Detroit Free Press")*. Vol. 147, no. 12 (metro ed.). [Detroit](/wiki/Detroit "Detroit"). [Associated Press](/wiki/Associated_Press "Associated Press"). May 16, 1977. p. 16-B.
3. **[^](#cite_ref-3)**  Written at [Copenhagen](/wiki/Copenhagen "Copenhagen"). ["Chilean named top conductor"](https://www.newspapers.com/image/349883195/). *[The Montana Standard](/wiki/The_Montana_Standard "The Montana Standard")*. Vol. 104, no. 356. [Butte, Montana](/wiki/Butte,_Montana "Butte, Montana"). [Associated Press](/wiki/Associated_Press "Associated Press"). May 21, 1980. p. 2.
4. **[^](#cite_ref-4)** ["Japanese Maestro Top Prize Winner"](https://www.latimes.com/archives/la-xpm-1986-07-01-ca-843-story.html). *[Los Angeles Times](/wiki/Los_Angeles_Times "Los Angeles Times")*. July 1, 1986. Retrieved August 9, 2012.
5. **[^](#cite_ref-5)** MacMillan, Kyle (February 3, 1994). ["Brazilian Is Faithful to Composers"](https://www.newspapers.com/image/891334962/). *[Omaha World-Herald](/wiki/Omaha_World-Herald "Omaha World-Herald")*. Vol. 129. pp. 31–32.
6. **[^](#cite_ref-6)** ["Hot conductor"](https://www.newspapers.com/image/639618099/). the ticket. *[The Miami Herald](/wiki/The_Miami_Herald "The Miami Herald")*. Vol. 85, no. 288 (Palm Beach ed.). September 14, 1995. p. 7E.
7. **[^](#cite_ref-7)** ["ARTS & ENTERTAINMENT IN BRIEF 21/7"](https://web.archive.org/web/20100925135423/http://www.lookatvietnam.com/2010/07/arts-entertainment-in-brief-217.html). *Look at Vietnam*. July 21, 2010. Archived from [the original](http://www.lookatvietnam.com/2010/07/arts-entertainment-in-brief-217.html) on September 25, 2010. Retrieved August 9, 2012.
8. **[^](#cite_ref-9)** Johnson, Lawrence A. (4 August 2010). ["Mei-Ann Chen named music director of the Chicago Sinfonietta"](http://chicagoclassicalreview.com/2010/08/mei-ann-chen-named-music-director-of-the-chicago-sinfonietta/). *Chicago Classical Review*. Chicago. Retrieved 17 December 2017.
9. **[^](#cite_ref-10)** Eriksen, Jon Bonde (1 May 2015). ["Former winner: Malko was the start of my conducting career"](https://www.dr.dk/Temaer/Malko/English/Articles/2015/0501110524.htm). *dr.dk*. Retrieved 17 December 2017.
10. **[^](#cite_ref-11)** Mellor, Andrew (14 May 2012). ["Venezuelan Rafael Payare wins Malko Competition"](http://www.gramophone.co.uk/classical-music-news/venezuelan-rafael-payare-wins-malko-competition). *Gramophone*. Haymarket Media Group. Retrieved 9 August 2012.
11. **[^](#cite_ref-12)** ["Tung-Chieh Chuang er vinder af Malko Konkurrencen 2015"](https://www.dr.dk/presse/tung-chieh-chuang-er-vinder-af-malko-konkurrencen-2015). *DR* (in Danish). 1 May 2015.
12. **[^](#cite_ref-13)** ["28-årige Ryan tager 1. plads i stor dansk musikkonkurrence: Nu vil jeg fejre det med en middag!"](https://www.dr.dk/nyheder/kultur/klassisk/28-aarige-ryan-tager-1-plads-i-stor-dansk-musikkonkurrence-nu-vil-jeg-fejre). *DR* (in Danish). Retrieved 28 April 2018.
13. **[^](#cite_ref-14)** ["Congratulations to the winners of the Malko competition 2021!"](https://malkocompetition.dk/). *Malko Competition*. Retrieved 12 June 2021.

External links
--------------

* [Classical music portal](/wiki/Portal:Classical_music "Portal:Classical music")

[Official website](http://malkocompetition.dk/)

## Transcribe mp3s

In [104]:
import os
# import wikipediaapi
from markdownify import markdownify as md
from smolagents import tool, LiteLLMModel
import whisper

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter

import base64
import mimetypes 
import requests # Keep for consistency, though not used for fetching image in this version
import os # Added for os.path.join

import re
from bs4 import BeautifulSoup, Tag, Comment

@tool
def transcribe_mp3(mp3_path: str, model_size: str = "base") -> str:
    """
    Transcribe an MP3 file to text using Whisper.

    Args:
        mp3_path (str): Path to the MP3 file.
        model_size (str): Whisper model size (tiny, base, small, medium, large).

    Returns:
        str: Transcribed text.
    """
    transcription_path = mp3_path.replace(".mp3", "_transcript.txt")

    # Check if transcription already exists
    if os.path.exists(transcription_path):
        with open(transcription_path, 'r', encoding='utf-8') as f:
            return f.read()

    # Load model
    model = whisper.load_model(model_size)

    # Transcribe
    result = model.transcribe(mp3_path)

    transcription = result["text"]

    # Save transcription to file
    with open(transcription_path, 'w', encoding='utf-8') as f:
        f.write(transcription)

    # Return the text
    return transcription

In [110]:
mp3_path = "downloaded_files/1f975693-876d-457b-a649-393859e79bf3.mp3"  
mp3_path = "downloaded_files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3" 

In [111]:
transcript = transcribe_mp3(mp3_path)

In [112]:
transcript

' In a saucepan, combine ripe strawberries, granulated sugar, freshly squeezed lemon juice and cornstarch. Cook the mixture over medium heat, stirring constantly until it thickens to a smooth consistency. Remove from heat and stir in a dash of pure vanilla extract. Allow the strawberry pie feeling to cool before using it as a delicious and fruity filling for your pie crust.'

## Describe images

In [133]:
@tool
def describe_image_file(local_image_path: str) -> str:
    """
    Describe the contents of a local image file in detail and return the description as text.
    Args:
        local_image_path (str): The path to the local image file to be described.
    Returns:
        str: A detailed description of the image contents.
    """

    file_stem, _ = os.path.splitext(local_image_path)
    description_path = file_stem + "_descript.txt"

    # Check if description already exists
    if os.path.exists(description_path):
        with open(description_path, 'r', encoding='utf-8') as f:
            return f.read()

    model = LiteLLMModel(
        model_id='ollama/gemma3:27b',
        api_base="https://192.168.5.217:8000",  # replace with remote open-ai compatible server if necessary
        api_key=os.getenv("OLLAMA_REVPROXY_SRVML"),
        num_ctx=16384,  # ollama default is 2048 which will often fail horribly. 8192 works for easy tasks, more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to calculate how much VRAM this will need for the selected model
        ssl_verify=False,  # Explicitly disable SSL verification
        extra_headers={
            "Authorization": f"Bearer {os.getenv('OLLAMA_REVPROXY_SRVML')}",  # Explicitly set auth header
        },
        flatten_messages_as_text = False
    )
    
    text_prompt = "What is in this image? Describe it in detail."

    try:

        if not os.path.exists(local_image_path):
            raise FileNotFoundError(f"Image file not found at {local_image_path}. Please ensure it was downloaded correctly.")

        # 1. Read the image content from the local file
        with open(local_image_path, "rb") as image_file:
            image_content_bytes = image_file.read()

        # 2. Base64 encode the image content
        base64_image_bytes = base64.b64encode(image_content_bytes)
        base64_image_string = base64_image_bytes.decode('utf-8')

        # 3. Set MIME type based on file extension
        if local_image_path.lower().endswith('.png'):
            content_type = 'image/png'
        elif local_image_path.lower().endswith('.jpg') or local_image_path.lower().endswith('.jpeg'):
            content_type = 'image/jpeg'
        elif local_image_path.lower().endswith('.gif'):
            content_type = 'image/gif'
        elif local_image_path.lower().endswith('.bmp'):
            content_type = 'image/bmp'
        elif local_image_path.lower().endswith('.webp'):
            content_type = 'image/webp'
        else:
            content_type = mimetypes.guess_type(local_image_path)[0] or 'application/octet-stream'
        print(f"Using specified MIME type: {content_type}")

        # 4. Construct the data URI
        data_uri = f"data:{content_type};base64,{base64_image_string}"

        # Construct the messages payload
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": data_uri  # Use the base64 data URI here
                        }
                    }
                ]
            }
        ]

        # Assuming 'model' is your LiteLLMModel instance initialized in a previous cell (e.g., cell 'dfc845ab')
        if 'model' not in locals():
            raise NameError("Variable 'model' is not defined. Please run the cell that initializes the LiteLLMModel.")
            
        response = model.generate(messages)

        description_text = response.content

        # 5. Save the description to a file
        with open(description_path, 'w', encoding='utf-8') as f:
            f.write(description_text)
            
        return description_text

    except FileNotFoundError as fnf_err:
        print(f"File error: {fnf_err}")
        raise FileNotFoundError(f"Image file not found at {local_image_path}. Please ensure it was downloaded correctly.")
    except NameError as ne:
        print(f"A required variable might not be defined (e.g., filename, model): {ne}")
        print("Please ensure the cells defining these variables have been run.")
        raise NameError(f"Variable 'model' is not defined. Please run the cell that initializes the LiteLLMModel.")
    except Exception as e:
        print(f"An error occurred: {e}")
        raise Exception(f"An error occurred while processing the image: {e}")


In [134]:
image_description = describe_image_file("downloaded_files/cca530fc-4052-43b2-b130-b30968d8aa44.png") 

image_description

Using specified MIME type: image/png


"Here's a detailed description of the image:\n\n**The image shows a chessboard in a mid-game position.** The board is a standard 8x8 grid with alternating green and beige squares.\n\n**Pieces:**\n\n*   **White:**\n    *   King (K) on h1\n    *   Rook (R) on e3\n    *   Bishop (B) on d3\n    *   Knight (N) on d4\n    *   Queen (Q) on h5\n    *   Pawns (P) on a3, b3, g2, h2, a7, b7, g7, h7\n*   **Black:**\n    *   King (K) on g8\n    *   Rook (R) on e8\n    *   Bishop (B) on f6\n    *   Knight (N) on f5\n    *   Queen (Q) on c3\n    *   Pawns (P) on a6, b6, c6, d6, e6, f7, g7, h7\n\n**Position:**\n\nThe pieces are arranged in a complex position, suggesting an ongoing game. There are various potential attacks and defenses. The black queen, positioned at c3, appears to be a threat.\n\n**Overall:**\n\nThe image shows a snapshot of a chess game in progress, offering a tactical challenge for chess players."

## Youtube video transcribe

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter

def get_youtube_video_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video by its ID and returns it in JSON format.
    The video ID can be found in the YouTube video URL:
    https://www.youtube.com/watch?v=VIDEO_ID, where VIDEO_ID is the part after "v=".
    example: for the url https://www.youtube.com/watch?v=L1vXCYZAYYM the video_id is "L1vXCYZAYYM".

    Args:
        video_id (str): The YouTube video ID.
    Returns:
        str: The transcript in JSON format.
    """
    
    ytt_api = YouTubeTranscriptApi()
    transcript = ytt_api.fetch(video_id)

    formatter = JSONFormatter()

    # .format_transcript(transcript) turns the transcript into a JSON string.
    json_formatted = formatter.format_transcript(transcript)
    return json_formatted

In [None]:
video_id = '1htKBjuUWec'

# ytt_api = YouTubeTranscriptApi()
# transcript = ytt_api.fetch(video_id)

ytt_api = YouTubeTranscriptApi()
transcript = ytt_api.fetch(video_id)

formatter = JSONFormatter()

# .format_transcript(transcript) turns the transcript into a JSON string.
json_formatted = formatter.format_transcript(transcript)


In [None]:
json_formatted

'[{"text": "Wow this coffee\'s great I was just", "start": 0.03, "duration": 5.39}, {"text": "thinking that", "start": 3.84, "duration": 6.799}, {"text": "yeah is that cinnamon chicory", "start": 5.42, "duration": 5.219}, {"text": "tea oak", "start": 17.72, "duration": 3.09}, {"text": "[Music]", "start": 21.54, "duration": 3.12}, {"text": "isn\'t that hot", "start": 24.68, "duration": 5.04}, {"text": "extremely", "start": 26.72, "duration": 3.0}]'

# TESTS

In [9]:
import shutil
import os
import json
from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, HfApiModel,VisitWebpageTool, FinalAnswerTool
# import datetime
import requests
from smolagents.tools import Tool
from typing import Any

from tools import get_text_from_ascii_file, get_wikipedia_markdown, transcribe_mp3, describe_image_file, get_youtube_video_transcript


In [3]:
model = LiteLLMModel(
                model_id='ollama/devstral:24b',
                # model_id="ollama/cogito:14b",
                # model_id='ollama/qwen3:32b',
                # model_id='ollama/gemma3:27b', 
                # model_id='ollama/qwen2.5-coder:32b-instruct-q4_K_M', 
                api_base="https://192.168.5.217:8000",  # replace with remote open-ai compatible server if necessary
                api_key=os.getenv("OLLAMA_REVPROXY_SRVML"),
                num_ctx=16384,  # ollama default is 2048 which will often fail horribly. 8192 works for easy tasks, more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to calculate how much VRAM this will need for the selected model
                ssl_verify=False,  # Explicitly disable SSL verification
                extra_headers={
                    "Authorization": f"Bearer {os.getenv('OLLAMA_REVPROXY_SRVML')}",  # Explicitly set auth header
                },
                flatten_messages_as_text = False,
                timeout=900  # seconds, default is 600 seconds, set to 15 minutes to allow for longer tasks
            )

In [4]:
tools = [
            get_wikipedia_markdown, get_text_from_ascii_file, transcribe_mp3,
            describe_image_file, get_youtube_video_transcript,
            DuckDuckGoSearchTool(),   # Web search (main retrieval)
            VisitWebpageTool(),       # Optional: visit page if needed (sometimes helps)
            FinalAnswerTool(),        # Needed for FINAL ANSWER output
        ]

In [5]:
# if os.path.exists("agent_prompt_templates.json"):
#     with open("agent_prompt_templates.json", "r") as f:
#         prompt_templates = json.load(f)
# else:
prompt_templates = {'system_prompt': 'You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.\nTo do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\n\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_answer` tool with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don\'t use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don\'t use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \n\nHere are a few examples using notional tools:\n---\nTask: "What is the result of the following operation: 5 + 3 + 1294.678?"\n\nThought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool\nCode:\n```py\nresult = 5 + 3 + 1294.678\nfinal_answer(print(f"FINAL ANSWER: {result}"))\n```<end_code>\n\n---\nTask:\n"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.\nYou have been provided with these additional arguments, that you can access using the keys as variables in your python code:\n{\'question\': \'Quel est l\'animal sur l\'image?\', \'image\': \'path/to/image.jpg\'}"\n\nThought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.\nCode:\n```py\ntranslated_question = translator(question=question, src_lang="French", tgt_lang="English")\nprint(f"The translated question is {translated_question}.")\nanswer = image_qa(image=image, question=translated_question)\nfinal_answer(f"FINAL ANSWER {answer}")\n```<end_code>\n\n---\nTask:\nIn a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.\nWhat does he say was the consequence of Einstein learning too much math on his creativity, in one word?\n\nThought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.\nCode:\n```py\npages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")\nprint(pages)\n```<end_code>\nObservation:\nNo result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".\n\nThought: The query was maybe too restrictive and did not find any results. Let\'s try again with a broader query.\nCode:\n```py\npages = search(query="1979 interview Stanislaus Ulam")\nprint(pages)\n```<end_code>\nObservation:\nFound 6 pages:\n[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)\n\n[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)\n\n(truncated)\n\nThought: I will read the first 2 pages to know more.\nCode:\n```py\nfor url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:\n    whole_page = visit_webpage(url)\n    print(whole_page)\n    print("\\n" + "="*80 + "\\n")  # Print separator between pages\n```<end_code>\nObservation:\nManhattan Project Locations:\nLos Alamos, NM\nStanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at\n(truncated)\n\nThought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let\'s answer in one word.\nCode:\n```py\nfinal_answer("FINAL ANSWER diminished")\n```<end_code>\n\n---\nTask: "Which city has the highest population: Guangzhou or Shanghai?"\n\nThought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.\nCode:\n```py\nfor city in ["Guangzhou", "Shanghai"]:\n    print(f"Population {city}:", search(f"{city} population")\n```<end_code>\nObservation:\nPopulation Guangzhou: [\'Guangzhou has a population of 15 million inhabitants as of 2021.\']\nPopulation Shanghai: \'26 million (2019)\'\n\nThought: Now I know that Shanghai has the highest population.\nCode:\n```py\nfinal_answer("FINAL ANSWER Shanghai")\n```<end_code>\n\n---\nTask: "What is the current age of the pope, raised to the power 0.36?"\n\nThought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.\nCode:\n```py\npope_age_wiki = wiki(query="current pope age")\nprint("Pope age as per wikipedia:", pope_age_wiki)\npope_age_search = web_search(query="current pope age")\nprint("Pope age as per google search:", pope_age_search)\n```<end_code>\nObservation:\nPope age: "The pope Francis is currently 88 years old."\n\nThought: I know that the pope is 88 years old. Let\'s compute the result using python code.\nCode:\n```py\npope_current_age = 88 ** 0.36\nfinal_answer(f"FINAL ANSWER {pope_current_age}")\n```<end_code>\n\nAbove example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}\n    """\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\nHere are the rules you should always follow to solve your task:\n1. Always provide a \'Thought:\' sequence, and a \'Code:\\n```py\' sequence ending with \'```<end_code>\' sequence, else you will fail.\n2. Use only variables that you have defined!\n3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in \'answer = wiki({\'query\': "What is the place where James Bond lives?"})\', but use the arguments directly as in \'answer = wiki(query="What is the place where James Bond lives?")\'.\n4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.\n5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.\n6. Don\'t name any new variable with the same name as a tool: for instance don\'t name a variable \'final_answer\'.\n7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.\n8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}\n9. The state persists between code executions: so if in one step you\'ve created variables or imported modules, these will all persist.\n10. Don\'t give up! You\'re in charge of solving the task, not providing directions to solve it.\n\nNow Begin!',
    'planning': {'initial_plan': 'You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.\nBelow I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.\n\n## 1. Facts survey\nYou will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.\nThese "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:\n### 1.1. Facts given in the task\nList here the specific facts given in the task that could help you (there might be nothing here).\n\n### 1.2. Facts to look up\nList here any facts that we may need to look up.\nAlso list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.\n\n### 1.3. Facts to derive\nList here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.\n\nDon\'t make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.\n\n## 2. Plan\nThen for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the \'\\n<end_plan>\' tag and stop there.\n\nYou can leverage these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}\n    """\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\n---\nNow begin! Here is your task:\n```\n{{task}}\n```\nFirst in part 1, write the facts survey, then in part 2, write your plan.',
    'update_plan_pre_messages': 'You are a world expert at analyzing a situation, and plan accordingly towards solving a task.\nYou have been given the following task:\n```\n{{task}}\n```\n\nBelow you will find a history of attempts made to solve this task.\nYou will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.\nIf the previous tries so far have met some success, your updated plan can build on these results.\nIf you are stalled, you can make a completely new plan starting from scratch.\n\nFind the task and history below:',
    'update_plan_post_messages': 'Now write your updated facts below, taking into account the above history:\n## 1. Updated facts survey\n### 1.1. Facts given in the task\n### 1.2. Facts that we have learned\n### 1.3. Facts still to look up\n### 1.4. Facts still to derive\n\nThen write a step-by-step high-level plan to solve the task above.\n## 2. Plan\n### 2. 1. ...\nEtc.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nBeware that you have {remaining_steps} steps remaining.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the \'\\n<end_plan>\' tag and stop there.\n\nYou can leverage these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}"""\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\nNow write your updated facts survey below, then your new plan.'},
    'managed_agent': {'task': "You're a helpful agent named '{{name}}'.\nYou have been submitted this task by your manager.\n---\nTask:\n{{task}}\n---\nYou're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.\n\nYour final_answer WILL HAVE to contain these parts:\n### 1. Task outcome (short version):\n### 2. Task outcome (extremely detailed version):\n### 3. Additional context (if relevant):\n\nPut all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.\nAnd even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.",
    'report': "Here is the final answer from your managed agent '{{name}}':\n{{final_answer}}"},
    'final_answer': {'pre_messages': "An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don\'t use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don\'t use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Here is the agent's memory:",
    'post_messages': 'Based on the above, please provide an answer to the following user task:\n{{task}}'}}

prompt_templates

{'system_prompt': 'You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.\nTo do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\n\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_ans

In [6]:
agent = CodeAgent(
            tools=tools,
            model=model,
            prompt_templates = prompt_templates,
            max_steps=10,                 # should be enough, first guess --> to check
            planning_interval=3,          # should be enough, first guess --> to check
            verbosity_level=2,      # 0: no output, 1: only errors, 2: all outputs  
            additional_authorized_imports=["datetime", "numpy", "requests", "json", "re", 
                                           "bs4", "pandas", "lxml", "pymupdf", "openpyxl", 
                                           "scipy", "PIL", "cv2"],                 
        )

In [7]:
# get environment variable HF_API_TOKEN
import os
from huggingface_hub import login
login(token=os.getenv("HF_API_TOKEN", None))

In [10]:
# 2. Fetch Questions
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

api_url = DEFAULT_API_URL
questions_url = f"{api_url}/questions"
submit_url = f"{api_url}/submit"


print(f"Fetching questions from: {questions_url}")
try:
    response = requests.get(questions_url, timeout=15)
    response.raise_for_status()
    questions_data = response.json()
    if not questions_data:
            print("Fetched questions list is empty.")

    print(f"Fetched {len(questions_data)} questions.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching questions: {e}")

except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")

except Exception as e:
    print(f"An unexpected error occurred fetching questions: {e}")

results_log = []
answers_payload = []
print(f"Running agent on {len(questions_data)} questions...")

Fetching questions from: https://agents-course-unit4-scoring.hf.space/questions
Fetched 20 questions.
Running agent on 20 questions...


In [11]:
# 3. Run your Agent
os.makedirs("downloaded_files", exist_ok=True)
results_log = []
answers_payload = []
print(f"Running agent on {len(questions_data)} questions...")
# for item in questions_data:


item = questions_data[19] 

task_id = item.get("task_id")
question_text = item.get("question")

question_text

Running agent on 20 questions...


'What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?'

In [150]:


# Check if task_id and question_text are present
filename = item.get("file_name")
if filename and not os.path.exists("downloaded_files/"+filename):
    file_url = f"{api_url}/files/{task_id}"
    print(f"Attempting to download file from: {file_url}")

    try:
        response = requests.get(file_url, timeout=30) # Increased timeout
        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)

        with open("downloaded_files/"+filename, "wb") as f:
            f.write(response.content)

    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        print(f"Response content (first 500 chars): {response.text[:500]}")
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        print(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"An unexpected error occurred during the request: {req_err}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if filename:
    question_text += f" (file: downloaded_files/{filename})"

if not task_id or question_text is None:
    print(f"Skipping item with missing task_id or question: {item}")

try:
    submitted_answer = agent(question_text)
    answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
    results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
except Exception as e:
        print(f"Error running agent on task {task_id}: {e}")
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})


In [151]:
submitted_answer

"Here is the final answer from your managed agent 'None':\n### 1. Task outcome (short version):\nClaus\n\n### 2. Task outcome (extremely detailed version):\nThe only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists is Claus Peter Flor, who won in 1983 and had East German nationality. East Germany ceased to exist in 1990 when it was reunified with West Germany.\n\n### 3. Additional context:\nNone"

### different prompt template

In [18]:
prompt_templates = {'system_prompt': 'You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.\nTo do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\n\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_answer` tool with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don\'t use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don\'t use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \n\nHere are a few examples using notional tools:\n---\nTask: "What is the result of the following operation: 5 + 3 + 1294.678?"\n\nThought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool\nCode:\n```py\nresult = 5 + 3 + 1294.678\nfinal_answer(print(f"FINAL ANSWER: {result}"))\n```<end_code>\n\n---\nTask:\n"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.\nYou have been provided with these additional arguments, that you can access using the keys as variables in your python code:\n{\'question\': \'Quel est l\'animal sur l\'image?\', \'image\': \'path/to/image.jpg\'}"\n\nThought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.\nCode:\n```py\ntranslated_question = translator(question=question, src_lang="French", tgt_lang="English")\nprint(f"The translated question is {translated_question}.")\nanswer = image_qa(image=image, question=translated_question)\nfinal_answer(f"FINAL ANSWER {answer}")\n```<end_code>\n\n---\nTask:\nIn a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.\nWhat does he say was the consequence of Einstein learning too much math on his creativity, in one word?\n\nThought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.\nCode:\n```py\npages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")\nprint(pages)\n```<end_code>\nObservation:\nNo result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".\n\nThought: The query was maybe too restrictive and did not find any results. Let\'s try again with a broader query.\nCode:\n```py\npages = search(query="1979 interview Stanislaus Ulam")\nprint(pages)\n```<end_code>\nObservation:\nFound 6 pages:\n[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)\n\n[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)\n\n(truncated)\n\nThought: I will read the first 2 pages to know more.\nCode:\n```py\nfor url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:\n    whole_page = visit_webpage(url)\n    print(whole_page)\n    print("\\n" + "="*80 + "\\n")  # Print separator between pages\n```<end_code>\nObservation:\nManhattan Project Locations:\nLos Alamos, NM\nStanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at\n(truncated)\n\nThought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let\'s answer in one word.\nCode:\n```py\nfinal_answer("FINAL ANSWER diminished")\n```<end_code>\n\n---\nTask: "Which city has the highest population: Guangzhou or Shanghai?"\n\nThought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.\nCode:\n```py\nfor city in ["Guangzhou", "Shanghai"]:\n    print(f"Population {city}:", search(f"{city} population")\n```<end_code>\nObservation:\nPopulation Guangzhou: [\'Guangzhou has a population of 15 million inhabitants as of 2021.\']\nPopulation Shanghai: \'26 million (2019)\'\n\nThought: Now I know that Shanghai has the highest population.\nCode:\n```py\nfinal_answer("FINAL ANSWER Shanghai")\n```<end_code>\n\n---\nTask: "What is the current age of the pope, raised to the power 0.36?"\n\nThought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.\nCode:\n```py\npope_age_wiki = wiki(query="current pope age")\nprint("Pope age as per wikipedia:", pope_age_wiki)\npope_age_search = web_search(query="current pope age")\nprint("Pope age as per google search:", pope_age_search)\n```<end_code>\nObservation:\nPope age: "The pope Francis is currently 88 years old."\n\nThought: I know that the pope is 88 years old. Let\'s compute the result using python code.\nCode:\n```py\npope_current_age = 88 ** 0.36\nfinal_answer(f"FINAL ANSWER {pope_current_age}")\n```<end_code>\n\nAbove example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}\n    """\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\nHere are the rules you should always follow to solve your task:\n1. Always provide a \'Thought:\' sequence, and a \'Code:\\n```py\' sequence ending with \'```<end_code>\' sequence, else you will fail.\n2. Use only variables that you have defined!\n3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in \'answer = wiki({\'query\': "What is the place where James Bond lives?"})\', but use the arguments directly as in \'answer = wiki(query="What is the place where James Bond lives?")\'.\n4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.\n5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.\n6. Don\'t name any new variable with the same name as a tool: for instance don\'t name a variable \'final_answer\'.\n7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.\n8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}\n9. The state persists between code executions: so if in one step you\'ve created variables or imported modules, these will all persist.\n10. Don\'t give up! You\'re in charge of solving the task, not providing directions to solve it.\n\nNow Begin!',
    'planning': {'initial_plan': 'You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.\nBelow I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.\n\n## 1. Facts survey\nYou will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.\nThese "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:\n### 1.1. Facts given in the task\nList here the specific facts given in the task that could help you (there might be nothing here).\n\n### 1.2. Facts to look up\nList here any facts that we may need to look up.\nAlso list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.\n\n### 1.3. Facts to derive\nList here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.\n\nDon\'t make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.\n\n## 2. Plan\nThen for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the \'\\n<end_plan>\' tag and stop there.\n\nYou can leverage these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}\n    """\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\n---\nNow begin! Here is your task:\n```\n{{task}}\n```\nFirst in part 1, write the facts survey, then in part 2, write your plan.',
    'update_plan_pre_messages': 'You are a world expert at analyzing a situation, and plan accordingly towards solving a task.\nYou have been given the following task:\n```\n{{task}}\n```\n\nBelow you will find a history of attempts made to solve this task.\nYou will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.\nIf the previous tries so far have met some success, your updated plan can build on these results.\nIf you are stalled, you can make a completely new plan starting from scratch.\n\nFind the task and history below:',
    'update_plan_post_messages': 'Now write your updated facts below, taking into account the above history:\n## 1. Updated facts survey\n### 1.1. Facts given in the task\n### 1.2. Facts that we have learned\n### 1.3. Facts still to look up\n### 1.4. Facts still to derive\n\nThen write a step-by-step high-level plan to solve the task above.\n## 2. Plan\n### 2. 1. ...\nEtc.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nBeware that you have {remaining_steps} steps remaining.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the \'\\n<end_plan>\' tag and stop there.\n\nYou can leverage these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}"""\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\nNow write your updated facts survey below, then your new plan.'},
    'managed_agent': {'task':(
    "You are a highly capable and autonomous agent named {{name}}, designed to solve complex tasks efficiently.\n"
    "A valued client has assigned you the following task:\n"
    "---\n"
    "Task:\n"
    "{{task}}\n"
    "---\n"
    "To complete this task successfully, follow these steps carefully:\n"
    "    1. Comprehend the task and identify the intended goal.\n"
    "    2. Break the task into clear, logical steps.\n"
    "    3. Select and prepare the tools or resources you need.\n"
    "    4. Set up the required environment or context.\n"
    "    5. Execute each step methodically.\n"
    "    6. Monitor outcomes and identify any deviations.\n"
    "    7. Revise your plan if necessary based on feedback.\n"
    "    8. Maintain internal state and track progress.\n"
    "    9. Verify that the goal has been fully achieved.\n"
    "   10. Present the final result clearly and concisely.\n"
    "If you succeed, you will be rewarded with a significant bonus.\n\n"
    "Your final_answer MUST be:\n"
    "- a number (retain its original type; do not include units),\n"
    "- a concise phrase,\n"
    "- or a comma-separated list of numbers or strings (no articles, no abbreviations).\n\n"
    "Only the content passed to the final_answer tool will be preserved. Any other content will be discarded."),
    'report': "{{final_answer}}"},
    'final_answer': {
        'pre_messages': "",
        'post_messages': ""
    }}

In [13]:
agent = CodeAgent(
            tools=tools,
            model=model,
            prompt_templates = prompt_templates,
            max_steps=10,                 # should be enough, first guess --> to check
            planning_interval=3,          # should be enough, first guess --> to check
            verbosity_level=2,      # 0: no output, 1: only errors, 2: all outputs  
            additional_authorized_imports=["datetime", "numpy", "requests", "json", "re", 
                                           "bs4", "pandas", "lxml", "pymupdf", "openpyxl", 
                                           "scipy", "PIL", "cv2"],                 
        )

In [14]:
question_text

'What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?'

In [15]:
submitted_answer = agent(question_text)