Spaces:
Sleeping
Sleeping
| """ | |
| Defines function sonnet_errors(poem, target: str) -> Dict[str, Any] | |
| which takes a target rhyme scheme (and optionally a list of required words) and returns a dict of errors | |
| Returns an empty dictionary if there are no errors, so bool(sonnet_errors(poem, target)) is False if there are no | |
| errors. It's a permissive check for sonnet errors, meaning that if it is unsure then it doesn't return an error. | |
| Specifically, | |
| * Check if it adheres to a given rhyming scheme | |
| * Check if each line has 10-11 syllables, more precisely, there's some pronunciation of each line with 10-11 syllables | |
| This omits a few things like rhymes and iambic pentameter. | |
| # Rhymes | |
| For rhymes, we use python `pronouncing` library based on: | |
| * CMU pronouncing dictionary http://www.speech.cs.cmu.edu/cgi-bin/cmudict | |
| # Syllable counting | |
| Given that there are multiple ways to pronounce many words (e.g. "caramel" can be pronounced with 2 or 3 syllables), | |
| we adopt a "permissive" approach and consult multiple tools for syllable counting: | |
| * pronouncing - a well-known pronunciation library based on CMU's pronouncing dictionary | |
| * syllables - a Python library for syllable counting | |
| * pyphen - a Python wrapper for the hyphenation library | |
| """ | |
| from typing import Set, Dict, Any | |
| import re | |
| import joblib | |
| import pyphen | |
| import syllables | |
| import pronouncing | |
# Syllable counts accepted for a single line: about 3-4% of legit lines have
# 11 syllables, so we allow it; > 99% have 10 or 11.
ALLOWED_SYLLABLES = {10, 11}

# NOTE(review): not referenced by any code visible in this file.
NUM_REQUIRED_WORDS = 3
# joblib cache rooted at ".cache" (created with verbose=0); meant to speed up
# repeated rhyme/syllable calls.
# NOTE(review): no function visible in this file is decorated with
# `memory.cache` -- confirm whether the cache is still wired up anywhere.
memory = joblib.Memory(
    ".cache", verbose=0
)  # use cache to speed up repeated rhyme/syllable calls
def sonnet_errors(poem: str, target: str, verbose=False) -> Dict[str, Any]:
    """
    Checks for sonnet errors with respect to target rhyme scheme (and optional required words)

    args:
        poem: the poem to check
        target: the rhyme scheme, e.g. "ABBA ABBA CDC DCD"
            optionally target can have a list of required words, like
            "ABBA ABBA CDC DCD, love train snail" each of these must be in the poem
            (the words may be separated by spaces and/or commas)
        verbose: if True, print out more details

    returns:
        A dict that is empty when no error was found. Otherwise it holds whatever
        `scheme_errors` reported, plus:
            "missing words": list of required words that do not occur in the poem
            "syllable errors": list of (line, sorted possible syllable counts) for
                every line with no possible count in ALLOWED_SYLLABLES
    """
    # Split on the first ", " only: everything before it is the scheme, everything
    # after it is the word list. A plain two-way unpack of `split(", ")` would raise
    # ValueError as soon as the word list itself contained ", ".
    scheme, separator, rest = target.partition(", ")
    required_words = rest.replace(",", " ").split() if separator else []

    errors = scheme_errors(poem, scheme, verbose=verbose)
    assert isinstance(errors, dict)

    # Case-insensitive substring check, so "art" is satisfied by "heart" (permissive).
    lowered_poem = poem.lower()
    missing_words = [w for w in required_words if w.lower() not in lowered_poem]
    if missing_words:
        errors["missing words"] = missing_words

    syllable_errors = []
    for line in split_poem(poem):
        variations = syllable_variations(line)
        # A line is fine if at least one plausible reading has an allowed count.
        if not (variations & ALLOWED_SYLLABLES):
            syllable_errors.append((line, sorted(variations)))
    if syllable_errors:
        errors["syllable errors"] = syllable_errors

    return errors
def clean_word(text: str):
    """
    Lower-case `text` and strip punctuation, quotes, brackets and spaces from both ends.

    Besides the ASCII characters , . ! ? ; : " ' [ ] ( ) / and space, the typographic
    quotes (U+2018, U+2019, U+201C, U+201D) are stripped too, because poems pasted
    from formatted text use them. Characters inside the word are left untouched,
    so an apostrophe in the middle of a word is preserved.
    """
    return text.lower().strip(",.!?;: \"'[]()/\u2018\u2019\u201c\u201d")
def clean_line(line: str):
    """
    Clean a line from a poem.

    Removes a trailing rhyme label such as "(A)" or "(b)" -- together with any
    whitespace before it and any whitespace or sentence punctuation after it,
    e.g. "(D)." -- and strips surrounding whitespace. Lines without such a
    label are only stripped.
    """
    # The optional punctuation class after the label handles annotated lines that
    # end like "... after death (D)."; without it the label would survive and be
    # taken for the last word of the line.
    line = re.sub(r"\s*\([A-Za-z]\)[\s.,;:!?]*$", "", line)
    return line.strip()
def split_poem(poem: str, min_line_len=3):
    """
    Split `poem` into cleaned lines.

    Every line is passed through `clean_line`; only lines whose cleaned length
    is strictly greater than `min_line_len` are kept, in their original order.
    """
    kept = []
    for raw_line in poem.splitlines():
        cleaned = clean_line(raw_line)
        if len(cleaned) > min_line_len:
            kept.append(cleaned)
    return kept
def slant_rhyming_parts(word: str):
    """
    Return the set of slant-rhyme keys for `word`, one per usable pronunciation.

    For each pronunciation from `pronouncing.phones_for_word`, the phones of its
    rhyming part are encoded one by one: a phone containing "R" becomes "R", a
    phone that is exactly one of the listed consonant letters is kept, and every
    other phone becomes the placeholder "?". Keys consisting only of
    placeholders (or empty) are dropped. In the remaining keys all placeholders
    are removed, except that a single "?" is kept at the end when the key ended
    with one.
    """
    # NOTE(review): multi-letter phones (e.g. "TH", "NG") are not in this set and
    # therefore encode as "?" -- confirm this is intended.
    single_letter_consonants = set("BCDFGHJKLMNPQRSTVWXYZ")

    def encode(phone):
        if "R" in phone:
            return "R"
        if phone in single_letter_consonants:
            return phone
        return "?"

    keys = set()
    for pronunciation in pronouncing.phones_for_word(word):
        rhyme_phones = pronouncing.rhyming_part(pronunciation).split()
        pattern = "".join(encode(phone) for phone in rhyme_phones)
        if pattern.strip("?") == "":
            continue  # nothing but placeholders: carries no consonant information
        trailing = "?" if pattern.endswith("?") else ""
        keys.add(pattern.replace("?", "") + trailing)
    return keys
def get_rhymes(w):
    """Return, as a set, the words that `pronouncing.rhymes` lists for `w`."""
    rhyming_words = pronouncing.rhymes(w)
    return set(rhyming_words)
def scheme_errors(poem: str, scheme: str, verbose=False):
    """
    Find errors with respect to a given rhyming scheme.

    args:
        poem: the poem text; it is split into lines with `split_poem`
        scheme: one letter per line, spaces are ignored (e.g. "ABBA ABBA CDC DCD")
        verbose: if True, print the rhyme groups and the per-word comparisons

    returns:
        {"line count": message} when the number of lines differs from the number
        of letters in the scheme. Otherwise a dict mapping each offending last
        word to {"reason", "internal", "external"}, where "internal"/"external"
        hold the perfect and slant rhymes found inside/outside the word's own
        rhyme group. The dict is empty when nothing was flagged.
    """
    lines = split_poem(poem)
    scheme = scheme.replace(" ", "")

    if len(lines) != len(scheme):
        return {
            "line count": f"Poem has {len(lines)} != {len(scheme)} lines in pattern {scheme}"
        }

    # Last word of every line, hyphenated compounds split ("candle-light" -> "light").
    # A line made only of hyphens has no words at all; it gets "" so that the
    # words stay aligned with the scheme instead of raising IndexError.
    last_words = []
    for line in lines:
        words = line.replace("-", " ").split()
        last_words.append(clean_word(words[-1]) if words else "")

    dictionary = pronouncing.cmudict.dict()  # we ignore words not in dictionary

    # One group of last words per scheme letter (letters in sorted order).
    groups = []
    for letter in sorted(set(scheme)):
        groups.append(
            [
                w
                for w, p in zip(last_words, scheme)
                if p == letter and w and w in dictionary
            ]
        )

    slant_sets = {w: slant_rhyming_parts(w) for g in groups for w in g}

    scores = {}

    if verbose:
        print(groups)

    for g in groups:
        internal_words = set(g)
        external_words = {w for h in groups if h is not g for w in h}
        if len(internal_words) == 1:
            continue  # don't check rhymes if only one word in the group is in dictionary
        for w in g:
            rhymes = get_rhymes(w)
            # scores[w] == [internal matches, external matches]
            scores[w] = []
            for comparisons in [internal_words, external_words]:
                m = dict(rhymes=[], slant_rhymes=[])
                scores[w].append(m)
                for v in comparisons:
                    if v == w:
                        continue
                    if v in rhymes:
                        m["rhymes"].append(v)
                    elif slant_sets[v] & slant_sets[w]:
                        m["slant_rhymes"].append(v)

    error_reasons = {}
    suspicious_reasons = {}

    for w in scores:
        internal, external = scores[w]

        if internal["rhymes"] or internal["slant_rhymes"]:
            pass  # ok if it rhymes (perfect or slant) with at least one other word in the group
        elif len(external["rhymes"]) >= 2:
            error_reasons[w] = "no internal rhymes, 2+ external perfect rhymes"
        elif external["rhymes"]:
            if len(external["slant_rhymes"]) >= 2:
                error_reasons[
                    w
                ] = "no internal rhymes, 1 external perfect rhyme, 2+ external slant rhymes"
            else:
                suspicious_reasons[
                    w
                ] = "no internal rhymes/slant rhymes, 1 external perfect rhymes"
        elif len(external["slant_rhymes"]) >= 3:
            error_reasons[
                w
            ] = "no internal rhymes/slant rhymes, 3+ external slant rhymes"
        if verbose:
            print(w, "internal:", internal, "external:", external)

    # Merely suspicious words only count as errors once there are at least three
    # flagged words (errors + suspicious) in total.
    if len(error_reasons) + len(suspicious_reasons) >= 3:
        error_reasons.update(suspicious_reasons)

    return {
        w: {
            "reason": error_reasons[w],
            "internal": scores[w][0],
            "external": scores[w][1],
        }
        for w in error_reasons
    }
def syllable_variations(text, verbose=False) -> Set[int]:
    """
    Given a text, return the set of possible numbers of syllables. It's a set because some words like "caramel" can
    be pronounced with different numbers of syllables.

    args:
        text: a line (or a single word) of text
        verbose: accepted for interface compatibility; not used

    returns:
        Every total obtainable by picking, for each word, any count between the
        smallest and the largest count `word_syllables` reports for it.
        {0} when the text contains no words.
    """
    ans = {0}
    # Words are separated by any whitespace, ASCII hyphens, or Unicode dashes
    # (U+2010..U+2015, which includes en and em dashes), so "storm\u2014they" is
    # counted as two words rather than one unknown word.
    for word in re.split(r"[\s\-\u2010-\u2015]+", text):
        word = clean_word(word)
        if not word:
            continue
        options = word_syllables(word)
        options = range(
            min(options), max(options) + 1
        )  # make it a range (so {2, 4} moves to [2, 3, 4])
        ans = {x + y for x in ans for y in options}
    return ans
def word_syllables(word: str) -> Set[int]:
    """
    Return the possible syllable counts of an already-cleaned word.

    The word must be unchanged by `clean_word`; otherwise an AssertionError is
    raised. Counting is delegated to `SyllableCounters.count_word`.
    """
    normalized = clean_word(word)
    assert (
        word == normalized
    ), "Word should be cleaned before hitting word_syllables cache"
    return SyllableCounters.count_word(word)
class SyllableCounters:
    """
    Simple class to count syllables in text.

    Acts as a namespace: every method is a static method, and the two lazily
    created resources are stored on the class itself so they are built at most
    once per process.
    """

    # Lazily loaded CMU dictionary (see cmu_dict).
    _cmu_dict = None
    # Lazily created pyphen hyphenator (see pyphen_counter).
    _pyphen_counter = None

    @staticmethod
    def cmu_dict():
        """Return the CMU pronouncing dictionary, loading it on first use."""
        if SyllableCounters._cmu_dict is None:
            SyllableCounters._cmu_dict = pronouncing.cmudict.dict()
        return SyllableCounters._cmu_dict

    @staticmethod
    def cmu(word):
        """Return the set of syllable counts over all pronunciations `pronouncing` knows for `word`."""
        return {
            pronouncing.syllable_count(pro) for pro in pronouncing.phones_for_word(word)
        }

    @staticmethod
    def pyphen_counter():
        """Return the shared English pyphen hyphenator, creating it on first use."""
        if SyllableCounters._pyphen_counter is None:
            SyllableCounters._pyphen_counter = pyphen.Pyphen(lang="en")
        return SyllableCounters._pyphen_counter

    @staticmethod
    def count_word(word) -> Set[int]:
        """
        Return the set of plausible syllable counts for `word`.

        Combines the counts from the pronouncing dictionary, the number of
        hyphenation pieces pyphen produces, and the `syllables` estimate.
        An empty word yields {0}. A count of 0 is dropped whenever some other
        count is available.
        """
        if not word:
            return {0}
        cmu = SyllableCounters.cmu(word)
        # pyphen inserts "-" at hyphenation points, so pieces = hyphens + 1
        pyph = SyllableCounters.pyphen_counter().inserted(word).count("-") + 1
        syll = syllables.estimate(word)
        ans = cmu | {pyph, syll}
        if len(ans) > 1:
            ans.discard(0)
        return ans
# Fixture for fixed_tests(): each entry is [line of verse, expected syllable count].
# fixed_tests() requires the expected count to be among the values that
# syllable_variations() returns for the line. The expected counts are not
# restricted to ALLOWED_SYLLABLES (some entries expect 9 or 12).
TESTS = [
    ["In savannah where tall trees kiss the sky,", 10],
    ["A giraffe named Joe with love-stricken grace,", 10],
    ["Did find a turtle named Sarah nearby,", 10],
    ["Their eyes did meet, hearts raced in sweet embrace.", 10],
    ["Though nature's laws deemed their love quite absurd,", 10],
    ["Joe's neck would bend to whisper words of flame,", 10],
    ["And Sarah's shell would tremble at each word,", 10],
    ["In love's bizarre dance, they found no one to blame.", 11],
    ["Through sun and storm, they'd wander, hoof and claw,", 10],
    ["With love that no one ever could unravel,", 11],
    ["In each other's eyes, perfection they saw,", 10],
    ["A love so fierce, no distance could they travel.", 11],
    ["So let us learn from turtle and giraffe,", 10],
    ["That love's own shape can make the coldest laugh.", 10],
    ["In yonder sky where colours blend so high,", 10],
    ["A rainbow arcs, a bridge 'twixt earth and air.", 10],
    ["Its radiant hues draw every gazing eye,", 12],
    ["A painter's dream, a sight beyond compare.", 10],
    ["Yet in the world of man, delight so small,", 10],
    ["As gumball's sphere, with colours bright and clear.", 10],
    ["Such simple joy it brings to one and all,", 10],
    ["Its sweetness matched by colours we hold dear.", 10],
    ["Both nature's arc and candy sphere delight,", 10],
    ["The vast expanse and tiny bite unite,", 10],
    ["In tales of wonder, stories to be told.", 10],
    ["So let us cherish both the grand and small,", 10],
    ["For beauty’s found in rainbow and in gumball.", 11],
    ["When night's embrace hath shrouded all in black,", 10],
    ["A flashlight's beam doth pierce the dark so deep,", 10],
    ["From paths we've chosen, and vows we mean to keep.", 11],
    ["Thou art like that beam, true, clear, and bright,", 9],
    ["Cutting through the fog of my mind's own night,", 10],
    ["Yet oft I find, by folly or by chance,", 10],
    ["Distractions lead my wandering glance.", 9],
    ["But even as stars, obscured by fleeting cloud,", 11],
    ["Return to grace the heavens, proud and loud,", 10],
    ["So shall my focus, once by ails distraught,", 10],
    ["Return to thee, as ever it hath sought.", 10],
    ["For in this world of fleeting sight and sound,", 10],
]
def fixed_tests():
    """
    Run the built-in syllable-count checks and return the list of failures.

    First, every (line, expected) pair in TESTS must have `expected` among
    `syllable_variations(line)`. Second, a few syllabically ambiguous words
    must admit each of their listed counts. Every failure is printed and
    recorded as a (text, expected count, variations) tuple; an empty list
    means that all checks passed.
    """
    failures = []

    for line, expected in TESTS:
        counts = syllable_variations(line)
        if expected in counts:
            continue
        print(f"Line `{line}` has {expected} syllables which isn't in {counts}")
        failures.append((line, expected, counts))

    # tests from https://www.mentalfloss.com/article/53661/car-mel-or-car-mel-3-reasons-syllabically-ambiguous-words :
    ambiguous_words = [
        (
            "fire tire hour liar buyer flower drawer layer loyal royal file orange poem crayon",
            [1, 2],
        ),
        (
            "caramel mayonnaise family chocolate camera different separate favorite realtor",
            [2, 3],
        ),
        ("mischievous", [3, 4]),
    ]
    for word_list, expected_counts in ambiguous_words:
        for w in word_list.split():
            counts = syllable_variations(w)
            for wanted in expected_counts:
                if wanted in counts:
                    continue
                print(
                    f"{w} give syllable_variations {counts} but should include {wanted}"
                )
                failures.append((w, wanted, counts))

    return failures
def summarize_errors(errors, num_samples):
    """
    Print a breakdown of sonnet failures.

    args:
        errors: dict mapping each failing poem to its error dict (as returned by
            `sonnet_errors`); poems without errors are not included
        num_samples: total number of poems checked, used as the denominator of
            every percentage

    Prints the overall failure rate followed by the share of poems with a wrong
    line count, missing words, syllable errors, rhyme errors, and both rhyme
    and syllable errors. When `num_samples` is 0 a single line is printed
    instead, since no rate can be computed. Returns None.
    """
    if num_samples == 0:
        print("Sonnet failure rate: n/a (0 poems checked)")
        return

    def has_rhyme_error(e):
        # Rhyme errors are keyed by the offending word, which never contains a
        # space; the other keys ("line count", "missing words",
        # "syllable errors") all do.
        return any(" " not in k for k in e)

    def rate(predicate):
        return sum(bool(predicate(e)) for e in errors.values()) / num_samples

    print(
        f"Sonnet failure rate: {len(errors)/num_samples:.1%} out of {num_samples:,}, breakdown:"
    )
    wnl = rate(lambda e: "line count" in e)
    print(f"{wnl:.1%} wrong number of lines")
    mw = rate(lambda e: "missing words" in e)
    print(f"{mw:.1%} missing words")
    bl = rate(lambda e: "syllable errors" in e)
    print(f"{bl:.1%} poems with at least one line with wrong number of syllables")
    rhyme_errors = rate(has_rhyme_error)
    both = rate(lambda e: "syllable errors" in e and has_rhyme_error(e))
    print(
        f"{rhyme_errors:.1%} poems with rhyme errors ({both:.1%} poems with both rhyme and syllable errors)"
    )
def corpus_check_scheme(corpus_filename, scheme):
    """
    Check every poem of a corpus file against `scheme` and print a summary.

    args:
        corpus_filename: path of a UTF-8 text file with poems separated by
            blank lines
        scheme: target passed unchanged to `sonnet_errors` for each poem

    For each failing poem the verbose diagnostics, the scheme, the poem and its
    error dict are printed; `summarize_errors` is called at the end. Returns None.
    """
    # Explicit encoding: the poems contain typographic quotes, which would be
    # mangled (or fail to decode) under a non-UTF-8 platform default.
    with open(corpus_filename, "r", encoding="utf-8") as f:
        chunks = f.read().split("\n\n")
    # Strip before filtering so whitespace-only chunks (extra blank lines, a
    # trailing newline) are not counted as empty poems.
    poems = [p for p in (chunk.strip() for chunk in chunks) if p]
    if not poems:
        print(f"No poems found in {corpus_filename}")
        return

    errors = {}
    for p in poems:
        e = sonnet_errors(p, scheme)
        if e:
            errors[p] = e
            print("*" * 50)
            # Re-run in verbose mode purely for its printed diagnostics.
            sonnet_errors(p, scheme, verbose=True)
            print("scheme", scheme)
            print(p)
            print()
            print(e)
            print("<" * 50)
    summarize_errors(errors, len(poems))
def test():
    """
    Smoke-test `sonnet_errors` on three well-known sonnets.

    Asserts that each sonnet passes under the scheme it is given here, and that
    the second one is reported as erroneous under "ABAB CDCD EFEF GG". The last
    two calls assert nothing: the first prints its result, and the second only
    stores it (its lines carry rhyme labels such as "(A)", the final one being
    "(D).").
    """
    # Scheme "ABBA ABBA CDCDCD": expected to produce no errors.
    assert not sonnet_errors(
        """Not like the brazen giant of Greek fame,
With conquering limbs astride from land to land;
Here at our sea-washed, sunset gates shall stand
A mighty woman with a torch, whose flame
Is the imprisoned lightning, and her name
Mother of Exiles. From her beacon-hand
Glows world-wide welcome; her mild eyes command
The air-bridged harbor that twin cities frame.
"Keep, ancient lands, your storied pomp!" cries she
With silent lips. "Give me your tired, your poor,
Your huddled masses yearning to breathe free,
The wretched refuse of your teeming shore.
Send these, the homeless, tempest-tost to me,
I lift my lamp beside the golden door!"
""",
        "ABBA ABBA CDCDCD",
    )
    # Same kind of scheme written in lower case: expected to produce no errors.
    assert not sonnet_errors(
        """How do I love thee? Let me count the ways.
I love thee to the depth and breadth and height
My soul can reach, when feeling out of sight
For the ends of being and ideal grace.
I love thee to the level of every day’s
Most quiet need, by sun and candle-light.
I love thee freely, as men strive for right.
I love thee purely, as they turn from praise.
I love thee with the passion put to use
In my old griefs, and with my childhood’s faith.
I love thee with a love I seemed to lose
With my lost saints. I love thee with the breath,
Smiles, tears, of all my life; and, if God choose,
I shall but love thee better after death.""",
        "abba abba cdcdcd",
    )
    # Scheme "ABAB CDCD EFEF GG": expected to produce no errors.
    assert not sonnet_errors(
        """When, in disgrace with fortune and men’s eyes,
I all alone beweep my outcast state,
And trouble deaf heaven with my bootless cries,
And look upon myself, and curse my fate,
Wishing me like to one more rich in hope,
Featur’d like him, like him with friends possess’d,
Desiring this man’s art and that man’s scope,
With what I most enjoy contented least;
Yet in these thoughts myself almost despising,
Haply I think on thee, and then my state,
Like to the lark at break of day arising
From sullen earth, sings hymns at heaven’s gate;
For thy sweet love remember’d such wealth brings
That then I scorn to change my state with kings.""",
        "ABAB CDCD EFEF GG",
    )
    # Same poem as the second case, but checked against a scheme it does not
    # follow: errors are expected.
    assert sonnet_errors(
        """How do I love thee? Let me count the ways.
I love thee to the depth and breadth and height
My soul can reach, when feeling out of sight
For the ends of being and ideal grace.
I love thee to the level of every day’s
Most quiet need, by sun and candle-light.
I love thee freely, as men strive for right.
I love thee purely, as they turn from praise.
I love thee with the passion put to use
In my old griefs, and with my childhood’s faith.
I love thee with a love I seemed to lose
With my lost saints. I love thee with the breath,
Smiles, tears, of all my life; and, if God choose,
I shall but love thee better after death.""",
        "ABAB CDCD EFEF GG",
    )
    # No assertion from here on: the result is only printed for inspection.
    aaa = sonnet_errors(
        """How do I love thee? Let me count the ways.
I love thee to the depth and breadth and height
My soul can reach, when feeling out of sight
For the ends of being and ideal grace.
I love thee to the level of every day’s
Most quiet need, by sun and candle-light.
I love thee freely, as men strive for right.
I love thee purely, as they turn from praise.
I love thee with the passion put to use
In my old griefs, and with my childhood’s faith.
I love thee with a love I seemed to lose
With my lost saints. I love thee with the breath,
Smiles, tears, of all my life; and, if God choose,
I shall but love thee better after death.""",
        "ABBA ABBA CDC DCD",
        # abba abba cdc dcd: (correct)
        # "ABAB CDCD EFEF GG", (false)
    )
    print(aaa)
    # Variant whose lines end with a rhyme label "(A)".."(D)"; the result is
    # stored but neither printed nor asserted in the visible code.
    aaa = sonnet_errors(
        """How do I love thee? Let me count the ways (A)
I love thee to the depth and breadth and height (B)
My soul can reach, when feeling out of sight (B)
For the ends of being and ideal grace (A)
I love thee to the level of every day’s (A)
Most quiet need, by sun and candle-light (B)
I love thee freely, as men strive for right (B)
I love thee purely, as they turn from praise (A)
I love thee with the passion put to use (C)
In my old griefs, and with my childhood’s faith (D)
I love thee with a love I seemed to lose (C)
With my lost saints. I love thee with the breath (D)
Smiles, tears, of all my life; and, if God choose (C)
I shall but love thee better after death (D).""",
        "ABBA ABBA CDC DCD",
        # abba abba cdc dcd: (correct)
        # "ABAB CDCD EFEF GG", (false)
    )