Skip to content

Commit

Permalink
Fix #57 -- SmartyPants handling of single quotes.
Browse files Browse the repository at this point in the history
  • Loading branch information
mmorearty authored and Mike Morearty committed Feb 14, 2013
1 parent 6f515c2 commit d949cbd
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 3 deletions.
45 changes: 42 additions & 3 deletions ext/redcarpet/html_smartypants.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,25 @@ word_boundary(uint8_t c)
return c == 0 || isspace(c) || ispunct(c);
}

// If 'text' begins with any kind of single quote -- either the literal
// character "'" or one of its HTML entity spellings ("&#39;", "&#x27;",
// "&apos;") -- returns the number of bytes that make up that single quote.
// Otherwise, returns zero.
//
// text: input bytes; only the first 'size' bytes are examined, so the buffer
//       does not need to be NUL-terminated.
// size: number of readable bytes at 'text'.
//
// Matching is an exact byte comparison, so entity spellings are
// case-sensitive.
static int
squote_len(const uint8_t *text, size_t size)
{
	static const char *const single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		// Keep the length unsigned so the comparison against 'size' is
		// not a mixed signed/unsigned comparison.
		size_t len = strlen(*p);

		// Only compare when the whole candidate fits in the input.
		if (size >= len && memcmp(text, *p, len) == 0) {
			return (int)len;
		}
	}

	return 0;
}

// Converts " or ' at very beginning or end of a word to left or right quote
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
Expand All @@ -100,23 +119,31 @@ smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uin
return 1;
}

// Converts ' to left or right single quote.
// Be careful not to assume that text[0] is "'" -- it might not be. It might be
// pointing to the semicolon in "&#39;" or something similar.
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 2) {
uint8_t t1 = tolower(text[1]);
int next_squote_len = squote_len(text+1, size-1);

if (t1 == '\'') {
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
return 1;
// convert '' to “ or ”
if (next_squote_len > 0) {
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
return next_squote_len;
}

// Tom's, isn't, I'm, I'd
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
(size == 3 || word_boundary(text[2]))) {
BUFPUTSL(ob, "’");
return 0;
}

// you're, you'll, you've
if (size >= 3) {
uint8_t t2 = tolower(text[2]);

Expand All @@ -137,6 +164,7 @@ smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}

// Converts (c), (r), (tm)
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand Down Expand Up @@ -164,6 +192,7 @@ smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}

// Converts "--" to em-dash, etc.

This comment has been minimized.

Copy link
@parkr

parkr Sep 29, 2013

This is really annoying for programmers, just a heads-up:

jekyll serve --watch

If that were to turn into &mdash; watch, it would be incorrect. Is there a way to override this?

This comment has been minimized.

Copy link
@mmorearty

mmorearty Sep 29, 2013

Author Contributor

Enclose it in backticks.

This comment has been minimized.

Copy link
@parkr

parkr Sep 29, 2013

Kewl.

static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand All @@ -181,6 +210,7 @@ smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t prev
return 0;
}

// Converts &quot; etc.
static size_t
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand All @@ -189,13 +219,19 @@ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previ
return 5;
}

int len = squote_len(text, size);
if (len > 0) {
return (len-1) + smartypants_cb__squote(ob, smrt, previous_char, text+(len-1), size-(len-1));
}

if (size >= 4 && memcmp(text, "�", 4) == 0)
return 3;

bufputc(ob, '&');
return 0;
}

// Converts "..." to ellipsis
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand All @@ -213,6 +249,7 @@ smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}

// Converts `` to opening double quote
static size_t
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand All @@ -224,6 +261,7 @@ smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t
return 0;
}

// Converts 1/2, 1/4, 3/4
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand Down Expand Up @@ -256,6 +294,7 @@ smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}

// Converts " to left or right double quote
static size_t
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
Expand Down
28 changes: 28 additions & 0 deletions test/redcarpet_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,34 @@ def test_that_smart_gives_d_suffix_a_rsquo
end
end

# Exercises Redcarpet::Render::SmartyHTML: quotes in ordinary text are
# expected to become typographic entities, while quotes inside code
# (indented blocks and backtick spans) are expected to be left as plain
# HTML-escaped characters.
class SmartyHTMLTest < Test::Unit::TestCase
  def setup
    @smarty_markdown = Redcarpet::Markdown.new(Redcarpet::Render::SmartyHTML)
  end

  # An apostrophe inside a word becomes a right single quote.
  def test_that_smartyhtml_converts_single_quotes
    markdown = @smarty_markdown.render("They're not for sale.")
    assert_equal "<p>They&rsquo;re not for sale.</p>\n", markdown
  end

  # A pair of straight double quotes becomes left/right double quotes.
  def test_that_smartyhtml_converts_double_quotes
    rd = @smarty_markdown.render(%("Quoted text"))
    assert_equal %(<p>&ldquo;Quoted text&rdquo;</p>\n), rd
  end

  # Markdown only treats a line as an indented code block when it starts
  # with at least four spaces (or a tab); with fewer the line is a normal
  # paragraph and this test would not exercise the <pre> path at all.
  def test_that_smartyhtml_ignores_pre
    rd = @smarty_markdown.render("    It's a test of \"pre\"\n")
    expected = "It&#39;s a test of &quot;pre&quot;"
    assert rd.include?(expected), "\"#{rd}\" should contain \"#{expected}\""
  end

  # Quotes inside a backtick code span must not be converted either.
  def test_that_smartyhtml_ignores_code
    rd = @smarty_markdown.render("`It's a test of \"code\"`\n")
    expected = "It&#39;s a test of &quot;code&quot;"
    assert rd.include?(expected), "\"#{rd}\" should contain \"#{expected}\""
  end
end

class HTMLRenderTest < Test::Unit::TestCase
def setup
@markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML)
Expand Down

0 comments on commit d949cbd

Please sign in to comment.