Problem of the Day
A new programming or logic puzzle every Mon-Fri

Markdown Parser

Happy Monday! Hope you had a good weekend. Over the weekend Github Flavored Markdown and permalinking were added to comments. Hopefully this makes posting and sharing easier for all. Other cool new features are on the way!

In honor of the Markdown parser upgrade, today's problem will be somewhat related. We'll be building a very basic Markdown compiler of our own. Our Markdown parser will handle the following:

  • Headers
  • Single depth lists
  • Paragraphs

Sample input:

## Monday March 24th 2014 - Todo List

* Solve the Problem of the Day
* Eat breakfast
* Do work
* Party

Today was a good day

Sample output:

<h2>Monday March 24th 2014 - Todo List</h2>
<ul>
    <li>Solve the Problem of the Day</li>
    <li>Eat breakfast</li>
    <li>Do work</li>
    <li>Party</li>
</ul>
<p>Today was a good day</p>

Permalink: http://problemotd.com/problem/markdown-parser/

Comments:

  • Carlos - 10 years, 9 months ago

    Well, parsers are just hard work, so here's a simple solution that might work for most cases:

    /**
     * Converts a small Markdown subset to HTML and prints it to standard output.
     *
     * Supported constructs: lines starting with "##" become h2 headers, runs of
     * lines starting with "*" become one ul list, blank lines are skipped, and
     * every other line becomes its own paragraph.
     *
     * @param text the Markdown source; lines may end in \r\n, \r or \n
     */
    public static void parse(String text) {
        boolean bullets = false;
        StringBuilder result = new StringBuilder();

        for (String line : text.split("\r\n|\r|\n")) {
            // The bullet marker was lost in the posted code (startsWith("")),
            // which made every non-header line look like a list item.
            boolean isBullet = line.startsWith("*");

            // Close the list as soon as a non-bullet line follows it, so that
            // one-item lists are closed as well.
            if (bullets && !isBullet) {
                result.append("</ul>\n");
                bullets = false;
            }

            // split() already removed the line terminators, so a blank line is
            // an empty (or whitespace-only) string, never "\n".
            if (line.trim().isEmpty()) {
                continue;
            }

            if (line.startsWith("##")) {
                result.append("<h2>").append(line.substring(2).trim()).append("</h2>\n");
            } else if (isBullet) {
                if (!bullets) {
                    result.append("<ul>\n");
                    bullets = true;
                }
                result.append("\t<li>").append(line.substring(1).trim()).append("</li>\n");
            } else {
                result.append("<p>").append(line).append("</p>\n");
            }
        }

        // A list that runs to the end of the input still has to be closed.
        if (bullets) {
            result.append("</ul>\n");
        }

        System.out.println(result);
    }
    

    reply permalink

  • Carlos - 10 years, 9 months ago

    Well, the code got f*cked up and I have no idea what went wrong. It's about time you added accounts to this site and gave us the ability to edit OR a preview button.

    reply permalink

  • Max Burstein - 10 years, 9 months ago

    Accounts are on the way. I realize this is a major point so it should hopefully be done sometime within the next week or 2. The site now supports github flavored markdown though so posting code is as easy as placing it between 3 backticks. https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks

    Thanks for your submission!

    reply permalink

  • Carlos - 10 years, 9 months ago

    Thanks for the reply and the understanding. Sorry if I sounded pedantic and uneducated; I had a bit of a rough morning.

    reply permalink

  • Apanatshka - 10 years, 9 months ago

    I was going to implement this in SDF3 and Stratego, but then I realised that Markdown has some pretty specific whitespace rules in its syntax and SDF3 doesn't have a nice way of expressing that yet :(

    reply permalink

  • Hueho - 10 years, 9 months ago

    I went with the easiest way to just get the output for the input, without cheating:

    # Renders one Markdown header line as an HTML heading.
    #
    # The heading level is the number of '#' characters at the start of +line+.
    # Only that leading marker (and an optional closing run of '#' separated
    # from the title by whitespace) is removed, so a '#' inside the title, as
    # in "C#", is kept.
    #
    # Returns the "<hN>title</hN>" string followed by a newline.
    def header(line)
      level = line[/\A#*/].size
      title = line.sub(/\A#+/, '').sub(/\s+#+\s*\z/, '').strip
      # The closing tag needs the slash; the original emitted a second opening tag.
      "<h#{level}>#{title}</h#{level}>\n"
    end
    
    # Renders the run of bullet lines at the start of +lines+ as an HTML list.
    #
    # A bullet line is one that starts with '*'. Only the leading marker is
    # stripped, so asterisks inside the item text are preserved.
    #
    # Returns a two-element array: the "<ul>...</ul>" markup (newline
    # terminated) and the number of input lines consumed.
    def list(lines)
      items = lines.take_while { |l| l.start_with?('*') }
      content = items.map { |l| "\t<li>#{l.sub(/\A\*+/, '').strip}</li>" }.join("\n")
      return "<ul>\n#{content}\n</ul>\n", items.size
    end
    
    # Renders the run of plain-text lines at the start of +lines+ as one paragraph.
    #
    # The run stops at the first line that is blank or that starts with '#'
    # or '*'. The collected lines are concatenated as they are and the result
    # is stripped of surrounding whitespace.
    #
    # Returns a two-element array: the "<p>...</p>" markup (newline
    # terminated) and the number of input lines consumed.
    def paragraph(lines)
      body = lines.take_while do |text|
        !text.strip.empty? && !text.start_with?('#', '*')
      end
      html = "<p>#{body.join.strip}</p>\n"
      [html, body.size]
    end
    
    # Converts an array of Markdown lines into an HTML string.
    #
    # Each line is dispatched on its first character: '#' goes to header,
    # '*' starts a list, blank lines are skipped and anything else starts a
    # paragraph. list and paragraph report how many lines they consumed so the
    # cursor can jump past the whole block.
    def parse(lines)
      html = ''
      pos = 0

      while pos < lines.size
        line = lines[pos]

        if line[0] == '#'
          html << header(line)
          pos += 1
        elsif line[0] == '*'
          block, consumed = list(lines[pos..-1])
          html << block
          pos += consumed
        elsif line.strip.empty?
          pos += 1
        else
          block, consumed = paragraph(lines[pos..-1])
          html << block
          pos += consumed
        end
      end

      html
    end
    
    # Script entry point: read every input line through ARGF and print the generated HTML.
    print(parse ARGF.readlines)
    

    reply permalink

  • Max Burstein - 10 years, 9 months ago

    Looks good and easy to read. I like it

    reply permalink

  • vick - 10 years, 9 months ago

    My humble solution would be: public static String render(String markdownText) throws Exception { BufferedReader br = new BufferedReader(new StringReader(markdownText)); StringBuilder sb = new StringBuilder(); String line; boolean inList = false; while ((line = br.readLine()) != null) { if (line.startsWith("*")) { if (!inList) { inList = true; sb.append("<ul>").append("\n"); } else { sb.append("<li>").append(line.substring(1).trim()).append("</li>").append("\n"); } } else { if (inList) { inList = false; sb.append("</ul>").append("\n"); } if (line.startsWith("##")) { sb.append("<h2>").append(line.substring(2).trim()).append("</h2>").append("\n"); } else if (!line.trim().isEmpty()) { sb.append("<p>").append(line.trim()).append("</p>").append("\n"); } } } return sb.toString(); } ```

    reply permalink

  • vick - 10 years, 9 months ago

    public class MarkdownParser {
    
        public static String render(String markdownText) throws Exception {
            BufferedReader br = new BufferedReader(new StringReader(markdownText));
            StringBuilder sb = new StringBuilder();
            String line;
            boolean inList = false;
            while ((line = br.readLine()) != null) {
                if (line.startsWith("*")) {
                    if (!inList) {
                        inList = true;
                        sb.append("<ul>").append("\n");
                    } else {
                        sb.append("<li>").append(line.substring(1).trim()).append("</li>").append("\n");
                    }
                } else {
                    if (inList) {
                        inList = false;
                        sb.append("</ul>").append("\n");
                    }
                    if (line.startsWith("##")) {
                        sb.append("<h2>").append(line.substring(2).trim()).append("</h2>").append("\n");
                    } else if (!line.trim().isEmpty()) {
                        sb.append("<p>").append(line.trim()).append("</p>").append("\n");
                    }
                }
            }
            return sb.toString();
        }
    }
    

    reply permalink

  • Efraya - 10 years, 9 months ago

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;

    namespace Markdown_Parser
    {
        /// <summary>
        /// Console tool that converts a small Markdown subset ("## " headers,
        /// single-depth "* " lists, one-line paragraphs) to HTML.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Reads the .txt file named by the single command-line argument and
            /// writes the generated HTML to the console, one element per line.
            /// </summary>
            /// <param name="args">Exactly one argument: the path of a .txt file.</param>
            static void Main(string[] args)
            {
                if (args.Length != 1 || !args[0].EndsWith(".txt"))
                {
                    Console.WriteLine("Supply a .txt file to get a markdown parser");
                    Console.ReadKey();
                    // Stop here; the original fell through to args[0] and the file read.
                    return;
                }

                List<string> inFile = File.ReadAllLines(args[0]).ToList();
                List<string> outFile = new List<string>();

                bool startedList = false;

                foreach (string line in inFile)
                {
                    bool isItem = line.StartsWith("* ");

                    if (startedList && !isItem)
                    {
                        startedList = false;
                        outFile.Add("</ul>");
                    }

                    if (line.StartsWith("## "))
                    {
                        // Substring removes only the marker; TrimStart(char[]) would also
                        // eat '#' or ' ' characters that belong to the title.
                        outFile.Add(string.Format("<h2>{0}</h2>", line.Substring(3).TrimStart()));
                    }
                    else if (string.IsNullOrEmpty(line))
                    {
                        outFile.Add(string.Empty);
                    }
                    else if (isItem)
                    {
                        if (!startedList)
                        {
                            startedList = true;
                            outFile.Add("<ul>");
                        }

                        // Emit the item for every bullet line, including the one that
                        // opened the list; the original dropped that first item.
                        outFile.Add(string.Format("\t<li>{0}</li>", line.Substring(2).TrimStart()));
                    }
                    else
                    {
                        outFile.Add(string.Format("<p>{0}</p>", line));
                    }
                }

                // A list that runs to the end of the file still has to be closed.
                if (startedList)
                {
                    outFile.Add("</ul>");
                }

                outFile.ForEach(Console.WriteLine);
                Console.ReadKey();
            }
        }
    }

    reply permalink

  • Evgeny Tataurov - 10 years, 9 months ago

    My very first Elixir code

    ```elixir
    defmodule Markdown do
      @moduledoc """
      Minimal Markdown-to-HTML converter: `## ` headers, single-depth `* ` lists
      and one-line paragraphs.
      """

      @doc """
      Reads the file at `filename` line by line and prints the generated HTML.

      Raises a `MatchError` when the file cannot be opened.
      """
      def parse(filename) do
        {:ok, file} = File.open(filename, [:read, :utf8])

        try do
          file |> readline() |> parse_line(file, []) |> IO.puts()
        after
          # Release the handle even if parsing raises.
          File.close(file)
        end
      end

      # `acc` holds the HTML lines emitted so far, newest first.
      defp parse_line(:eof, _file, acc), do: acc |> Enum.reverse() |> Enum.join("\n")

      defp parse_line("## " <> rest, file, acc) do
        parse_line(readline(file), file, ["<h2>" <> String.trim(rest) <> "</h2>" | acc])
      end

      defp parse_line("* " <> _ = line, file, acc) do
        {next_line, items} = parse_list(line, file, [])
        # `items` is newest first as well, so the closing tag is pushed on top.
        parse_line(next_line, file, ["</ul>" | items] ++ ["<ul>" | acc])
      end

      defp parse_line(line, file, acc) when is_binary(line) do
        # Trimming first makes whitespace-only and "\r\n" lines count as blank.
        case String.trim(line) do
          "" -> parse_line(readline(file), file, acc)
          text -> parse_line(readline(file), file, ["<p>" <> text <> "</p>" | acc])
        end
      end

      # Consumes consecutive bullet lines; returns the first non-bullet line
      # (or `:eof`) together with the collected items, newest first.
      defp parse_list("* " <> rest, file, items) do
        parse_list(readline(file), file, ["\t<li>" <> String.trim(rest) <> "</li>" | items])
      end

      defp parse_list(line, _file, items), do: {line, items}

      defp readline(file), do: IO.read(file, :line)
    end

    Markdown.parse(hd System.argv)
    ```

    reply permalink

  • Evgeny Tataurov - 10 years, 9 months ago

    Another try

    ```
    defmodule Markdown do
      @moduledoc """
      Minimal Markdown-to-HTML converter: `## ` headers, single-depth `* ` lists
      and one-line paragraphs.
      """

      @doc """
      Reads the file at `filename` line by line and prints the generated HTML.

      Raises a `MatchError` when the file cannot be opened.
      """
      def parse(filename) do
        {:ok, file} = File.open(filename, [:read, :utf8])

        try do
          file |> readline() |> parse_line(file, []) |> IO.puts()
        after
          # Release the handle even if parsing raises.
          File.close(file)
        end
      end

      # `acc` holds the HTML lines emitted so far, newest first.
      defp parse_line(:eof, _file, acc), do: acc |> Enum.reverse() |> Enum.join("\n")

      defp parse_line("## " <> rest, file, acc) do
        parse_line(readline(file), file, ["<h2>" <> String.trim(rest) <> "</h2>" | acc])
      end

      defp parse_line("* " <> _ = line, file, acc) do
        {next_line, items} = parse_list(line, file, [])
        # `items` is newest first as well, so the closing tag is pushed on top.
        parse_line(next_line, file, ["</ul>" | items] ++ ["<ul>" | acc])
      end

      defp parse_line(line, file, acc) when is_binary(line) do
        # Trimming first makes whitespace-only and "\r\n" lines count as blank.
        case String.trim(line) do
          "" -> parse_line(readline(file), file, acc)
          text -> parse_line(readline(file), file, ["<p>" <> text <> "</p>" | acc])
        end
      end

      # Consumes consecutive bullet lines; returns the first non-bullet line
      # (or `:eof`) together with the collected items, newest first.
      defp parse_list("* " <> rest, file, items) do
        parse_list(readline(file), file, ["\t<li>" <> String.trim(rest) <> "</li>" | items])
      end

      defp parse_list(line, _file, items), do: {line, items}

      defp readline(file), do: IO.read(file, :line)
    end

    Markdown.parse(hd System.argv)
    ```

    I think the commenting form lacks a previewer

    reply permalink

Content curated by @MaxBurstein