A heading line in Org-mode markup (commonly used by Emacs users, since Emacs has a major mode for working with this format)
is described in the documentation as follows:
A Heading is an unindented line structured according to the following pattern:
STARS KEYWORD PRIORITY COMMENT TITLE TAGS
where
- STARS refers to a sequence of asterisks
- KEYWORD is either the string “DONE” or the string “TODO”
- PRIORITY is of the form “[#X]”, where ‘[’, ‘#’, and ‘]’ are the literal characters and X is any alphanumeric character
- COMMENT is the string “COMMENT”
- TITLE is a string which does not start or end with spaces and contains no line breaks
- TAGS is a list of strings “:TAG1:TAG2:TAG3:...:”, where the TAGi are strings containing a mix of alphanumeric characters as well as any of the special characters ‘%’, ‘@’, ‘_’, and ‘#’
All of the fields are optional except STARS.
I want to parse an Org header line and return the data. The code that I wrote for this is long and verbose and I am looking for constructive criticism.
module Heading : sig
  (** Parser for a single Org-mode heading line of the form
      [STARS KEYWORD PRIORITY COMMENT TITLE TAGS], where every field except
      [STARS] is optional. *)

  (** Workflow keyword that may follow the stars. *)
  type keyword =
    | TODO
    | DONE

  (** [pp_keyword fmt k] prints [k] as its bare name, ["TODO"] or ["DONE"]. *)
  val pp_keyword : Format.formatter -> keyword -> unit

  (** Textual spellings of the {!keyword} constructors, in matching order. *)
  val reserved_keywords : string list

  (** Marker recording that the heading carries the literal word [COMMENT]. *)
  type comment = Comment

  (** The single alphanumeric character [X] of a [[#X]] priority cookie. *)
  type priority = char

  (** A tag name, without the surrounding colons. *)
  type tag = string

  (** Tag names considered reserved. The parser in this module does not
      consult this list.
      NOTE(review): Org itself spells its archive tag [ARCHIVE]; confirm
      that ["ARCHIVED"] is the intended spelling. *)
  val reserved_tags : tag list

  (** [stars str] is the index of the first space in [str]. For a line
      accepted by {!is_headingline} this equals the number of leading
      asterisks.
      @raise Not_found if [str] contains no space. *)
  val stars : string -> int

  (* type timestamp = int * int * int * string *)

  (** [(level, keyword, priority, comment, title, tags)]. *)
  type line_data =
    int * keyword option * priority option * comment option * string * tag list

  (** [parse_headingline str] splits a heading line into its fields.
      Fields are recognised in the fixed order keyword, priority, comment;
      whatever remains is the title, minus a trailing [:tag1:tag2:] run.
      The title is returned without leading or trailing whitespace.
      @raise Invalid_argument if [is_headingline str] is [false]. *)
  val parse_headingline : string -> line_data

  (** [is_headingline str] is [true] iff [str] starts with one or more
      asterisks immediately followed by a space. *)
  val is_headingline : string -> bool
end = struct
  type keyword =
    | TODO
    | DONE

  type comment = Comment
  type priority = char [@@deriving show]
  type tag = string [@@deriving show]

  type line_data =
    int * keyword option * priority option * comment option * string * tag list

  (* Single source of truth tying each keyword's spelling to its constructor,
     so the parser never needs an "impossible" fallback branch. *)
  let keyword_table = [ ("TODO", TODO); ("DONE", DONE) ]
  let reserved_keywords = List.map fst keyword_table
  let reserved_tags = [ "ARCHIVED" ]

  let pp_keyword fmt keyword =
    let name =
      match keyword with
      | TODO -> "TODO"
      | DONE -> "DONE"
    in
    Format.pp_print_string fmt name

  (* Characters treated as field separators. *)
  let is_space = function
    | ' ' | '\t' | '\n' | '\011' | '\012' | '\r' -> true
    | _ -> false

  (* [drop_prefix str n] is [str] without its first [n] characters, or [""]
     when [str] has at most [n] characters. *)
  let drop_prefix str n =
    let len = String.length str in
    if n >= len then "" else String.sub str n (len - n)

  (* [lstrip str] is [str] without its leading whitespace. *)
  let lstrip str =
    let len = String.length str in
    let rec first_non_space i =
      if i < len && is_space str.[i] then first_non_space (i + 1) else i
    in
    drop_prefix str (first_non_space 0)

  (* [strip_word ~word str] is [Some rest] when [str] starts with [word] as a
     whole word, i.e. followed by whitespace or the end of the string, where
     [rest] is what follows with leading whitespace removed. The boundary
     check keeps titles such as "TODOs" or "COMMENTARY" intact. *)
  let strip_word ~word str =
    let word_len = String.length word in
    if not (String.starts_with ~prefix:word str) then None
    else if String.length str > word_len && not (is_space str.[word_len]) then
      None
    else Some (lstrip (drop_prefix str word_len))

  let stars str = String.index str ' '

  let is_headingline =
    let header_regexp = Str.regexp {|\*+ |} in
    fun str -> Str.string_match header_regexp str 0

  (* Returns the heading level and the text following the stars. Only
     meaningful on lines accepted by [is_headingline]. *)
  let count_and_strip_stars str : int * string =
    let level = stars str in
    (level, lstrip (drop_prefix str level))

  let keyword_and_strip_keyword str : keyword option * string =
    let try_keyword (word, keyword) =
      Option.map (fun rest -> (Some keyword, rest)) (strip_word ~word str)
    in
    match List.find_map try_keyword keyword_table with
    | Some parsed -> parsed
    | None -> (None, lstrip str)

  let priority_regexp = Str.regexp {|\[#[A-Za-z0-9]\]|}

  let priority_and_strip_priority str : priority option * string =
    if Str.string_match priority_regexp str 0 then
      (* The cookie is exactly "[#X]": X sits at index 2 and the cookie is
         4 characters long. *)
      (Some str.[2], lstrip (drop_prefix str 4))
    else (None, lstrip str)

  let comment_and_strip_comment str : comment option * string =
    match strip_word ~word:"COMMENT" str with
    | Some rest -> (Some Comment, rest)
    | None -> (None, lstrip str)

  let tag_regexp = Str.regexp {|:\([A-Za-z0-9_@#%]+:\)+ *$|}

  (* Splits the remaining text into the title and the trailing tag run.
     The tag run must start the text or be preceded by whitespace, so a
     title such as "foo:bar:" is not mistaken for a title plus a tag. *)
  let tag_and_strip_tags str : string * tag list =
    let no_tags = (String.trim str, []) in
    match Str.search_forward tag_regexp str 0 with
    | exception Not_found -> no_tags
    | start ->
      if start > 0 && not (is_space str.[start - 1]) then no_tags
      else
        let tags =
          Str.matched_string str
          (* The match includes any spaces after the final ':'; drop them
             so they do not show up as a whitespace-only tag. *)
          |> String.trim
          |> String.split_on_char ':'
          |> List.filter (fun tag -> not (String.equal tag ""))
        in
        (String.trim (String.sub str 0 start), tags)

  let parse_headingline (str : string) : line_data =
    if not (is_headingline str) then
      raise (Invalid_argument "Str should be of the form \"**** \"")
    else
      let n_stars, rest = count_and_strip_stars str in
      let keyword, rest = keyword_and_strip_keyword rest in
      let priority, rest = priority_and_strip_priority rest in
      let comment, rest = comment_and_strip_comment rest in
      let title, tags = tag_and_strip_tags rest in
      (n_stars, keyword, priority, comment, title, tags)
end