Hello!
I’m quite new to OCaml, and my brain is still stuck in C-world. Reading questions that I don’t even understand at all is proof enough.
So I ask you to comment my code and to get it more OCaml-ish. I encourage you to nitpick. Indentation, naming, … as you like.
This is only part of a self-assigned small project.
type tag_variant = XML_tag_empty | XML_tag_open | XML_tag_close | XML_no_tag (* <x/> | <x> | </x> | no tag found *)
exception XML_attrib_failure
let escape_pair_list = [("&", "&"); ("<", "<"); (">", ">"); ("'", "'"); (""", "\"")]
type tag_attr_rcrd_t = {
attrib : string;
value : string;
}
type xml_element_rcrd_t = {
tag_str : string;
tag_type : tag_variant;
tag_attrs : tag_attr_rcrd_t list;
value : string;
children : xml_element_rcrd_t list;
}
let rec xml_format ?(indent = 2) ?(level = 0) element =
let flat = indent = 0 in
let get_fmt tag_type : _ format =
match tag_type with
| XML_tag_empty -> if flat then "%s<%s/>" else "\n%s<%s/>"
| XML_tag_open -> if flat then "%s<%s" else "\n%s<%s"
| XML_tag_close -> "%s<%s/>"
| XML_no_tag -> "\n%s error %s"
in
let buff = Buffer.create 100 in
bprintf buff (get_fmt element.tag_type) (String.make (indent * level) ' ') element.tag_str;
List.iter (fun attr -> (bprintf buff " %s='%s'" attr.attrib attr.value)) element.tag_attrs;
bprintf buff ">";
(* No value may follow when children follow *)
if element.children = [] then
bprintf buff "\"%s\"" element.value;
List.iter (fun child -> bprintf buff "%s" (xml_format ~indent:indent ~level:(level + 1) child)) element.children;
if element.tag_type = XML_tag_open then begin
if element.children <> [] && not flat then
bprintf buff "\n%s" (String.make (indent * level) ' ');
bprintf buff (get_fmt XML_tag_close) "" element.tag_str;
end;
Buffer.contents buff
(* ------------------------------------------------------------------------- *)
let get_rest_str in_str start : string =
if start >= String.length in_str then
""
else
String.sub in_str start ((String.length in_str) - start)
(* ------------------------------------------------------------------------- *)
let rec unescape_str ?(ndx = 0) ?(pair_list = escape_pair_list) str : string =
let matches_at_index pair_list str index =
try
Some (List.find (fun pair -> match pair with (l, _) -> String.starts_with ~prefix:l (get_rest_str str index)) pair_list)
with Not_found -> None
in
if ndx >= String.length str then
str
else begin
let ref_str = ref str in
try
let amp_ndx = String.index_from !ref_str ndx '&' in
match (matches_at_index pair_list !ref_str amp_ndx) with
| Some (l, r) -> ref_str := String.sub str 0 amp_ndx ^ r ^ String.sub str (amp_ndx + String.length l) (String.length str - amp_ndx - String.length l);
unescape_str ~ndx:(amp_ndx + 1) ~pair_list:(pair_list) !ref_str
| None -> !ref_str
with Not_found -> !ref_str
end
(* ------------------------------------------------------------------------- *)
let rec trim_left in_str : string =
if String.length in_str = 0 then
""
else
match in_str.[0] with
| '\r'
| '\n'
| '\t'
| '\012'
| ' ' -> trim_left (String.sub in_str 1 ((String.length in_str) - 1))
| _ -> in_str
type state = Start | Att_start | Equal | Value_start | Assemble
(* ------------------------------------------------------------------------- *)
type gatt_att_rcrd_t = {
attr : string;
rest : string;
}
let get_att_att in_str : gatt_att_rcrd_t =
if (Str.string_match (Str.regexp "[A-Za-z][A-Za-z0-9]*") in_str 0) then begin (* BUGBUG: Verify charset *)
let attr = String.sub in_str 0 (Str.match_end ()) in
{rest = get_rest_str in_str (String.length attr); attr = attr}
end
else
raise XML_attrib_failure
(* ------------------------------------------------------------------------- *)
type gatt_equ_rcrd_t = {
attr : string;
quote_char : char;
rest : string;
}
let get_att_equ (prev : gatt_att_rcrd_t) : gatt_equ_rcrd_t =
let in_str = trim_left prev.rest in
if String.length in_str = 0 then
raise XML_attrib_failure
else if in_str.[0] = '=' then begin
let in_str = trim_left (get_rest_str in_str 1) in
if String.length in_str = 0 then
raise XML_attrib_failure
else if (in_str.[0] = '\'') || (in_str.[0] = '"') then
{attr = prev.attr; quote_char = in_str.[0]; rest = get_rest_str in_str 1}
else
raise XML_attrib_failure
end
else
raise XML_attrib_failure
(* ------------------------------------------------------------------------- *)
type gatt_val_rcrd_t = {
attr : string;
value : string;
rest : string;
}
let get_att_val (prev : gatt_equ_rcrd_t) : gatt_val_rcrd_t =
let in_str = trim_left prev.rest in (* BUGBUG: really trim left? Then we would need a trim right too *)
try
let right = String.index in_str prev.quote_char in
{attr = prev.attr; value = (String.sub in_str 0 right); rest = trim_left (get_rest_str in_str (right + 1))}
with Not_found -> raise XML_attrib_failure
(* ------------------------------------------------------------------------- *)
let get_attributes in_str : tag_attr_rcrd_t list =
let in_str = trim_left in_str in
let rec loop in_str in_list =
let res = get_att_att in_str |> get_att_equ |> get_att_val in
let in_list = in_list @ [{attrib = res.attr; value = res.value}] in (* BUGBUG: unescape res.value *)
if String.length res.rest = 0 then (* get_att_val returns a left-trimmed rest-string *)
in_list
else
loop res.rest in_list
in
loop in_str []
(* ------------------------------------------------------------------------- *)
let get_tag_attrs in_tag_wrapped : wrapped_tag_rcrd_t =
(* we are looking for a pattern like "tag_str attr1='value1' attr2="value2" ..." *) (* BUGBUG: Not sure about spaces around the '=' *)
(* Are there any blanks? *)
let in_tag_str = in_tag_wrapped.tag_rcrd.tag_str in (* just for convenience *)
try
let left = String.index in_tag_str ' ' in begin
let tag_str = String.sub in_tag_str 0 left
and attr_str = String.sub in_tag_str left (String.length in_tag_str - left) in
printf "\ngetTagAttrs '%s':'%s'\n" tag_str attr_str;
{tag_rcrd = {tag_type = in_tag_wrapped.tag_rcrd.tag_type; tag_str = tag_str; tag_attrs = (get_attributes attr_str)}; rest = in_tag_wrapped.rest; failed = in_tag_wrapped.failed}
end
with Not_found -> in_tag_wrapped (* Nothing to do and tag_attrs is already [] *)
Thanks for any input or tips.