1
\$\begingroup\$

I have written two versions of a CSV parser. The second appears to perform better than the first, although the first is easier to read.

Variant 1, which parses a string:

(defn newline?
  "Returns true when the given character sequence starts with a line break:
  either a line feed, or a carriage return immediately followed by a line
  feed. Only the first two elements are inspected; nil or empty input
  yields false."
  [[a b]]
  (case a
    \newline true
    \return (= b \newline)
    false))

(defn parse-value-raw
  "Reads an unquoted field from the front of the character sequence s.

  Characters are accumulated until a comma, a line break (as recognised by
  newline?) or the end of the input is reached. Returns a vector
  [value remaining], where value is the accumulated string and remaining is
  the unconsumed input starting at the terminating delimiter (nil or empty
  when the input is exhausted)."
  [s]
  (loop [[a & more :as all] s
         res ""]
    (if (or (empty? all)          ; end of input ends the field like a delimiter,
                                  ; so the last field of the input is kept
            (= a \,)
            (newline? all))
      [res all]
      (recur more (str res a)))))

(defn parse-value-quoted
  "Reads a quoted field from a character sequence whose first element is the
  opening quote character.

  Inside the field, two consecutive double quotes stand for one literal
  double quote. The field ends at the first unpaired occurrence of the
  opening quote character. Returns a vector [value remaining], where
  remaining is the input that follows the closing quote.

  If the input ends before a closing quote is found, the text read so far is
  returned with a nil remainder rather than looping forever."
  [[q & body]]
  (loop [[a & tail] body
         res ""]
    (cond
      ;; Input exhausted without a closing quote: stop instead of recurring
      ;; on an empty sequence indefinitely.
      (nil? a) [res nil]
      (and (= a \") (= (first tail) \")) (recur (rest tail) (str res \"))
      (= a q) [res tail]
      :else (recur tail (str res a)))))

(defn parse-value
  "Parses a single field from the front of s. A field whose first character
  is a double quote is handed to parse-value-quoted; anything else is handed
  to parse-value-raw. Returns the chosen parser's result unchanged."
  [s]
  (let [parser (if (= \" (first s))
                 parse-value-quoted
                 parse-value-raw)]
    (parser s)))

(defn parse-record
  "Parses one record (a line of comma-separated fields) from the front of s.

  Returns a vector [fields remaining]. fields is a vector of the field
  values read. remaining is the unconsumed input when the record ended at a
  line break or at the end of the input, or the keyword :error when a field
  was followed by something other than a comma, a line break or the end of
  the input."
  [s]
  (loop [fields []
         input s]
    (let [[value remaining] (parse-value input)
          fields (conj fields value)]
      (if (= \, (first remaining))
        ;; A comma means another field follows on the same record.
        (recur fields (rest remaining))
        [fields (if (or (newline? remaining) (empty? remaining))
                  remaining
                  :error)]))))

(defn parse-csv
  "Parses CSV text s into a vector of records, each record being a vector of
  field strings.

  Records are separated by a line feed or by a carriage return followed by a
  line feed. A line break at the very end of the input terminates the last
  record and does not start a new, empty one. When parse-record reports
  :error, parsing stops and the records read so far (including the record
  in which the error occurred) are returned."
  [s]
  (loop [rows []
         [rec tail] (parse-record s)]
    (cond
      ;; :error is a keyword, so it must be recognised before any sequence
      ;; function (empty?, first, destructuring) is applied to tail.
      (= tail :error) (conj rows rec)
      (empty? tail) (conj rows rec)
      (newline? tail)
      ;; Skip the whole line break: two characters for CR LF, one for LF.
      (let [remaining (drop (if (= (first tail) \return) 2 1) tail)]
        (if (empty? remaining)
          (conj rows rec)
          (recur (conj rows rec) (parse-record remaining))))
      :else (conj rows rec))))

Variant 2, which uses java.io.PushbackReader and StringBuilder:

(defn create-reader
  "Opens the file at path and returns a java.io.PushbackReader over a
  buffered java.io.FileReader, with a pushback buffer of 2 characters."
  [^String path]
  (-> (java.io.FileReader. path)
      (java.io.BufferedReader.)
      (java.io.PushbackReader. 2)))

(defn line-break?
  "Returns true when character c starts a line break.

  A line feed is a line break by itself. For a carriage return, one more
  character is read from r: if it is a line feed the pair is consumed and
  true is returned; otherwise that character is pushed back onto r and false
  is returned. If r is already at end of stream after the carriage return,
  nothing is pushed back and false is returned. Any other c yields false
  without touching r."
  [c ^java.io.PushbackReader r]
  (cond
    (= c \newline) true
    (= c \return) (let [nx (.read r)]
                    (cond
                      (= nx (int \newline)) true
                      ;; End of stream: -1 is not a character, so it must be
                      ;; neither converted with char nor pushed back.
                      (= nx -1) false
                      :else (do (.unread r nx) false)))
    :else false))

(defn read-next
  "Reads the next token from r and returns it as a vector.

  Returns [:eof] at end of stream, [:escaped-quote <double quote>] when two
  consecutive double quotes were read, and [:char c] for any other single
  character c. A lone double quote is returned as [:char <double quote>];
  the character that followed it is pushed back onto r, unless the stream
  ended right after the quote, in which case nothing is pushed back."
  [^java.io.PushbackReader r]
  (let [cd (.read r)]
    (cond
      (= cd -1) [:eof]
      (= cd (int \")) (let [nx (.read r)]
                        (cond
                          (= nx (int \")) [:escaped-quote \"]
                          ;; Quote was the last character: -1 is not a
                          ;; character, so do not convert or push it back.
                          (= nx -1) [:char \"]
                          :else (do (.unread r nx) [:char \"])))
      :else [:char (char cd)])))

(defn parse-value
  "Reads one field from r and returns a vector [status value].

  status describes what ended the field:
    :separator  a comma followed the field
    :linebreak  a line break (as recognised by line-break?) followed it
    :eof        the stream ended after the field
    :error      a quoted field was not closed before the end of the stream,
                or its closing quote was followed by something other than a
                comma, a line break or the end of the stream
  value is the text of the field read so far.

  A field is quoted when its first character is a double quote. Inside a
  quoted field two consecutive double quotes stand for one literal double
  quote. In an unquoted field two consecutive double quotes are kept as
  they are."
  [^java.io.PushbackReader r]
  (let [sb (StringBuilder.)
        ;; The first character is read directly rather than through
        ;; read-next, so that a field beginning with two double quotes (for
        ;; example an empty quoted field) is seen as an opening quote
        ;; followed by further input, not as an escaped quote.
        head (.read r)
        quoted? (= head (int \"))]
    ;; Anything other than an opening quote goes back onto the stream so the
    ;; loop below handles it like every other character; this lets a
    ;; delimiter in first position end an empty field.
    (when (and (not quoted?) (not= head -1))
      (.unread r head))
    (loop [closed false]
      (let [[ctype c] (read-next r)]
        (if quoted?
          (cond
            (= ctype :eof) [(if closed :eof :error) (.toString sb)]
            closed (cond
                     (= c \,) [:separator (.toString sb)]
                     (line-break? c r) [:linebreak (.toString sb)]
                     ;; Text after the closing quote is malformed input.
                     :else [:error (.toString sb)])
            (= ctype :escaped-quote) (do (.append sb c) (recur false))
            (= c \") (recur true)
            :else (do (.append sb c) (recur false)))
          (cond
            (= ctype :eof) [:eof (.toString sb)]
            (= ctype :escaped-quote) (do (.append sb "\"\"") (recur false))
            (= c \,) [:separator (.toString sb)]
            (line-break? c r) [:linebreak (.toString sb)]
            :else (do (.append sb c) (recur false))))))))

(defn parse-csv
  "Reads CSV data from r and returns a vector of records, each record being a
  vector of field strings.

  Fields are read with parse-value until it reports :linebreak, :eof or
  :error. After :linebreak the next record is read, unless the stream is
  already at its end, so a line break at the very end of the input does not
  produce an extra empty record. After :eof or :error the records read so
  far, including the current one, are returned. The reader is not closed
  here."
  [^java.io.PushbackReader r]
  (loop [records []]
    (let [[st rec] (loop [record []]
                     (let [[state v] (parse-value r)
                           record (conj record v)]
                       (if (contains? #{:error :eof :linebreak} state)
                         [state record]
                         (recur record))))
          records (conj records rec)]
      (if (= st :linebreak)
        ;; Peek one character to find out whether another record follows.
        (let [nx (.read r)]
          (if (= nx -1)
            records
            (do (.unread r nx)
                (recur records))))
        records))))

Please review both of them and tell me how to improve them.

\$\endgroup\$
1
  • \$\begingroup\$ "more performant" -- show us an example .csv workload you ran each one against, along with timings please. \$\endgroup\$ Commented Oct 15 at 15:46

0

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.