-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconfig_parse.clj
132 lines (122 loc) · 4.88 KB
/
config_parse.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
(ns ^{:doc "Parse a pipeline configuration. This namespace supports a simple
DSL for parsing a pipeline configuration (see [[zensols.nlparse.config]]). The
*configuration string* is a set of *forms*, one per component, separated by
commas. For example the forms:
```
zensols.nlparse.config/tokenize(\"en\"),zensols.nlparse.config/sentence,part-of-speech(\"english.tagger\"),zensols.nlparse.config/morphology
```
creates a pipeline that tokenizes, adds POS and lemmas when called
with [[parse]]. Note the double quotes in the `tokenize` and `part-of-speech`
mnemonics. The [[parse]] function does this by calling in order:
* ([[zensols.nlparse.config/tokenize]] \"en\")
* ([[zensols.nlparse.config/sentence]])
* ([[zensols.nlparse.config/part-of-speech]] \"english.tagger\")
* ([[zensols.nlparse.config/morphology]])
Some configuration functions are parameterized by positions or maps.
Positional functions are shown in the above example and a map configuration
follows:
```
parse-tree({:use-shift-reduce? true :maxtime 1000})
```
which creates a shift reduce parser that times out after a second (per
sentence).
Note that arguments are optional (the parenthetical portion of the form) and so
is the namespace, which defaults to `zensols.nlparse.config`. To use a
separate namespace for custom plug and play
components (see [[zensols.nlparse.config/register-library]]) you can specify
your own namespace with a `/`, for example:
```
example.namespace/myfunc(arg1,arg2)
```"
:author "Paul Landes"}
zensols.nlparse.config-parse
(:require [clojure.string :as s]
[clojure.tools.logging :as log])
(:require [instaparse.core :as insta]))
(def ^:private conf-bnf-fn
"Generated DSL parser."
(insta/parser
"forms = form (',' form)*
form = (namespace '/')? func params?
params = '(' param-list ')'
param-list = arg+{',' arg}
arg = #\"[^,)]+\"
func = #\"[a-zA-Z0-9-]+\"
namespace = #\"[a-zA-Z0-9-.]+\""))
(def ^:private config-ns
'zensols.nlparse.config)
(defn to-forms
"Parse a configuration string into DSL forms."
[config-str]
(conf-bnf-fn config-str))
(defn- parse-to-map
"Flatten vector argument DSL forms into something more Clojure 'friendly',
which is a map of arguments or single argument."
[formo & {:keys [seq?]}]
(->> formo
(filter sequential?)
(map rest)
(map #(filter sequential? %))
(map (fn [form]
(if seq?
(map second form)
(->> form
(map (fn [form]
(if (> (count form) 2)
(rest form)
(second form))))
(zipmap (map first form))))))))
(defn to-metadata
"Create form metadata data structures from configuration string
**config-str**."
[config-str]
(let [forms (to-forms config-str)]
(if (insta/failure? forms)
(throw (ex-info (str "Could not parse: '" config-str \')
{:failure forms
:config config-str})))
(->> (parse-to-map forms)
(map #(assoc % :params (first (parse-to-map (:params %) :seq? true)))))))
(defn- find-function [namespaces func-sym]
(->> namespaces
;; ditch bad namespaces to create a better exception downstream
(map find-ns)
(remove nil?)
(map ns-name)
(map (fn [ns]
(ns-resolve ns func-sym)))
(drop-while nil?)
first))
(defn parse
"Parse configuration string **config-str** into a pipeline configuration.
See the namespace ([[zensols.nlparse.config-parse]]) documentation for more
information."
([config-str] (parse config-str nil))
([config-str namespaces]
(letfn [(validate [cfn namespace func]
(if (nil? cfn)
(-> (str" No such component: "
(if namespace (str namespace "/"))
func)
(ex-info {:namespace namespace
:func func})
throw))
cfn)]
(->> (to-metadata config-str)
(map (fn [{:keys [namespace func params] :as meta}]
(try
(let [args (map read-string params)
nssym (if namespace (symbol namespace))]
(if nssym (eval (list 'require `(quote [~nssym]))))
(-> (concat (if namespace
(list nssym)
namespaces))
(find-function (symbol func))
(validate namespace func)
(apply args)))
(catch Exception e
(-> (format "Cannot parse (%s/%s %s): %s"
namespace func (pr-str params) (.toString e))
(ex-info {:meta meta} e)
throw)))))))))