Parse.ih
Go to the documentation of this file.
1 //
2 // Copyright (c) 2020 Fraunhofer Institute for Applied Information Technology (FIT)
3 // Network Research Group (NET)
4 // Schloss Birlinghoven, 53754 Sankt Augustin, GERMANY
5 // Contact: support@wiback.org
6 //
7 // This file is part of the SENF code tree.
8 // It is licensed under the 3-clause BSD License (aka New BSD License).
9 // See LICENSE.txt in the top level directory for details or visit
10 // https://opensource.org/licenses/BSD-3-Clause
11 //
12 
13 
14 /** \file
15  \brief Parse internal header */
16 
17 #ifndef IH_SENF_Scheduler_Console_Parse_
18 #define IH_SENF_Scheduler_Console_Parse_ 1
19 
20 // Custom includes
21 #include <vector>
22 #include <boost/spirit/include/classic.hpp>
23 #include <boost/spirit/include/classic_grammar_def.hpp>
24 #include <boost/spirit/include/classic_dynamic.hpp>
25 #include <boost/spirit/include/phoenix1.hpp>
26 #include <senf/Utils/Phoenix.hh>
27 
28 //-/////////////////////////////////////////////////////////////////////////////////////////////////
29 
30 namespace senf {
31 namespace console {
32 namespace detail {
33 
34  namespace boost_spirit = ::boost::spirit::classic;
35 
36 #ifndef DOXYGEN
37 
38  struct FilePositionWithIndex
39  : public boost_spirit::file_position
40  {
41  int index;
42 
43  FilePositionWithIndex(std::string const & file_ = std::string(),
44  int line_ = 1, int column_ = 1, int index_ = 0)
45  : boost_spirit::file_position (file_, line_, column_), index (index_)
46  {}
47 
48  bool operator==(const FilePositionWithIndex & fp) const
49  {
50  return boost_spirit::file_position::operator==(fp) && index == fp.index;
51  }
52  };
53 
54  struct PositionOf {
55  template <class A1> struct result { typedef FilePositionWithIndex type; };
56  template <class A1> FilePositionWithIndex operator()(A1 & a1) { return a1.get_position(); }
57  FilePositionWithIndex operator()(char const * a1) { return FilePositionWithIndex(); }
58  };
59 
60  ::phoenix::function<PositionOf> const positionOf;
61 
62  //-/////////////////////////////////////////////////////////////////////////////////////////////
63  // Grammar
64 
65  template <class ParseDispatcher>
66  struct CommandGrammar : boost_spirit::grammar<CommandGrammar<ParseDispatcher> >
67  {
68  //-/////////////////////////////////////////////////////////////////////////////////////////
69  // Start rules
70 
71  enum { CommandParser, SkipParser, ArgumentsParser, PathParser };
72 
73  //-/////////////////////////////////////////////////////////////////////////////////////////
74  // The parse context (variables needed while parsing)
75 
76  typedef Token::TokenType TokenType;
77 
78  struct Context {
79  std::string str;
80  std::vector<Token> path;
81  char ch;
82  Token token;
83  FilePositionWithIndex pos;
84  };
85 
86  Context & context;
87 
88  //-/////////////////////////////////////////////////////////////////////////////////////////
89  // Configuration
90 
91  bool incremental;
92 
93  //-/////////////////////////////////////////////////////////////////////////////////////////
94  // Dispatching semantic actions
95 
96  ParseDispatcher & dispatcher;
97 
98  //-/////////////////////////////////////////////////////////////////////////////////////////
99  // character sets
100 
101  static boost_spirit::chset<> & special_p() {
102  static boost_spirit::chset<> p ("/(){};\"");
103  return p;
104  }
105  static boost_spirit::chset<> & punctuation_p() {
106  static boost_spirit::chset<> p (",=");
107  return p;
108  }
109  static boost_spirit::chset<> & space_p() {
110  static boost_spirit::chset<> p (" \t\n\r");
111  return p;
112  }
113  static boost_spirit::chset<> & invalid_p() {
114  static boost_spirit::chset<> p ((boost_spirit::chset<>('\0') | boost_spirit::chset<>("\x01-\x20")) - space_p() );
115  return p;
116  }
117  static boost_spirit::chset<> & word_p() {
118  static boost_spirit::chset<> p (boost_spirit::anychar_p - special_p() - punctuation_p() - space_p() - invalid_p());
119  return p;
120  }
121  static boost_spirit::distinct_parser<> & keyword_p() {
122  static boost_spirit::distinct_parser<> p (word_p() | boost_spirit::ch_p('/'));
123  return p;
124  }
125 
126  //-/////////////////////////////////////////////////////////////////////////////////////////
127  // Errors
128 
129  enum Errors {
130  EndOfStatementExpected,
131  PathExpected,
132  ClosingParenExpected,
133  QuoteExpected
134  };
135 
136  //-/////////////////////////////////////////////////////////////////////////////////////////
137 
138  CommandGrammar(ParseDispatcher & d, Context & c)
139  : context(c), incremental(false), dispatcher(d) {}
140 
141  template <class Scanner>
142  struct definition
143  : public boost_spirit::grammar_def< boost_spirit::rule<Scanner>,
144  boost_spirit::rule<Scanner>,
145  boost_spirit::rule<Scanner>,
146  boost_spirit::rule<Scanner> >
147  {
148  boost_spirit::rule<Scanner> command, path, argument, word, string, hexstring,
149  word_or_string, token, punctuation, hexbyte, balanced_tokens, simple_argument,
150  complex_argument, builtin, skip, statement, relpath, abspath, arguments,
151  group_start, group_close, statement_end, opt_path;
152 
153  definition(CommandGrammar const & self)
154  {
155  using namespace boost_spirit;
156  using namespace ::phoenix;
157  using namespace senf::phoenix;
158  typedef ParseDispatcher PD;
159 
160  actor< variable< char > > ch_ (self.context.ch);
161  actor< variable< std::string > > str_ (self.context.str);
162  actor< variable< std::vector<Token> > > path_ (self.context.path);
163  actor< variable< Token > > token_ (self.context.token);
164  actor< variable< FilePositionWithIndex > > pos_ (self.context.pos);
165  actor< variable< ParseDispatcher > > d_ (self.dispatcher);
166 
167  assertion<Errors> end_of_statement_expected (EndOfStatementExpected);
168  assertion<Errors> path_expected (PathExpected);
169  assertion<Errors> closing_paren_expected (ClosingParenExpected);
170  assertion<Errors> quote_expected (QuoteExpected);
171 
172  //-/////////////////////////////////////////////////////////////////////////////////
173  // Spirit grammar
174  //
175  // Syntax summary:
176  // This is EBNF with some minor tweaks to accommodate C++ syntax
177  //
178  // * a any number of a's
179  // + a at least one a
180  // ! a an optional a
181  // a >> b a followed by b
182  // a | b a or b
183  // a % b any number of a's separated by b's
184  // a - b a but not b
185  //
186  // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
187  // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
188  // quite readable.
189  //
190  // ch_p match character
191  // eps_p always matches nothing (to attach unconditional actions)
192  // confix_p(a,b,c) match b, preceded by a and terminated by c. Used to parse
193  // string literals and comments
194  // lex_escape_ch_p match a lex style escape char. This is like a C++ style
195  // literal string escape char, however \x will be replaced by 'x'
196  // for any char 'x' if it has no special meaning.
197  // keyword_p match a delimited keyword
198  // comment_p(a,b) match comment starting with a and terminated with b. b
199  // defaults to end-of-line
200  //
201  // lexeme_d don't skip whitespace (as defined by the skip parser)
202  //
203  // Aligned to the right at column 50 are semantic actions.
204  //
205  // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
206  // in most cases.
207  //
208  // More info is in the Boost.Spirit documentation
209 
210  command
211  = builtin >> end_of_statement_expected(statement_end)
212  | group_close
213  | ch_p(';') // Ignore empty commands
214  | statement
215  ;
216 
217  statement
218  = path_expected(path) [ ::phoenix::bind(&PD::beginCommand)(d_, path_) ]
219  >> arguments
220  >> end_of_statement_expected(
221  ( group_start | statement_end )
222  [ ::phoenix::bind(&PD::endCommand)(d_) ]
223  )
224  ;
225 
226  builtin
227  = self.keyword_p()("cd")
228  >> path_expected(path)
229  >> eps_p [ ::phoenix::bind(&PD::builtin_cd)(d_, path_) ]
230  | self.keyword_p()("ls")
231  >> ! path
232  >> eps_p [ ::phoenix::bind(&PD::builtin_ls)(d_, path_) ]
233  | self.keyword_p()("ll")
234  >> ! path
235  >> eps_p [ ::phoenix::bind(&PD::builtin_ll)(d_, path_) ]
236  | self.keyword_p()("lr")
237  >> ! path
238  >> eps_p [ ::phoenix::bind(&PD::builtin_lr)(d_, path_) ]
239  | self.keyword_p()("exit") [ ::phoenix::bind(&PD::builtin_exit)(d_) ]
240  | self.keyword_p()("help")
241  >> ! path
242  >> eps_p [ ::phoenix::bind(&PD::builtin_help)(d_, path_) ]
243  | self.keyword_p()("echo")
244  >> ! arguments
245  >> eps_p [ ::phoenix::bind(&PD::builtin_echo)(d_) ]
246  ;
247 
248  group_start
249  = ch_p('{') [ ::phoenix::bind(&PD::pushDirectory)(d_) ]
250  ;
251 
252  group_close
253  = ch_p('}') [ ::phoenix::bind(&PD::popDirectory)(d_) ]
254  ;
255 
256  arguments
257  = * argument
258  ;
259 
260  argument
261  = simple_argument [ ::phoenix::bind(&PD::pushToken)(d_, token_) ]
262  | balanced_tokens
263  ;
264 
265  simple_argument // All these return their value in context.token
266  = string
267  | hexstring
268  | word
269  ;
270 
271  string // Returns value in context.token
272  = eps_p [ pos_ = positionOf(arg1) ][ clear(str_) ]
273  >> lexeme_d
274  [
275  ch_p('"')
276  >> * ( ( lex_escape_ch_p[ ch_ = arg1 ]
277  - '"'
278  ) [ str_ += ch_ ]
279  )
280  >> quote_expected(ch_p('"'))
281  [ token_ = construct_<Token>(Token::BasicString,
282  str_,
283  pos_) ]
284  ]
285  ;
286 
287  hexstring // Returns value in context.token
288  = eps_p [ pos_ = positionOf(arg1) ][ clear(str_) ]
289  >> "x\""
290  >> * ( hexbyte - ch_p('"') )
291  >> quote_expected(ch_p('"'))
292  [ token_ = construct_<Token>(Token::HexString,
293  str_,
294  pos_) ]
295  ;
296 
297  opt_path
298  = ! path [ ::phoenix::bind(&PD::beginCommand)(d_, path_) ]
299  [ ::phoenix::bind(&PD::endCommand)(d_) ]
300  ;
301 
302  path // Returns value in context.path
303  = eps_p [ clear(path_) ]
304  >> relpath | abspath
305  ;
306 
307  relpath
308  = ( word_or_string [ push_back(path_, token_) ]
309  % +ch_p('/') )
310  >> ( ! (+ch_p('/') ) [ push_back(path_, construct_<Token>()) ] )
311  ;
312 
313  abspath
314  = (+ch_p('/')) [ push_back(path_, construct_<Token>()) ]
315  >> ( relpath
316  | eps_p [ push_back(path_, construct_<Token>()) ] )
317  ;
318 
319  balanced_tokens
320  = eps_p [ pos_ = positionOf(arg1) ]
321  >> ch_p('(') [ token_ = construct_<Token>(
322  Token::ArgumentGroupOpen,
323  "(",
324  pos_) ]
325  [ ::phoenix::bind(&PD::pushToken)(d_, token_) ]
326  >> * token
327  >> eps_p [ pos_ = positionOf(arg1) ]
328  >> closing_paren_expected(ch_p(')'))
329  [ token_ = construct_<Token>(
330  Token::ArgumentGroupClose,
331  ")",
332  pos_) ]
333  [ ::phoenix::bind(&PD::pushToken)(d_, token_) ]
334  ;
335 
336  token
337  = simple_argument [ ::phoenix::bind(&PD::pushToken)(d_, token_) ]
338  | punctuation [ ::phoenix::bind(&PD::pushToken)(d_, token_) ]
339  | balanced_tokens
340  ;
341 
342  punctuation // Returns value in context.str
343  = eps_p [ pos_ = positionOf(arg1) ]
344  >> (
345  ch_p('/') [ token_ = construct_<Token>(
346  Token::PathSeparator,
347  "/") ]
348  | ch_p('{') [ token_ = construct_<Token>(
349  Token::DirectoryGroupOpen,
350  "{") ]
351  | ch_p('}') [ token_ = construct_<Token>(
352  Token::DirectoryGroupClose,
353  "}") ]
354  | ch_p(';') [ token_ = construct_<Token>(
355  Token::CommandTerminator,
356  ";") ]
357  | self.punctuation_p() [ token_ = construct_<Token>(
358  Token::OtherPunctuation,
359  construct_<std::string>(1u, arg1),
360  pos_) ]
361  )
362  ;
363 
364  word // Returns value in context.token
365  = eps_p [ pos_ = positionOf(arg1) ]
366  >> lexeme_d
367  [
368  (+ self.word_p()) [ str_ = construct_<std::string>(arg1, arg2) ]
369  ]
370  >> eps_p [ token_ = construct_<Token>(
371  Token::Word,
372  str_,
373  pos_) ]
374  ;
375 
376  word_or_string
377  = word
378  | string
379  ;
380 
381  hexbyte
382  = uint_parser<char, 16, 2, 2>()
383  [ push_back(str_, arg1) ]
384  ;
385 
386  statement_end
387  = if_p(var(self.incremental)) [
388  ch_p(';')
389  ]
390  .else_p [
391  ch_p(';')
392  | end_p
393  ]
394  ;
395 
396  skip
397  = self.space_p() | comment_p('#')
398  ;
399 
400  //-/////////////////////////////////////////////////////////////////////////////////
401 
402  this->start_parsers(
403  command, // CommandParser
404  skip, // SkipParser
405  arguments, // ArgumentsParser
406  opt_path // PathParser
407  );
408 
409  BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
410  BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
411  BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
412  BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
413  BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
414  BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
415  BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
416  BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
417  BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
418  BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
419  BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
420  BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
421  BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
422  BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
423  BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
424  BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
425  BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
426  BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
427  }
428  };
429  };
430 
431 #endif
432 
433 }}}
434 
435 //-/////////////////////////////////////////////////////////////////////////////////////////////////
436 #endif
437 
438 
439 // Local Variables:
440 // mode: c++
441 // fill-column: 100
442 // comment-column: 40
443 // c-file-style: "senf"
444 // indent-tabs-mode: nil
445 // ispell-local-dictionary: "american"
446 // compile-command: "scons -u test"
447 // End: