TuringSim
C++ framework to simulate abstract computing models
turingStyleMixedSymbolPatternParser.cpp
1 #include <utils/messageException.h>
2 #include <symbol/turingStyleMixedSymbolPatternParser.h>
3 
4 namespace TuringSim::Symbol {
6  std::set<char> symbols{left, right, separator};
7  if(symbols.size() != 3) {
9  "Left, right and separator should be distinct",
10  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
11  }
12  for(char i: ignore) {
13  if(symbols.count(i) > 0) {
15  "Left, right and separator should not be ignore characters",
16  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
17  }
18  }
19  for(char c: variablePrefix) {
20  if(symbols.count(c) > 0) {
22  "variablePrefix should not contain delimiters and separators",
23  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
24  }
25  if(ignore.count(c) > 0) {
27  "variablePrefix should not contain ignore characters",
28  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
29  }
30  }
31  if(variablePrefix.empty()) {
33  "variablePrefix should not be empty",
34  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
35  }
36  for(char c: escapePrefix) {
37  if(symbols.count(c) > 0) {
39  "escapePrefix should not contain delimiters and separators",
40  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
41  }
42  if(ignore.count(c) > 0) {
44  "escapePrefix should not contain ignore characters",
45  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
46  }
47  }
48  if(escapePrefix.empty()) {
50  "escapePrefix should not be empty",
51  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
52  }
53  if(variablePrefix.substr(0, std::min(variablePrefix.size(), escapePrefix.size())) == escapePrefix.substr(0, std::min(variablePrefix.size(), escapePrefix.size()))) {
55  "variablePrefix should not be a prefix of escapePrefix, or conversely.",
56  left, right, separator, blank, ignore, variablePrefix, escapePrefix);
57  }
58 
59  }
60 
63  }
64 
/**
 * Builds a parser from explicit delimiters and prefixes. Each argument is copied
 * into the member of the same name.
 *
 * @param left           character lexed as the LEFT token.
 * @param right          character lexed as the RIGHT token.
 * @param separator      character lexed as the SEPARATOR token.
 * @param blank          symbol pushed when the "none" keyword is met in a pattern.
 * @param ignore         characters skipped between tokens by the lexer.
 * @param variablePrefix prefix that marks an identifier as a variable (see isVariableNode).
 * @param escapePrefix   prefix that stops an identifier from being read as a keyword (see translateKeyword).
 */
65  TuringStyleMixedSymbolPatternParser::TuringStyleMixedSymbolPatternParser(char left, char right, char separator, std::string blank, std::set<char> ignore, std::string variablePrefix, std::string escapePrefix) :
66  left(left), right(right), separator(separator), blank(blank), ignore(ignore), variablePrefix(variablePrefix), escapePrefix(escapePrefix) {
    // NOTE(review): listing line 67 is not visible here, so the body looks empty. The documentation
    // of checkInitialization says it is called by constructors - confirm against the real source.
68  }
69 
71  std::vector <Token> tokens = tokenize(pattern);
72  translateKeywords(pattern, tokens);
73 
74  if(tokens.size() == 1 && std::get<TokenKind>(tokens[0]) == TokenKind::EOS) {
76  }
77  if(tokens.size() == 2 && std::get<TokenKind>(tokens[0]) == TokenKind::ANY &&
78  std::get<TokenKind>(tokens[1]) == TokenKind::EOS) {
80  }
81  if(tokens.size() == 2 && std::get<TokenKind>(tokens[0]) == TokenKind::TRUE &&
82  std::get<TokenKind>(tokens[1]) == TokenKind::EOS) {
84  }
85 
86  typedef std::function<std::pair<SymbolPattern, size_t>(void)> parser;
87  parser parse_pattern =
88  (std::get<TokenKind>(tokens[0]) == TokenKind::NON)
89  ? static_cast<parser>([this, &pattern, &tokens = std::as_const(
90  tokens)]() { return parse_negative_pattern(pattern, tokens, 1); })
91  : static_cast<parser>([this, &pattern, &tokens = std::as_const(
92  tokens)]() { return parse_positive_pattern(pattern, tokens, 0); });
93 
94  auto[parsed, position] = parse_pattern();
95 
96  if(position != tokens.size() - 1) {
97  using Utils::Debug::operator<<;
98  std::stringstream ss;
99  ss << "parsed:" << parsed;
100  throw TuringStyleMixedSymbolPatternSyntaxErrorException("Extra token at the end", pattern, tokens, position, ss.str());
101  }
102  else if(std::get<0>(tokens[position]) != TokenKind::EOS) {
103  using Utils::Debug::operator<<;
104  std::stringstream ss;
105  ss << "parsed:" << parsed;
106  throw TuringStyleMixedSymbolPatternSyntaxErrorException("Non terminated token stream", pattern, tokens, position, ss.str());
107  }
108 
109  return parsed;
110  }
111 
112  std::pair<bool, std::string> TuringStyleMixedSymbolPatternParser::isVariableNode(const std::string &node) const {
113  size_t nodeLen = variablePrefix.size();
114  size_t prefixLen = variablePrefix.size();
115  if(nodeLen < prefixLen) {
116  return {false, node};
117  }
118  if(variablePrefix == node.substr(0, prefixLen)) {
119  return {true, node.substr(prefixLen)};
120  }
121  return {false, node};
122  }
123 
/**
 * Parse a negative list of symbols: a LEFT token, an identifier or the "none"
 * keyword, any number of further entries each preceded by a SEPARATOR, a RIGHT
 * token, then EOS.
 *
 * Identifiers recognised by isVariableNode are collected as keys, the others as
 * symbols; the "none" keyword contributes the `blank` symbol.
 *
 * @param pattern  the string the tokens were lexed from; used to extract identifier text.
 * @param tokens   the token stream to read. The code indexes tokens[position] without a bounds
 *                 check, so it relies on the stream ending with an EOS token (as tokenize produces).
 * @param position index in tokens of the token expected to be LEFT.
 * @return the parsed pattern (built with the flag `true`) paired with the index of the final EOS token.
 * @throws TuringStyleMixedSymbolPatternSyntaxErrorException when the tokens do not have that shape.
 */
124  std::pair<TuringStyleMixedSymbolPatternParser::SymbolPattern, size_t> TuringStyleMixedSymbolPatternParser::parse_negative_pattern(const std::string &pattern, const std::vector <Token> &tokens, size_t position) const {
125  std::vector <std::string> symbols;
126  std::vector <std::string> keys;
127 
    // Files the identifier at the current position under keys (variable) or symbols (anything else).
128  std::function<void()> push_ident = [this, &pattern, &tokens, &keys, &symbols, &position]() {
129  std::string ident = sub(pattern, tokens[position]);
130  auto[isVar, content] = isVariableNode(ident);
131  if(isVar) {
132  keys.push_back(content);
133  } else {
134  symbols.push_back(content);
135  }
136  };
137 
    // Builds the syntax error for the current position; the detail text dumps what was collected so far.
138  std::function<TuringStyleMixedSymbolPatternSyntaxErrorException(const std::string&)> make_exception =
139  [&](const std::string &message) -> TuringStyleMixedSymbolPatternSyntaxErrorException {
140  using Utils::Debug::operator<<;
141  std::stringstream ss;
142  ss << "symbols: " << Utils::Debug::debug(symbols) << std::endl;
143  ss << " keys: " << Utils::Debug::debug(keys);
144  return TuringStyleMixedSymbolPatternSyntaxErrorException(message, pattern, tokens, position, ss.str());
145  };
146 
    // The list must open with the left delimiter.
147  if(std::get<TokenKind>(tokens[position]) != TokenKind::LEFT) {
148  throw make_exception("Expected left parenthesis.");
149  }
150  position++;
151 
    // First entry is mandatory: an identifier or the "none" keyword.
152  if(std::get<TokenKind>(tokens[position]) == TokenKind::IDENT)
153  push_ident();
154  else if(std::get<TokenKind>(tokens[position]) == TokenKind::NONE)
155  symbols.push_back(blank);
156  else {
157  std::stringstream ss;
158  ss << "Expected first identifier or none in negative pattern. Got " << tokens[position];
159  throw make_exception(ss.str());
160  }
161 
162  position++;
163 
    // Remaining entries: each one is a separator followed by an identifier or "none".
    // The loop also stops on EOS so that a missing right delimiter is reported below.
164  while (std::get<TokenKind>(tokens[position]) != TokenKind::RIGHT &&
165  std::get<TokenKind>(tokens[position]) != TokenKind::EOS) {
166  if(std::get<TokenKind>(tokens[position]) != TokenKind::SEPARATOR)
167  throw make_exception("Expected separator.");
168  position++;
169  if(std::get<TokenKind>(tokens[position]) == TokenKind::IDENT)
170  push_ident();
171  else if(std::get<TokenKind>(tokens[position]) == TokenKind::NONE)
172  symbols.push_back(blank);
173  else
174  throw make_exception("Expected identifier or none after a separator in a negative pattern.");
175  position++;
176  }
177 
178  if(std::get<TokenKind>(tokens[position]) != TokenKind::RIGHT)
179  throw make_exception("Expected right parenthesis.");
180 
    // Nothing but EOS may follow the right delimiter.
181  position++;
182  if(std::get<TokenKind>(tokens[position]) != TokenKind::EOS)
183  throw make_exception("Expected EOS.");
184 
185  if(symbols.empty() && keys.empty())
186  throw make_exception("This should not happen. At least one identifier is required.");
187 
    // NOTE(review): the line opening the braced initializer that is closed by `},` below
    // (listing line 189) is not visible in this listing - confirm against the real source.
    // Duplicates are merged by the conversion to std::set; `true` is the flag of the negative form.
188  return std::make_pair(
190  std::set < std::string > {symbols.begin(), symbols.end()},
191  std::set < std::string > {keys.begin(), keys.end()},
192  true
193  },
194  position);
195  }
196 
/**
 * Parse a positive list of symbols: an identifier or the "none" keyword, any
 * number of further entries each preceded by a SEPARATOR, then EOS.
 *
 * Identifiers recognised by isVariableNode are collected as keys, the others as
 * symbols; the "none" keyword contributes the `blank` symbol.
 *
 * @param pattern  the string the tokens were lexed from; used to extract identifier text.
 * @param tokens   the token stream to read. The code indexes tokens[position] without a bounds
 *                 check, so it relies on the stream ending with an EOS token (as tokenize produces).
 * @param position index in tokens of the first entry of the list.
 * @return the parsed pattern (built with the flag `false`) paired with the index of the final EOS token.
 * @throws TuringStyleMixedSymbolPatternSyntaxErrorException when the tokens do not have that shape.
 */
197  std::pair<TuringStyleMixedSymbolPatternParser::SymbolPattern, size_t> TuringStyleMixedSymbolPatternParser::parse_positive_pattern(const std::string &pattern, const std::vector <Token> &tokens, size_t position) const {
198  std::vector <std::string> symbols;
199  std::vector <std::string> keys;
200 
    // Files the identifier at the current position under keys (variable) or symbols (anything else).
201  std::function<void()> push_ident = [this, &pattern, &tokens, &keys, &symbols, &position]() {
202  std::string ident = sub(pattern, tokens[position]);
203  auto[isVar, content] = isVariableNode(ident);
204  if(isVar) {
205  keys.push_back(content);
206  } else {
207  symbols.push_back(content);
208  }
209  };
210 
    // Builds the syntax error for the current position. Here the message is passed as the
    // exception message and is also written at the start of the detail text.
211  std::function<TuringStyleMixedSymbolPatternSyntaxErrorException(const std::string&)> make_exception =
212  [&](const std::string &message) -> TuringStyleMixedSymbolPatternSyntaxErrorException {
213  using Utils::Debug::operator<<;
214  std::stringstream ss;
215  ss << message << std::endl;
216  ss << "symbols: " << Utils::Debug::debug(symbols) << std::endl;
217  ss << " keys: " << Utils::Debug::debug(keys);
218  return TuringStyleMixedSymbolPatternSyntaxErrorException(message, pattern, tokens, position, ss.str());
219  };
220 
    // First entry is mandatory: an identifier or the "none" keyword.
221  if(std::get<TokenKind>(tokens[position]) == TokenKind::IDENT)
222  push_ident();
223  else if(std::get<TokenKind>(tokens[position]) == TokenKind::NONE)
224  symbols.push_back(blank);
225  else {
226  std::stringstream ss;
227  ss << "Expected first identifier or none in positive pattern. Got " << tokens[position];
228  throw make_exception(ss.str());
229  }
230 
231  position++;
232 
    // Remaining entries: each one is a separator followed by an identifier or "none".
233  while (std::get<TokenKind>(tokens[position]) != TokenKind::EOS) {
234  if(std::get<TokenKind>(tokens[position]) != TokenKind::SEPARATOR)
235  throw make_exception("Expected separator.");
236  position++;
237  if(std::get<TokenKind>(tokens[position]) == TokenKind::IDENT)
238  push_ident();
239  else if(std::get<TokenKind>(tokens[position]) == TokenKind::NONE)
240  symbols.push_back(blank);
241  else
242  throw make_exception("Expected identifier or none after separator in positive pattern.");
243  position++;
244  }
245 
    // The loop above only exits on EOS, so this check cannot fail as written.
246  if(std::get<TokenKind>(tokens[position]) != TokenKind::EOS)
247  throw make_exception("Expected EOS.");
248 
249  if(symbols.empty() && keys.empty())
250  throw make_exception("This should not happen. At least one identifier is required.");
251 
252 
    // NOTE(review): the line opening the braced initializer that is closed by `},` below
    // (listing line 254) is not visible in this listing - confirm against the real source.
    // Duplicates are merged by the conversion to std::set; `false` is the flag of the positive form.
253  return std::make_pair(
255  std::set < std::string > {symbols.begin(), symbols.end()},
256  std::set < std::string > {keys.begin(), keys.end()},
257  false
258  },
259  position);
260  }
261 
262  std::string TuringStyleMixedSymbolPatternParser::sub(const std::string &pattern, const TuringStyleMixedSymbolPatternParser::Token &token) const {
263  size_t begin, end;
264  std::tie(std::ignore, begin, end) = token;
265  size_t len = end - begin + 1;
266  return pattern.substr(begin, len);
267  }
268 
269  std::vector<TuringStyleMixedSymbolPatternParser::Token> TuringStyleMixedSymbolPatternParser::tokenize(const std::string &pattern) const {
270  std::vector <Token> tokens;
271  size_t start = 0;
272  while (true) {
273  Token t = tokenize(pattern, start);
274  tokens.push_back(t);
275  if(std::get<0>(t) == TokenKind::EOS)
276  break;
277  start = std::get<2>(t) + 1;
278  }
279  return tokens;
280  }
281 
283  for(; start < pattern.size() && ignore.count(pattern[start]) != 0; ++start);
284  if(start == pattern.size())
285  return {TokenKind::EOS, start, start};
286  if(pattern[start] == left)
287  return {TokenKind::LEFT, start, start};
288  if(pattern[start] == right)
289  return {TokenKind::RIGHT, start, start};
290  if(pattern[start] == separator)
291  return {TokenKind::SEPARATOR, start, start};
292  size_t end;
293  for(end = start; end < pattern.size() && ignore.count(pattern[end]) == 0 && pattern[end] != left &&
294  pattern[end] != right && pattern[end] != separator; ++end);
295  return {TokenKind::IDENT, start, end - 1};
296  }
297 
298  std::optional<TuringStyleMixedSymbolPatternParser::Token> TuringStyleMixedSymbolPatternParser::translateKeyword(const std::string &pattern, const TuringStyleMixedSymbolPatternParser::Token &token) const {
299  if(std::get<TokenKind>(token) != TokenKind::IDENT)
300  return std::optional<Token>(std::nullopt);
301 
302  std::string name = sub(pattern, token);
303  if(name.size() >= escapePrefix.size() && name.substr(0, escapePrefix.size()) == escapePrefix) {
304  return std::make_optional(std::make_tuple(TokenKind::IDENT, std::get<1>(token) + escapePrefix.size(), std::get<2>(token)));
305  }
306 
307  if(name == "none")
308  return std::make_optional(std::make_tuple(TokenKind::NONE, std::get<1>(token), std::get<2>(token)));
309  if(name == "non")
310  return std::make_optional(std::make_tuple(TokenKind::NON, std::get<1>(token), std::get<2>(token)));
311  if(name == "any")
312  return std::make_optional(std::make_tuple(TokenKind::ANY, std::get<1>(token), std::get<2>(token)));
313  if(name == "true")
314  return std::make_optional(std::make_tuple(TokenKind::TRUE, std::get<1>(token), std::get<2>(token)));
315  return std::optional<Token>(std::nullopt);
316  }
317 
318  void TuringStyleMixedSymbolPatternParser::translateKeywords(const std::string &pattern, std::vector<Token> &tokens) const {
319  for(size_t i = 0; i < tokens.size(); ++i) {
320  const Token &token = tokens[i];
321  std::optional <Token> new_token = translateKeyword(pattern, token);
322  if(new_token)
323  tokens[i] = new_token.value();
324  }
325  }
326 }
TuringSim::Symbol::TuringStyleMixedSymbolPatternSyntaxErrorException
Exception launched when we try to parse a string with a syntax error as a MConfiguration<std::string>...
Definition: turingStyleMixedSymbolPatternParser.h:300
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::EOS
@ EOS
The token at the end of the string.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::ANY
@ ANY
"any" keyword.
TuringSim::Symbol
The namespace for symbol patterns.
Definition: simpleSymbolPattern.h:5
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::sub
std::string sub(const std::string &pattern, const TuringStyleMixedSymbolPatternParser::Token &token) const
Get the text of a token.
Definition: turingStyleMixedSymbolPatternParser.cpp:262
TuringSim::Symbol::TuringStyleMixedSymbolPatternParserBadInitializationException
Exception thrown when the special characters of a SymbolPatternParser are not consistent.
Definition: turingStyleMixedSymbolPatternParser.h:222
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::TRUE
@ TRUE
"true" keyword.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::IDENT
@ IDENT
Symbols.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::isVariableNode
std::pair< bool, std::string > isVariableNode(const std::string &node) const
Test if a raw node name is a variable name. A node name is a variable name if it starts with the vari...
Definition: turingStyleMixedSymbolPatternParser.cpp:112
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::Token
std::tuple< TokenKind, size_t, size_t > Token
Type of tokens: the first component is the category, the second is the starting character,...
Definition: turingStyleMixedSymbolPatternParser.h:56
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::RIGHT
@ RIGHT
Right parenthesis.
TuringSim::Symbol::TuringStyleMixedSymbolPattern< std::string >
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::parse
SymbolPattern parse(const std::string &pattern) const
Parse a string into a symbol pattern. The usual entry point.
Definition: turingStyleMixedSymbolPatternParser.cpp:70
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::parse_negative_pattern
std::pair< SymbolPattern, size_t > parse_negative_pattern(const std::string &pattern, const std::vector< Token > &tokens, size_t position) const
Parse a negative list of symbols.
Definition: turingStyleMixedSymbolPatternParser.cpp:124
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TuringStyleMixedSymbolPatternParser
TuringStyleMixedSymbolPatternParser()
Builds TuringStyleMixedSymbolPatternParser with default parameters.
Definition: turingStyleMixedSymbolPatternParser.cpp:61
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::NON
@ NON
"non" keyword.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::LEFT
@ LEFT
Left parenthesis.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::parse_positive_pattern
std::pair< SymbolPattern, size_t > parse_positive_pattern(const std::string &pattern, const std::vector< Token > &tokens, size_t position) const
Parse a positive list of symbols.
Definition: turingStyleMixedSymbolPatternParser.cpp:197
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::NONE
@ NONE
"none" keyword.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::checkInitialization
void checkInitialization() const
Check if parameters of the parser are consistent. This function is called by constructors.
Definition: turingStyleMixedSymbolPatternParser.cpp:5
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::translateKeyword
std::optional< Token > translateKeyword(const std::string &pattern, const TuringStyleMixedSymbolPatternParser::Token &token) const
Translate a single token by recognizing keywords.
Definition: turingStyleMixedSymbolPatternParser.cpp:298
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::tokenize
std::vector< Token > tokenize(const std::string &pattern) const
Fully lex a string, without recognizing keywords.
Definition: turingStyleMixedSymbolPatternParser.cpp:269
TuringSim::Utils::Debug::debug
std::function< std::basic_ostream< CharT, Traits > &(std::basic_ostream< CharT, Traits > &)> debug(const T &s)
Generic debug printing function.
Definition: printer.h:34
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::TokenKind::SEPARATOR
@ SEPARATOR
Separator of symbols, usually, the comma.
TuringSim::Symbol::TuringStyleMixedSymbolPatternParser::translateKeywords
void translateKeywords(const std::string &pattern, std::vector< Token > &tokens) const
Translate a full sequence of tokens.
Definition: turingStyleMixedSymbolPatternParser.cpp:318