TuringSim
C++ framework to simulate abstract computing models
mConfigurationParser.cpp
1 #include <state/mConfiguration/mConfigurationParser.h>
2 
5  std::set<char> symbols {left, right, separator};
6  if(symbols.size() != 3) {
8  "Left, right and separator should be distinct.",
9  left, right, separator, ignore, variablePrefix);
10  }
11  for(char i: ignore) {
12  if(symbols.count(i) > 0) {
14  "Left, right and separator should not be ignore characters.",
15  left, right, separator, ignore, variablePrefix);
16  }
17  }
18  for(char c: variablePrefix) {
19  if(symbols.count(c) > 0) {
21  "variablePrefix should not contain delimiters and separators.",
22  left, right, separator, ignore, variablePrefix);
23  }
24  if(ignore.count(c) > 0) {
26  "variablePrefix should not contain ignore characters",
27  left, right, separator, ignore, variablePrefix);
28  }
29  }
30  }
31 
34  }
35 
36  MConfigurationParser::MConfigurationParser(char left, char right, char separator, std::set<char> ignore, const std::string& variablePrefix)
37  : left(left), right(right), separator(separator), ignore(ignore), variablePrefix(variablePrefix) {
39  }
40 
41  std::shared_ptr<const MConfiguration<std::string>> MConfigurationParser::parse(const std::string& m_fun) const {
42  std::vector<Token> tokens = tokenize(m_fun);
43  translateKeywords(m_fun, tokens);
44  auto [parsed, position] = parse_tokens_m_func(m_fun, tokens, 0);
45 
46  if(position != tokens.size() - 1) {
47  using Utils::Debug::operator<<;
48  std::stringstream ss;
49  ss << "parsed: " << parsed;
50  throw MConfigurationParserSyntaxErrorException("Extra token at the end.", m_fun, tokens, position, ss.str());
51  }
52  else if(std::get<0>(tokens[position]) != TokenKind::EOS) {
53  using Utils::Debug::operator<<;
54  std::stringstream ss;
55  ss << "parsed: " << parsed;
56  throw MConfigurationParserSyntaxErrorException("Non terminated token stream." , m_fun, tokens, position, ss.str());
57  }
58 
59  return parsed;
60  }
61 
62  MConfigurationParser::Token MConfigurationParser::tokenize(const std::string& m_fun, size_t start) const {
63  for(; start < m_fun.size() && ignore.count(m_fun[start]) != 0; ++start)
64  ;
65  if(start == m_fun.size())
66  return {TokenKind::EOS, start, start};
67  if(m_fun[start] == left)
68  return {TokenKind::LEFT, start, start};
69  if(m_fun[start] == right)
70  return {TokenKind::RIGHT, start, start};
71  if(m_fun[start] == separator)
72  return {TokenKind::SEPARATOR, start, start};
73  size_t end;
74  for(end = start; end < m_fun.size() && ignore.count(m_fun[end]) == 0 && m_fun[end] != left && m_fun[end] != right && m_fun[end] != separator; ++end)
75  ;
76  return {TokenKind::IDENT, start, end - 1};
77  }
78 
79  std::vector<MConfigurationParser::Token> MConfigurationParser::tokenize(const std::string& m_fun) const {
80  std::vector<Token> tokens;
81  size_t start = 0;
82  while(true) {
83  Token t = tokenize(m_fun, start);
84  tokens.push_back(t);
85  if(std::get<0>(t) == TokenKind::EOS)
86  break;
87  start = std::get<2>(t) + 1;
88  }
89  return tokens;
90  }
91 
92  std::optional<MConfigurationParser::Token> MConfigurationParser::translateKeyword(const std::string& pattern, const Token& token) const {
93  if(std::get<TokenKind>(token) != TokenKind::IDENT)
94  return std::optional<Token>(std::nullopt);
95  std::string name = sub(pattern, token);
96 
97  if(name == "blank")
98  return std::make_optional(std::make_tuple(TokenKind::BLANK, std::get<1>(token), std::get<2>(token)));
99  return std::optional<Token>(std::nullopt);
100  }
101 
102  void MConfigurationParser::translateKeywords(const std::string& pattern, std::vector<Token>& tokens) const {
103  for(size_t i = 0; i < tokens.size(); ++i) {
104  const Token& token = tokens[i];
105  std::optional<Token> new_token = translateKeyword(pattern, token);
106  if(new_token)
107  tokens[i] = new_token.value();
108  }
109  }
110 
111  std::string MConfigurationParser::sub(const std::string& m_fun, const MConfigurationParser::Token& token) const {
112  size_t begin, end;
113  std::tie(std::ignore, begin, end) = token;
114  size_t len = end - begin + 1;
115  return m_fun.substr(begin, len);
116  }
117 
118  std::pair<bool, std::string> MConfigurationParser::isVariableNode(const std::string& node) const {
119  size_t nodeLen = variablePrefix.size();
120  size_t prefixLen = variablePrefix.size();
121  if(nodeLen < prefixLen) {
122  return {false, node};
123  }
124  if(variablePrefix == node.substr(0, prefixLen)) {
125  return {true, node.substr(prefixLen)};
126  }
127  return {false, node};
128  }
129 
130  std::pair<std::shared_ptr<const MConfiguration<std::string>>, size_t> MConfigurationParser::parse_tokens_m_func(
131  const std::string& m_fun,
132  const std::vector<Token>& tokens,
133  size_t position) const {
134  using namespace std::string_literals;
135  size_t l = tokens.size();
136  if(position >= l) {
137  throw MConfigurationParserSyntaxErrorException("Reached end of string. This should not happen.", m_fun, tokens, position);
138  }
139 
140  std::string node;
141  bool explicitBlank = false;
142 
143  switch(std::get<0>(tokens[position])) {
144  case TokenKind::IDENT:
145  node = sub(m_fun, tokens[position]);
146  break;
147  case TokenKind::BLANK:
148  node = "";
149  explicitBlank = true;
150  break;
151  case TokenKind::LEFT:
152  case TokenKind::RIGHT:
154  case TokenKind::EOS:
155  throw MConfigurationParserSyntaxErrorException("M-configuration should start with an identifier.", m_fun, tokens, position);
156  }
157 
158  position++;
159 
160  std::function<std::shared_ptr<const MConfiguration<std::string>>(std::string)> make_leaf =
161  [&](const std::string& node_str) -> std::shared_ptr<const MConfiguration<std::string>> {
162  if(!explicitBlank && node_str.empty()) {
163  throw MConfigurationParserSyntaxErrorException("Token shouldn't be empty.", m_fun, tokens, position);
164  }
165  auto [isVariable, nodeName] = isVariableNode(node_str);
166  return std::make_shared<const MConfiguration<std::string>>(nodeName, isVariable);
167  };
168 
169  switch(std::get<0>(tokens[position])) {
170  case TokenKind::EOS:
171  return {make_leaf(node), position};
172  case TokenKind::IDENT:
173  case TokenKind::BLANK: {
174  throw MConfigurationParserSyntaxErrorException("Expected a left or right parenthesis or a delimiter after a node name.", m_fun, tokens, position);
175  }
177  case TokenKind::RIGHT:
178  return {make_leaf(node), position};
179  case TokenKind::LEFT:
180  std::vector<std::shared_ptr<const MConfiguration<std::string>>> params;
181  position++;
182  std::tie(params, position) = parse_tokens_m_func_list(m_fun, tokens, position);
183  switch(std::get<0>(tokens[position])){
184  case TokenKind::RIGHT:
185  return {std::make_shared<const MConfiguration<std::string>>(node, params), position + 1};
186  case TokenKind::LEFT:
187  case TokenKind::IDENT:
188  case TokenKind::BLANK:
190  case TokenKind::EOS: {
191  throw MConfigurationParserSyntaxErrorException("Expected right parenthesis after a list of parameters.", m_fun, tokens, position);
192 
193  }
194  }
195  }
196  }
197 
198  std::pair<std::vector<std::shared_ptr<const MConfiguration<std::string>>>, size_t> MConfigurationParser::parse_tokens_m_func_list(const std::string& m_fun, const std::vector<Token>& tokens, size_t position) const {
199  std::vector<std::shared_ptr<const MConfiguration<std::string>>> params;
200 
201  while(true) {
202  auto [param, new_position] = parse_tokens_m_func(m_fun, tokens, position);
203 
204  std::function<MConfigurationParserSyntaxErrorException(const std::string&)> make_exception =
205  [&, new_position = new_position](const std::string& message) -> MConfigurationParserSyntaxErrorException {
206  using Utils::Debug::operator<<;
207  std::stringstream ss;
208  ss << "new_position: " << new_position;
209  return MConfigurationParserSyntaxErrorException(message, m_fun, tokens, position, ss.str());
210  };
211 
212  position = new_position;
213  params.push_back(param);
214 
215  if(std::get<0>(tokens[position]) == TokenKind::IDENT) {
216  throw make_exception("Expected delimiter or right parenthesis, got an identifier");
217  }
218  if(std::get<0>(tokens[position]) == TokenKind::LEFT) {
219  throw make_exception("Expected delimiter or right parenthesis, got a left parenthesis");
220  }
221  if(std::get<0>(tokens[position]) == TokenKind::EOS) {
222  throw make_exception("Expected delimiter or right parenthesis, got end of string");
223  }
224  if(std::get<0>(tokens[position]) == TokenKind::RIGHT) {
225  break;
226  }
227  if(std::get<0>(tokens[position]) == TokenKind::SEPARATOR) {
228  position++;
229  }
230  }
231 
232  return {params, position};
233  }
234 }
TuringSim::State::MConfiguration::MConfigurationParser::translateKeywords
void translateKeywords(const std::string &mConfiguration, std::vector< Token > &tokens) const
Translate a full sequence of tokens.
Definition: mConfigurationParser.cpp:102
TuringSim::State::MConfiguration::MConfigurationParser::checkInitialization
void checkInitialization() const
Check if parameters of the parser are consistent. This function is called by constructors.
Definition: mConfigurationParser.cpp:4
TuringSim::State::MConfiguration::MConfigurationParser::MConfigurationParser
MConfigurationParser()
Builds MConfigurationParser with default parameters.
Definition: mConfigurationParser.cpp:32
TuringSim::State::MConfiguration::MConfigurationParser::TokenKind::LEFT
@ LEFT
The left parenthesis.
TuringSim::State::MConfiguration::MConfigurationParser::TokenKind::EOS
@ EOS
The token at the end of the string.
TuringSim::State::MConfiguration::MConfigurationParserSyntaxErrorException
Exception thrown when we try to parse a string with a syntax error as a MConfiguration<std::string>...
Definition: mConfigurationParser.h:183
TuringSim::State::MConfiguration::MConfigurationParser::TokenKind::IDENT
@ IDENT
Node names.
TuringSim::State::MConfiguration::MConfigurationParser::translateKeyword
std::optional< Token > translateKeyword(const std::string &mConfiguration, const Token &token) const
Translate a single token by recognizing keywords.
Definition: mConfigurationParser.cpp:92
TuringSim::State::MConfiguration::MConfigurationParser::TokenKind::BLANK
@ BLANK
The keyword "blank" for blank symbol.
TuringSim::State::MConfiguration::MConfigurationParser::parse_tokens_m_func_list
std::pair< std::vector< std::shared_ptr< const MConfiguration< std::string > > >, size_t > parse_tokens_m_func_list(const std::string &mConfiguration, const std::vector< Token > &tokens, size_t start) const
Parse a SEPARATOR-separated list of m-configurations starting at position start.
Definition: mConfigurationParser.cpp:198
TuringSim::State::MConfiguration::MConfigurationParser::parse_tokens_m_func
std::pair< std::shared_ptr< const MConfiguration< std::string > >, size_t > parse_tokens_m_func(const std::string &mConfiguration, const std::vector< Token > &tokens, size_t start) const
Parse an m-configuration starting at position start.
Definition: mConfigurationParser.cpp:130
TuringSim::State::MConfiguration::MConfigurationParser::TokenKind::RIGHT
@ RIGHT
The right parenthesis.
TuringSim::State::MConfiguration::MConfigurationParser::isVariableNode
std::pair< bool, std::string > isVariableNode(const std::string &node) const
Test if a raw node name is a variable name. A node name is a variable name if it starts with the vari...
Definition: mConfigurationParser.cpp:118
TuringSim::State::MConfiguration::MConfigurationParser::Token
std::tuple< TokenKind, size_t, size_t > Token
Type of tokens: the first component is the category, the second is the starting character,...
Definition: mConfigurationParser.h:38
TuringSim::State::MConfiguration::MConfigurationParser::tokenize
Token tokenize(const std::string &mConfiguration, size_t start) const
Lex a single token.
Definition: mConfigurationParser.cpp:62
TuringSim::State::MConfiguration::MConfigurationParser::TokenKind::SEPARATOR
@ SEPARATOR
Separator of terms.
TuringSim::State::MConfiguration::MConfigurationParserBadInitializationException
Exception thrown when the special characters of the parser are not consistent.
Definition: mConfigurationParser.h:248
TuringSim::State::MConfiguration::MConfigurationParser::sub
std::string sub(const std::string &mConfiguration, const Token &token) const
Get the text of a token.
Definition: mConfigurationParser.cpp:111
TuringSim::State::MConfiguration
The namespace of everything about m-configurations.
TuringSim::State::MConfiguration::MConfigurationParser::parse
std::shared_ptr< const MConfiguration< std::string > > parse(const std::string &mConfiguration) const
Parse a string into an m-configuration. The usual entry point.
Definition: mConfigurationParser.cpp:41