Eclipse SUMO - Simulation of Urban MObility
Loading...
Searching...
No Matches
StringUtils.cpp
Go to the documentation of this file.
1/****************************************************************************/
2// Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3// Copyright (C) 2001-2023 German Aerospace Center (DLR) and others.
4// This program and the accompanying materials are made available under the
5// terms of the Eclipse Public License 2.0 which is available at
6// https://www.eclipse.org/legal/epl-2.0/
7// This Source Code may also be made available under the following Secondary
8// Licenses when the conditions for such availability set forth in the Eclipse
9// Public License 2.0 are satisfied: GNU General Public License, version 2
10// or later which is available at
11// https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13/****************************************************************************/
21// Some static methods for string processing
22/****************************************************************************/
23#include <config.h>
24
25#include <string>
26#include <iostream>
27#include <cstdio>
28#include <cstring>
29#include <regex>
30#ifdef WIN32
31#define NOMINMAX
32#include <windows.h>
33#undef NOMINMAX
34#else
35#include <unistd.h>
36#endif
37#include <xercesc/util/TransService.hpp>
38#include <xercesc/util/TranscodingException.hpp>
42#include "StringUtils.h"
43
44
45// ===========================================================================
46// static member definitions
47// ===========================================================================
48std::string StringUtils::emptyString;
49XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
50
51
52// ===========================================================================
53// method definitions
54// ===========================================================================
55std::string
56StringUtils::prune(const std::string& str) {
57 const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
58 if (std::string::npos != endpos) {
59 const int startpos = (int)str.find_first_not_of(" \t\n\r");
60 return str.substr(startpos, endpos - startpos + 1);
61 }
62 return "";
63}
64
65
66std::string
67StringUtils::pruneZeros(const std::string& str, int max) {
68 const std::string::size_type endpos = str.find_last_not_of("0");
69 if (endpos != std::string::npos && str.back() == '0') {
70 std::string res = str.substr(0, MAX2((int)str.size() - max, (int)endpos + 1));
71 return res;
72 }
73 return str;
74}
75
76std::string
77StringUtils::to_lower_case(const std::string& str) {
78 std::string s = str;
79 std::transform(s.begin(), s.end(), s.begin(), [](char c) {
80 return (char)::tolower(c);
81 });
82 return s;
83}
84
85
86std::string
88 // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
89 std::string result;
90 for (const auto& c : str) {
91 const unsigned char uc = (unsigned char)c;
92 if (uc < 128) {
93 result += uc;
94 } else {
95 result += (char)(0xc2 + (uc > 0xbf));
96 result += (char)((uc & 0x3f) + 0x80);
97 }
98 }
99 return result;
100}
101
102
103std::string
105 str = replace(str, "\xE4", "ae");
106 str = replace(str, "\xC4", "Ae");
107 str = replace(str, "\xF6", "oe");
108 str = replace(str, "\xD6", "Oe");
109 str = replace(str, "\xFC", "ue");
110 str = replace(str, "\xDC", "Ue");
111 str = replace(str, "\xDF", "ss");
112 str = replace(str, "\xC9", "E");
113 str = replace(str, "\xE9", "e");
114 str = replace(str, "\xC8", "E");
115 str = replace(str, "\xE8", "e");
116 return str;
117}
118
119
120std::string
121StringUtils::replace(std::string str, const std::string& what, const std::string& by) {
122 std::string::size_type idx = str.find(what);
123 const int what_len = (int)what.length();
124 if (what_len > 0) {
125 const int by_len = (int)by.length();
126 while (idx != std::string::npos) {
127 str = str.replace(idx, what_len, by);
128 idx = str.find(what, idx + by_len);
129 }
130 }
131 return str;
132}
133
134
135std::string
136StringUtils::substituteEnvironment(const std::string& str, const std::chrono::time_point<std::chrono::system_clock>* const timeRef) {
137 std::string s = str;
138 if (timeRef != nullptr) {
139 const std::string::size_type localTimeIndex = str.find("${LOCALTIME}");
140 const std::string::size_type utcIndex = str.find("${UTC}");
141 const bool isUTC = utcIndex != std::string::npos;
142 if (localTimeIndex != std::string::npos || isUTC) {
143 const time_t rawtime = std::chrono::system_clock::to_time_t(*timeRef);
144 char buffer [80];
145 struct tm* timeinfo = isUTC ? gmtime(&rawtime) : localtime(&rawtime);
146 strftime(buffer, 80, "%Y-%m-%d-%H-%M-%S.", timeinfo);
147 auto seconds = std::chrono::time_point_cast<std::chrono::seconds>(*timeRef);
148 auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(*timeRef - seconds);
149 const std::string micro = buffer + toString(microseconds.count());
150 if (isUTC) {
151 s.replace(utcIndex, 6, micro);
152 } else {
153 s.replace(localTimeIndex, 12, micro);
154 }
155 }
156 }
157 const std::string::size_type pidIndex = str.find("${PID}");
158 if (pidIndex != std::string::npos) {
159#ifdef WIN32
160 s.replace(pidIndex, 6, toString(::GetCurrentProcessId()));
161#else
162 s.replace(pidIndex, 6, toString(::getpid()));
163#endif
164 }
165 if (std::getenv("SUMO_LOGO") == nullptr) {
166 s = replace(s, "${SUMO_LOGO}", "${SUMO_HOME}/data/logo/sumo-128x138.png");
167 }
168 const std::string::size_type tildeIndex = str.find("~");
169 if (tildeIndex == 0) {
170 s.replace(0, 1, "${HOME}");
171 }
172 s = replace(s, ",~", ",${HOME}");
173#ifdef WIN32
174 if (std::getenv("HOME") == nullptr) {
175 s = replace(s, "${HOME}", "${USERPROFILE}");
176 }
177#endif
178
179 // Expression for an environment variables, e.g. ${NAME}
180 // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
181 // - .+? looks for the shortest match (non-greedy)
182 // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
183 std::regex envVarExpr(R"(\$\{(.+?)\})");
184
185 // Are there any variables in this string?
186 std::smatch match;
187 std::string strIter = s;
188
189 // Loop over the entire value string and look for variable names
190 while (std::regex_search(strIter, match, envVarExpr)) {
191 std::string varName = match[1];
192
193 // Find the variable in the environment and its value
194 std::string varValue;
195 if (std::getenv(varName.c_str()) != nullptr) {
196 varValue = std::getenv(varName.c_str());
197 }
198
199 // Replace the variable placeholder with its value in the original string
200 s = std::regex_replace(s, std::regex("\\$\\{" + varName + "\\}"), varValue);
201
202 // Continue the loop with the remainder of the string
203 strIter = match.suffix();
204 }
205 return s;
206}
207
208
209bool
210StringUtils::startsWith(const std::string& str, const std::string prefix) {
211 return str.compare(0, prefix.length(), prefix) == 0;
212}
213
214
215bool
216StringUtils::endsWith(const std::string& str, const std::string suffix) {
217 if (str.length() >= suffix.length()) {
218 return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
219 } else {
220 return false;
221 }
222}
223
224
225std::string
226StringUtils::padFront(const std::string& str, int length, char padding) {
227 return std::string(MAX2(0, length - (int)str.size()), padding) + str;
228}
229
230
231std::string
232StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
233 std::string result = replace(orig, "&", "&amp;");
234 result = replace(result, ">", "&gt;");
235 result = replace(result, "<", "&lt;");
236 result = replace(result, "\"", "&quot;");
237 if (maskDoubleHyphen) {
238 result = replace(result, "--", "&#45;&#45;");
239 }
240 for (char invalid = '\1'; invalid < ' '; invalid++) {
241 result = replace(result, std::string(1, invalid).c_str(), "");
242 }
243 return replace(result, "'", "&apos;");
244}
245
246
247std::string
248StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
249 std::ostringstream out;
250
251 for (int i = 0; i < (int)toEncode.length(); ++i) {
252 const char t = toEncode.at(i);
253
254 if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
255 (encodeWhich == "" &&
256 ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
257 (t >= 65 && t <= 90) || // A-Z
258 t == 95 || // underscore
259 (t >= 97 && t <= 122) || // a-z
260 t == 126)) // tilde
261 ) {
262 out << toEncode.at(i);
263 } else {
264 out << charToHex(toEncode.at(i));
265 }
266 }
267
268 return out.str();
269}
270
271
272std::string
273StringUtils::urlDecode(const std::string& toDecode) {
274 std::ostringstream out;
275
276 for (int i = 0; i < (int)toDecode.length(); ++i) {
277 if (toDecode.at(i) == '%') {
278 std::string str(toDecode.substr(i + 1, 2));
279 out << hexToChar(str);
280 i += 2;
281 } else {
282 out << toDecode.at(i);
283 }
284 }
285
286 return out.str();
287}
288
289std::string
290StringUtils::charToHex(unsigned char c) {
291 short i = c;
292
293 std::stringstream s;
294
295 s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
296
297 return s.str();
298}
299
300
301unsigned char
302StringUtils::hexToChar(const std::string& str) {
303 short c = 0;
304 if (!str.empty()) {
305 std::istringstream in(str);
306 in >> std::hex >> c;
307 if (in.fail()) {
308 throw NumberFormatException(str + " could not be interpreted as hex");
309 }
310 }
311 return static_cast<unsigned char>(c);
312}
313
314
315int
316StringUtils::toInt(const std::string& sData) {
317 long long int result = toLong(sData);
318 if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
319 throw NumberFormatException(toString(result) + " int overflow");
320 }
321 return (int)result;
322}
323
324
325int
326StringUtils::toIntSecure(const std::string& sData, int def) {
327 if (sData.length() == 0) {
328 return def;
329 }
330 return toInt(sData);
331}
332
333
334long long int
335StringUtils::toLong(const std::string& sData) {
336 const char* const data = sData.c_str();
337 if (data == 0 || data[0] == 0) {
338 throw EmptyData();
339 }
340 char* end;
341 errno = 0;
342#ifdef WIN32
343 long long int ret = _strtoi64(data, &end, 10);
344#else
345 long long int ret = strtoll(data, &end, 10);
346#endif
347 if (errno == ERANGE) {
348 errno = 0;
349 throw NumberFormatException("(long long integer range) " + sData);
350 }
351 if ((int)(end - data) != (int)strlen(data)) {
352 throw NumberFormatException("(long long integer format) " + sData);
353 }
354 return ret;
355}
356
357
358int
359StringUtils::hexToInt(const std::string& sData) {
360 if (sData.length() == 0) {
361 throw EmptyData();
362 }
363 size_t idx = 0;
364 int result;
365 try {
366 if (sData[0] == '#') { // for html color codes
367 result = std::stoi(sData.substr(1), &idx, 16);
368 idx++;
369 } else {
370 result = std::stoi(sData, &idx, 16);
371 }
372 } catch (...) {
373 throw NumberFormatException("(hex integer format) " + sData);
374 }
375 if (idx != sData.length()) {
376 throw NumberFormatException("(hex integer format) " + sData);
377 }
378 return result;
379}
380
381
382double
383StringUtils::toDouble(const std::string& sData) {
384 if (sData.size() == 0) {
385 throw EmptyData();
386 }
387 try {
388 size_t idx = 0;
389 const double result = std::stod(sData, &idx);
390 if (idx != sData.size()) {
391 throw NumberFormatException("(double format) " + sData);
392 } else {
393 return result;
394 }
395 } catch (...) {
396 // invalid_argument or out_of_range
397 throw NumberFormatException("(double) " + sData);
398 }
399}
400
401
402double
403StringUtils::toDoubleSecure(const std::string& sData, const double def) {
404 if (sData.length() == 0) {
405 return def;
406 }
407 return toDouble(sData);
408}
409
410
411bool
412StringUtils::toBool(const std::string& sData) {
413 if (sData.length() == 0) {
414 throw EmptyData();
415 }
416 const std::string s = to_lower_case(sData);
417 if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
418 return true;
419 }
420 if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
421 return false;
422 }
423 throw BoolFormatException(s);
424}
425
427StringUtils::toVersion(const std::string& sData) {
428 std::vector<std::string> parts = StringTokenizer(sData, ".").getVector();
429 return MMVersion(toInt(parts.front()), toDouble(parts.back()));
430}
431
432std::string
433StringUtils::transcode(const XMLCh* const data, int length) {
434 if (data == 0) {
435 throw EmptyData();
436 }
437 if (length == 0) {
438 return "";
439 }
440#if _XERCES_VERSION < 30100
441 char* t = XERCES_CPP_NAMESPACE::XMLString::transcode(data);
442 std::string result(t);
443 XERCES_CPP_NAMESPACE::XMLString::release(&t);
444 return result;
445#else
446 try {
447 XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
448 return reinterpret_cast<const char*>(utf8.str());
449 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
450 return "?";
451 }
452#endif
453}
454
455
456std::string
457StringUtils::transcodeFromLocal(const std::string& localString) {
458#if _XERCES_VERSION > 30100
459 try {
460 if (myLCPTranscoder == nullptr) {
461 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
462 }
463 if (myLCPTranscoder != nullptr) {
464 return transcode(myLCPTranscoder->transcode(localString.c_str()));
465 }
466 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
467#endif
468 return localString;
469}
470
471
472std::string
473StringUtils::transcodeToLocal(const std::string& utf8String) {
474#if _XERCES_VERSION > 30100
475 try {
476 if (myLCPTranscoder == nullptr) {
477 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
478 }
479 if (myLCPTranscoder != nullptr) {
480 XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
481 return myLCPTranscoder->transcode(utf8.str());
482 }
483 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
484#endif
485 return utf8String;
486}
487
488
489std::string
490StringUtils::trim_left(const std::string s, const std::string& t) {
491 std::string result = s;
492 result.erase(0, s.find_first_not_of(t));
493 return result;
494}
495
496std::string
497StringUtils::trim_right(const std::string s, const std::string& t) {
498 std::string result = s;
499 result.erase(s.find_last_not_of(t) + 1);
500 return result;
501}
502
503std::string
504StringUtils::trim(const std::string s, const std::string& t) {
505 return trim_right(trim_left(s, t), t);
506}
507
508void
512
513/****************************************************************************/
std::pair< int, double > MMVersion
(M)ajor/(M)inor version for written networks and default version for loading
Definition StdDefs.h:67
T MAX2(T a, T b)
Definition StdDefs.h:82
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition ToString.h:46
std::vector< std::string > getVector()
return vector of strings
static std::string pruneZeros(const std::string &str, int max)
Removes trailing zeros (at most 'max')
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
encode url (adapted from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static MMVersion toVersion(const std::string &sData)
to version
static std::string charToHex(unsigned char c)
char to hexadecimal
static std::string urlDecode(const std::string &encoded)
decode url (adapted from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it; returns the given default if the string is empty
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string to_lower_case(const std::string &str)
Transfers the content to lower case.
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string replace(std::string str, const std::string &what, const std::string &by)
Replaces all occurrences of the second string by the third string within the first string.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
static std::string prune(const std::string &str)
Removes trailing and leading whitespace characters.
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts German umlauts ("Umlaute") to their Latin transliterations.
static unsigned char hexToChar(const std::string &str)
hexadecimal to char
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string emptyString
An empty string.
Definition StringUtils.h:86
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string substituteEnvironment(const std::string &str, const std::chrono::time_point< std::chrono::system_clock > *const timeRef=nullptr)
Replaces an environment variable with its value (similar to bash); syntax for a variable is ${NAME}.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter