00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 #include <ctype.h>
00060
00061 #include <iomanip>
00062 #include <string>
00063 #include <sstream>
00064
00065 #include "GNURegex.h"
00066 #include "Error.h"
00067 #include "InternalErr.h"
00068
00069 #include "debug.h"
00070
00071 using namespace std;
00072
00073 namespace libdap {
00074
00075
00076
00077
00078
/**
 * @brief Format a byte as two lowercase hexadecimal digits.
 *
 * @param val The byte to format.
 * @return A two-character, zero-padded string (for example 10 yields "0a").
 */
std::string
hexstring(unsigned char val)
{
    static const char digits[] = "0123456789abcdef";

    std::string text(2, '0');
    text[0] = digits[(val >> 4) & 0x0f]; // high nibble
    text[1] = digits[val & 0x0f];        // low nibble
    return text;
}
00088
/**
 * @brief Decode hexadecimal text into the single character it encodes.
 *
 * The text is read with a hex-mode input stream and the resulting integer
 * is narrowed to a char.
 *
 * @param s Hexadecimal digits, for example "41".
 * @return A one-character string holding the decoded character, or an
 * empty string when the decoded character is NUL.
 */
std::string
unhexstring(std::string s)
{
    std::istringstream parser(s);
    int code = 0;
    parser >> std::hex >> code;

    const char decoded = static_cast<char>(code);
    // A NUL result yields an empty string rather than a string holding '\0'.
    return decoded == '\0' ? std::string() : std::string(1, decoded);
}
00100
/**
 * @brief Format a byte as three octal digits.
 *
 * @param val The byte to format.
 * @return A three-character, zero-padded string (for example 10 yields "012").
 */
std::string
octstring(unsigned char val)
{
    std::string text(3, '0');
    text[0] = static_cast<char>('0' + ((val >> 6) & 0x07));
    text[1] = static_cast<char>('0' + ((val >> 3) & 0x07));
    text[2] = static_cast<char>('0' + (val & 0x07));
    return text;
}
00110
00111 string
00112 unoctstring(string s)
00113 {
00114 int val;
00115
00116 istringstream ss(s);
00117 ss >> oct >> val;
00118
00119 DBG(cerr << "unoctstring: " << val << endl);
00120
00121 char tmp_str[2];
00122 tmp_str[0] = static_cast<char>(val);
00123 tmp_str[1] = '\0';
00124 return string(tmp_str);
00125 }
00126
00151 string
00152 id2www(string in, const string &allowable)
00153 {
00154 string::size_type i = 0;
00155
00156 while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
00157 in.replace(i, 1, "%" + hexstring(in[i]));
00158 i++;
00159 }
00160
00161 return in;
00162 }
00163
00174 string
00175 id2www_ce(string in, const string &allowable)
00176 {
00177 return id2www(in, allowable);
00178 }
00179
00212 string
00213 www2id(const string &in, const string &escape, const string &except)
00214 {
00215 string::size_type i = 0;
00216 string res = in;
00217 while ((i = res.find_first_of(escape, i)) != string::npos) {
00218 if (except.find(res.substr(i, 3)) != string::npos) {
00219 i += 3;
00220 continue;
00221 }
00222 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00223 }
00224
00225 return res;
00226 }
00227
00228 static string
00229 entity(char c)
00230 {
00231 switch (c) {
00232 case '>': return ">";
00233 case '<': return "<";
00234 case '&': return "&";
00235 case '\'': return "'";
00236 case '\"': return """;
00237 default:
00238 throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
00239 }
00240 }
00241
00242
00243
/**
 * @brief Convert a number written in octal to lowercase hexadecimal text.
 *
 * @param octal_digits The number in octal notation, for example "101".
 * @return The same value in hexadecimal, zero-padded to at least two
 * digits (for example "41").
 */
std::string
octal_to_hex(const std::string& octal_digits)
{
    int value = 0;
    std::istringstream reader(octal_digits);
    reader.setf(std::ios_base::oct, std::ios_base::basefield);
    reader >> value;

    std::ostringstream writer;
    writer.setf(std::ios_base::hex, std::ios_base::basefield);
    writer.fill('0');
    writer.width(2); // minimum width; larger values are not truncated
    writer << value;
    return writer.str();
}
00256
00263 string
00264 id2xml(string in, const string ¬_allowed)
00265 {
00266 string::size_type i = 0;
00267
00268 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
00269 in.replace(i, 1, entity(in[i]));
00270 ++i;
00271 }
00272
00273
00274
00275
00276
00277
00278 string octal_escape = "\\\\";
00279 i = 0;
00280 string::size_type length = in.length();
00281 while ((i = in.find(octal_escape, i)) != string::npos) {
00282
00283 string::size_type j = i + 2;
00284 if (j + 1 >= length)
00285 break;
00286 string octal_digits = in.substr(j, 3);
00287
00288 string hex_escape = string("&#x");
00289 hex_escape.append(octal_to_hex(octal_digits));
00290 hex_escape.append(string(";"));
00291
00292
00293 in.replace(i, 5, hex_escape);
00294
00295
00296 i += 6;
00297 }
00298
00299 return in;
00300 }
00301
/**
 * @brief Replace the five predefined XML entities with their characters.
 *
 * The text is scanned once from left to right. At each '&' the function
 * checks whether one of "&gt;", "&lt;", "&amp;", "&apos;" or "&quot;"
 * starts there and, if so, replaces it with the character it stands for.
 * Scanning then continues after that character, so the output of one
 * replacement is never decoded a second time (for example "&amp;quot;"
 * yields "&quot;", not a double quote). Any other '&' is left as it is.
 *
 * @param in The text to decode (taken by value and modified).
 * @return The decoded text.
 */
std::string
xml2id(std::string in)
{
    struct Entity {
        const char* text; // entity reference as it appears in the input
        char value;       // character it stands for
    };
    static const Entity entities[] = {
        { "&gt;", '>' },
        { "&lt;", '<' },
        { "&amp;", '&' },
        { "&apos;", '\'' },
        { "&quot;", '"' }
    };
    const std::string::size_type entity_count =
        sizeof(entities) / sizeof(entities[0]);

    std::string::size_type i = 0;
    while ((i = in.find('&', i)) != std::string::npos) {
        for (std::string::size_type k = 0; k < entity_count; ++k) {
            const std::string::size_type len =
                std::char_traits<char>::length(entities[k].text);
            if (in.compare(i, len, entities[k].text) == 0) {
                in.replace(i, len, 1, entities[k].value);
                break;
            }
        }
        // Step past the decoded character, or past an '&' that does not
        // start a known entity.
        ++i;
    }

    return in;
}
00333
/**
 * @brief Replace each '%' and the two characters after it with one underscore.
 *
 * When fewer than two characters follow the '%', whatever remains of the
 * string from the '%' onward is replaced.
 *
 * @param s The text to process (taken by value and modified).
 * @return The text with every such sequence collapsed to "_".
 */
std::string
esc2underscore(std::string s)
{
    // Everything before 'at' is already free of '%', so each search can
    // resume from the position of the previous replacement.
    for (std::string::size_type at = s.find('%');
         at != std::string::npos;
         at = s.find('%', at)) {
        s.replace(at, 3, "_");
    }

    return s;
}
00348
00349
00353 string
00354 escattr(string s)
00355 {
00356 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
00357 const string ESC = "\\";
00358 const string DOUBLE_ESC = ESC + ESC;
00359 const string QUOTE = "\"";
00360 const string ESCQUOTE = ESC + QUOTE;
00361
00362
00363 string::size_type ind = 0;
00364 while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
00365 s.replace(ind, 1, ESC + octstring(s[ind]));
00366
00367
00368 ind = 0;
00369 while ((ind = s.find(ESC, ind)) != s.npos) {
00370 s.replace(ind, 1, DOUBLE_ESC);
00371 ind += DOUBLE_ESC.length();
00372 }
00373
00374
00375 ind = 0;
00376 while ((ind = s.find(QUOTE, ind)) != s.npos) {
00377 s.replace(ind, 1, ESCQUOTE);
00378 ind += ESCQUOTE.length();
00379 }
00380
00381 return s;
00382 }
00383
00392 string
00393 unescattr(string s)
00394 {
00395 Regex octal("\\\\[0-3][0-7][0-7]");
00396 Regex esc_quote("\\\\\"");
00397 Regex esc_esc("\\\\\\\\");
00398 const string ESC = "\\";
00399 const string QUOTE = "\"";
00400 int matchlen;
00401 unsigned int index;
00402
00403 DBG(cerr << "0XX" << s << "XXX" << endl);
00404
00405 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00406 while (index < s.length()) {
00407 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
00408 s.replace(index, 2, ESC);
00409 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
00410 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00411 }
00412
00413
00414 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00415 while (index < s.length()) {
00416 s.replace(index, 2, QUOTE);
00417 DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
00418 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00419 }
00420
00421
00422 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00423 while (index < s.length()) {
00424 s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
00425 DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
00426 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00427 }
00428
00429 DBG(cerr << "4XX" << s << "XXX" << endl);
00430 return s;
00431 }
00432
/**
 * @brief Wrap a message in double quotes and escape the quotes inside it.
 *
 * A leading double quote is added unless the message already starts with
 * one, and a trailing double quote is added unless the message already ends
 * with one that is distinct from the leading quote. Every double quote
 * between the first and last character that is not already preceded by a
 * backslash gets a backslash inserted in front of it.
 *
 * An empty message yields a pair of double quotes.
 *
 * @param msg The message text (taken by value and modified).
 * @return The quoted message.
 */
std::string
munge_error_message(std::string msg)
{
    // Check for emptiness first: reading the first or last character of an
    // empty string is out of range.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');
    // A single '"' is only the opening quote, so a closing one is still needed.
    if (msg.size() < 2 || msg[msg.size() - 1] != '"')
        msg += '"';

    // Escape interior quotes. Indices are used instead of iterators because
    // each insertion may reallocate the string.
    for (std::string::size_type i = 1; i + 1 < msg.size(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i; // step onto the quote; the loop increment moves past it
        }
    }

    return msg;
}
00450
/**
 * @brief Put a backslash in front of every double quote.
 *
 * Quotes are escaped unconditionally; a quote that is already preceded by
 * a backslash receives a second one.
 *
 * @param source The text to escape.
 * @return A copy of @p source with each '"' replaced by a backslash and '"'.
 */
std::string
escape_double_quotes(std::string source)
{
    std::string escaped;
    escaped.reserve(source.length());

    for (std::string::const_iterator it = source.begin(); it != source.end(); ++it) {
        if (*it == '"')
            escaped += '\\';
        escaped += *it;
    }

    return escaped;
}
00466
/**
 * @brief Replace every backslash-quote pair with a plain double quote.
 *
 * The text is scanned from left to right. Backslashes that are not directly
 * followed by a double quote are copied unchanged.
 *
 * @param source The text to unescape.
 * @return A copy of @p source with each backslash-quote pair reduced to '"'.
 */
std::string
unescape_double_quotes(std::string source)
{
    const std::string::size_type count = source.length();
    std::string plain;
    plain.reserve(count);

    std::string::size_type k = 0;
    while (k < count) {
        const bool escaped_quote =
            source[k] == '\\' && k + 1 < count && source[k + 1] == '"';
        if (escaped_quote) {
            plain += '"';
            k += 2;
        }
        else {
            plain += source[k];
            ++k;
        }
    }

    return plain;
}
00483
00484 }
00485