// This file implements the shared structures and the lexer helpers for the
// protocol buffer definition format. They are required to parse a protocol
// buffer file or tree and to generate code that reads and writes the
// specified format.
module dprotobuf.pbgeneral;
version(D_Version2) {
	import std.algorithm;
	import std.range;
} else
	import dprotobuf.d1support;

import std.stdio;
import std.string;
import std.uni;
import std.conv;

// Kinds of top-level/line-level elements that typeNextElement can report.
enum PBTypes {
	PB_Package=1,
	PB_Enum,
	PB_Message,
	PB_Option,
	PB_Extension,
	PB_Extend,
	PB_Service,
	PB_Import,
	PB_Optional,
	PB_Required,
	PB_Repeated,
	PB_Comment,
	PB_MultiComment,
}

// Character classes for parsing; see isValidChar for the characters that
// each class accepts.
enum CClass {
	MultiIdentifier,
	Identifier,
	Numeric,
	Comment,
	Value,
}

// Whitespace test that hides the D1/D2 library naming difference.
private bool isSpaceChar(char c) {
	version(D_Version2) {
		return isWhite(c);
	} else {
		return iswhite(c) != 0;
	}
}

// Returns true when `c` is one of the characters listed in `terms`.
private bool isTerminator(string terms, char c) {
	foreach (char t; terms) {
		if (t == c) return true;
	}
	return false;
}

// Checks a dot-separated identifier: every part between the dots must be
// non-empty and must pass validIdentifier.
bool validateMultiIdentifier(string ident)
in {
	assert(ident.length);
} body {
	string[] parts = split(ident,".");
	foreach(part;parts) {
		if (!part.length) return false;
		if (!validIdentifier(part)) return false;
	}
	return true;
}

// Exception thrown for every parse problem detected in this module.
//   locus      - the location/context string passed as `location`
//   error      - the problem description passed as `problem`
//   lineNumber - the input line passed as `inputLine`
class PBParseException:Exception {
	string locus;
	string error;
	size_t lineNumber;
	this(string location,string problem, size_t inputLine, string file = __FILE__, size_t line = __LINE__) {
		version(D_Version2) {
			super(location~": "~problem~" Line:"~to!(string)(inputLine), file, line);
		} else {
			super(location~": "~problem~" Line:"~to!(string)(inputLine));
		}

		locus = location;
		error = problem;
		lineNumber = inputLine;
	}
}

// The text that is still to be parsed together with the current line number
// (starting at 1). Under D2 the struct forwards to `input` via alias this;
// under D1 the needed string operations are provided by hand.
struct ParserData {
	string input;
	size_t line = 1;

	version(D_Version2) {
		mixin("alias input this;");
	} else {
		size_t length() {
			return input.length;
		}

		ParserData opSlice(size_t left, size_t right) {
			return ParserData(input[left..right], line);
		}

		size_t opDollar() {
			return input.length;
		}

		char opIndex(size_t i) {
			return input[i];
		}
	}
}

// A list of comment strings plus bookkeeping fields. The fields are only
// declared here; NOTE(review): presumably they are maintained by the parser
// that collects comments - confirm against the callers.
struct CommentManager {
	string[] comments;
	size_t line;
	size_t lastElementLine;
	PBTypes lastElementType;

	version(D_Version2) {
		mixin("alias comments this;");
	} else {
		void opCatAssign(string v) {
			comments ~= v;
		}
	}
}

// Determines the type of the element at the start of `pbstring`.
// Comment openers ("//" and "/*") are recognised first; otherwise the text up
// to the first whitespace character is matched against the known keywords.
// Throws PBParseException for "service" (unsupported) and for unknown words.
PBTypes typeNextElement(in ParserData pbstring)
in {
	assert(pbstring.length);
} body {
	// we want to check for // type comments here, since there doesn't
	// necessarily have to be a space after the opener
	if (pbstring.input.length>1) {
		if(pbstring.input[0..2] == "//")
			return PBTypes.PB_Comment;
		else if(pbstring.input[0..2] == "/*")
			return PBTypes.PB_MultiComment;
	}
	size_t i=0;
	for(;i<pbstring.input.length && !isSpaceChar(pbstring.input[i]);i++){}
	auto type = pbstring.input[0..i];
	switch(type) {
	case "package":
		return PBTypes.PB_Package;
	case "enum":
		return PBTypes.PB_Enum;
	case "message":
		return PBTypes.PB_Message;
	case "repeated":
		return PBTypes.PB_Repeated;
	case "required":
		return PBTypes.PB_Required;
	case "optional":
		return PBTypes.PB_Optional;
	case "option":
		return PBTypes.PB_Option;
	case "import":
		return PBTypes.PB_Import;
	case "extensions":
		return PBTypes.PB_Extension;
	case "extend":
		return PBTypes.PB_Extend;
	case "service":
		throw new PBParseException("Protocol Buffer Definition",capitalize(type)~" definitions are not currently supported.", pbstring.line);
	default:
		throw new PBParseException("Protocol Buffer Definition","Unknown element type "~type~".", pbstring.line);
	}
}

// Rips the next token off the front of `pbstring`: the longest prefix whose
// characters are all valid for class `cc`. The token is returned and removed
// from `pbstring`; the result is empty when the first character is not valid.
// Throws PBParseException when the underscore check below fails.
string stripValidChars(CClass cc,ref ParserData pbstring)
in {
	assert(pbstring.length);
} body {
	// NOTE(review): the second test looks at the last character of the whole
	// remaining input, not at the last character of the token that is about
	// to be extracted. Kept unchanged because callers may depend on which
	// inputs are accepted - confirm the intended rule.
	if(pbstring.input[0] == '_' || pbstring.input[$-1] == '_')
		throw new PBParseException("Next Token","Identifier cannot begin or end with underscore", pbstring.line);
	size_t i=0;
	for(;i<pbstring.input.length && isValidChar(cc,pbstring.input[i]);i++){}
	string tmp = pbstring.input[0..i];
	pbstring.input = pbstring.input[i .. $];
	return tmp;
}

unittest {
	auto str = ParserData("// Filly\n");
	assert(stripValidChars(CClass.Comment, str) == "// Filly");
	assert(str.input == "\n");
}


// Allowed characters vary by class. The cases deliberately fall through so
// that each class also accepts everything the classes below it accept:
//   Value           : '-' plus MultiIdentifier
//   MultiIdentifier : '.' plus Identifier
//   Identifier      : letters and '_' plus Numeric
//   Numeric         : decimal digits
//   Comment         : everything except '\n', '\r' and '\f'
bool isValidChar(CClass cc,char pc) {
	switch(cc) {
	case CClass.Value:
		if (pc == '-') return true;
		goto case;
	case CClass.MultiIdentifier:
		if (pc == '.') return true;
		goto case;
	case CClass.Identifier:
		if (pc >= 'a' && pc <= 'z') return true;
		if (pc >= 'A' && pc <= 'Z') return true;
		if (pc == '_') return true;
		goto case;
	case CClass.Numeric:
		if (pc >= '0' && pc <= '9') return true;
		return false;
	case CClass.Comment:
		if (pc == '\n') return false;
		if (pc == '\r') return false;
		if (pc == '\f') return false;
		return true;
	default:
		break;
	}

	assert(false, "Missing switch case");
}

// An identifier is rejected only when it starts with a decimal digit; the
// remaining characters are not inspected here.
bool validIdentifier(string ident)
in {
	assert(ident.length);
} body {
	if (ident[0] >= '0' && ident[0] <= '9') return false;
	return true;
}

// Returns a copy of `s` with the leading whitespace removed and with `line`
// advanced by the number of line breaks that were skipped. "\n", "\r" and
// "\r\n" each count as exactly one line break.
ParserData stripLWhite(ParserData s) {
	size_t i;

	for (i = 0; i < s.input.length; i++)
	{
		char c = s.input[i];
		if (!isSpaceChar(c))
			break;
		if (c == '\n')
			s.line++;
		if (c == '\r') {
			s.line++;
			// swallow the '\n' of a CRLF pair so it is not counted twice
			if(i+1 < s.input.length && s.input[i+1] == '\n')
				i++;
		}
	}
	s.input = s.input[i .. $];
	return s;
}
unittest {
	assert("asdf " == stripLWhite(ParserData("  \n\tasdf ")).input);
	assert(stripLWhite(ParserData("  \n\tasdf ")).line == 2);
	assert("asdf" == stripLWhite(ParserData("  \tasdf")).input);
	// CRLF is a single line break
	assert(stripLWhite(ParserData("\r\n\r\nx")).line == 3);
	assert(stripLWhite(ParserData("\r\n\r\nx")).input == "x");
	// a lone CR still counts
	assert(stripLWhite(ParserData("\rx")).line == 2);
}

unittest {
	writefln("unittest ProtocolBuffer.pbgeneral");
	debug writefln("Checking validIdentifier...");
	assert(validIdentifier("asdf"));
	assert(!validIdentifier("8asdf"));
	// also takes care of isValidChar
	debug writefln("Checking stripValidChars...");
	auto tmp = ParserData("asdf1 yarrr");
	assert(stripValidChars(CClass.Identifier,tmp) == "asdf1");
	assert(tmp.input == " yarrr");
	tmp = ParserData("as2f.ya7rr -adfbads25737");
	assert(stripValidChars(CClass.MultiIdentifier,tmp) == "as2f.ya7rr");
	assert(tmp.input == " -adfbads25737");
	debug writefln("");
}

// One parsed option. `extension` is set when the name was written in
// parentheses. `subident` is declared but not filled in by this module.
struct PBOption {
	string name;
	string subident;
	string value;
	bool extension = false;
}

// TODO: actually do something with options
//
// Parses one `name = value` option from the front of `pbstring`.
//   terms - the characters that may legally follow the value
// The terminator itself is left in `pbstring` so the caller can inspect it.
// Quoted values are returned including their quotes.
// Throws PBParseException on any malformed or truncated option.
PBOption ripOption(ref ParserData pbstring,string terms = ";") {
	// we need to pull apart the option and stuff it in a struct
	PBOption pbopt;
	// strip first so that an extension option preceded by whitespace
	// (e.g. after a ',') is still recognised
	pbstring = stripLWhite(pbstring);
	if (pbstring.input.length && pbstring.input[0] == '(') {
		pbopt.extension = true;
		pbstring.input = pbstring.input[1 .. $];
		pbstring = stripLWhite(pbstring);
	}
	if (!pbstring.input.length) throw new PBParseException("Option Parse","Malformed option: Option name not found.", pbstring.line);
	pbopt.name = stripValidChars(CClass.MultiIdentifier,pbstring);
	if (!pbopt.name.length) throw new PBParseException("Option Parse","Malformed option: Option name not found.", pbstring.line);
	if (pbopt.extension) {
		pbstring = stripLWhite(pbstring);
		// rip off trailing ), making sure that it really is one
		if (!pbstring.input.length || pbstring.input[0] != ')')
			throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing ) after extension name.", pbstring.line);
		pbstring.input = pbstring.input[1 .. $];
		// check for more portions of the identifier; at least one character
		// has to follow the '.' for there to be anything to rip
		if (pbstring.input.length > 1 && pbstring.input[0] == '.') {
			// rip off the leading .
			pbstring.input = pbstring.input[1 .. $];
			// rip the continuation of the identifier
			// NOTE(review): this replaces the extension name with the suffix
			// while `subident` stays unused - confirm which field is meant.
			pbopt.name = stripValidChars(CClass.MultiIdentifier,pbstring);
		}
	}
	pbstring = stripLWhite(pbstring);
	// expect next char must be =
	if (!pbstring.input.skipOver("=")) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing = after option name.", pbstring.line);
	pbstring = stripLWhite(pbstring);
	if (!pbstring.input.length) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing value.", pbstring.line);
	// the remaining text between here and the terminator is our value
	if (pbstring.input[0] == '"') {
		pbopt.value = ripQuotedValue(pbstring);
	} else {
		// take care of non-quoted values
		pbopt.value = stripValidChars(CClass.Value,pbstring);
	}
	pbstring = stripLWhite(pbstring);
	if (!pbstring.input.length) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing terminator.", pbstring.line);
	if (!isTerminator(terms, pbstring.input[0])) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Bad terminator("~pbstring.input[0]~")", pbstring.line);
	// leave the terminator in the string in case the caller wants to look at it
	return pbopt;
}

unittest {
	auto str = ParserData("java_package = \"Fish\";");
	auto pbopt = ripOption(str);
	assert(pbopt.name == "java_package");
	assert(pbopt.value == "\"Fish\"");

	str = ParserData("java_multiple_files = true;");
	pbopt = ripOption(str);
	assert(pbopt.name == "java_multiple_files");
	assert(pbopt.value == "true");

	str = ParserData("java_generate_equals_and_hash = true;");
	pbopt = ripOption(str);
	assert(pbopt.name == "java_generate_equals_and_hash");
	assert(pbopt.value == "true");

	str = ParserData("optimize_for = LITE_RUNTIME;");
	pbopt = ripOption(str);
	assert(pbopt.name == "optimize_for");
	assert(pbopt.value == "LITE_RUNTIME");

	str = ParserData("default = -1];");
	pbopt = ripOption(str, "]");
	assert(pbopt.name == "default");
	assert(pbopt.value == "-1");
	assert(str.input[0] == ']'); // option blocks may term with comma

	// extension option with leading whitespace
	str = ParserData(" (my.ext) = 5;");
	pbopt = ripOption(str);
	assert(pbopt.extension);
	assert(pbopt.name == "my.ext");
	assert(pbopt.value == "5");
	assert(str.input == ";");

	// truncated input is reported as a parse error
	bool threw = false;
	try {
		str = ParserData("name =");
		ripOption(str);
	} catch (PBParseException e) {
		threw = true;
	}
	assert(threw);
}

// Rips a double-quoted value off the front of `pbstring`. The first
// character is taken to be the opening quote. The returned text includes
// both quotes. A character that follows a backslash never ends the string.
// Line breaks inside the string advance `pbstring.line` ("\r\n" counts once).
// Throws PBParseException when the closing quote is missing.
string ripQuotedValue(ref ParserData pbstring) {
	size_t startLine = pbstring.line;
	size_t x;
	bool closed = false;
	for(x = 1;x < pbstring.input.length;x++) {
		char c = pbstring.input[x];
		if(c == '"') {
			closed = true;
			break;
		}
		if(c == '\\' && x+1 < pbstring.input.length) {
			// step over the escaped character, but still let the
			// line accounting below see it
			x++;
			c = pbstring.input[x];
		}
		if(c == '\n')
			pbstring.line++;
		if(c == '\r') {
			pbstring.line++;
			if(x+1 < pbstring.input.length && pbstring.input[x+1] == '\n')
				x++;
		}
	}
	if(!closed)
		throw new PBParseException("Quoted Value","Unterminated quoted value.", startLine);
	// inc to take the quotes with us
	x++;
	string tmp = pbstring.input[0..x];
	pbstring.input = pbstring.input[x .. $];
	return tmp;
}

unittest {
	auto q = ParserData("\"a\\\"b\" ;");
	assert(ripQuotedValue(q) == "\"a\\\"b\"");
	assert(q.input == " ;");

	q = ParserData("\"a\r\nb\"x");
	assert(ripQuotedValue(q) == "\"a\r\nb\"");
	assert(q.line == 2);
	assert(q.input == "x");

	bool threw = false;
	try {
		q = ParserData("\"abc");
		ripQuotedValue(q);
	} catch (PBParseException e) {
		threw = true;
	}
	assert(threw);
}

// This rips line-specific options from the string: a bracketed,
// comma-separated list such as `[a = 1, b = 2]`. Each round drops one
// leading character (the '[' or a ',') and parses one option.
PBOption[]ripOptions(ref ParserData pbstring) {
	PBOption[]ret;
	while(pbstring.input.length && pbstring.input[0] != ']') {
		// this will rip off the leading [ and intermediary ','s
		pbstring.input = pbstring.input[1 .. $];
		ret ~= ripOption(pbstring,",]");
		if(__ctfe) {} else
		debug writefln("Pulled option %s with value %s",ret[$-1].name,ret[$-1].value);
	}
	// rip off the trailing ]
	pbstring.input.skipOver("]");
	return ret;
}

unittest {
	auto str = ParserData("[a = 1, (b) = 2] rest");
	auto opts = ripOptions(str);
	assert(opts.length == 2);
	assert(opts[0].name == "a" && opts[0].value == "1");
	assert(opts[1].name == "b" && opts[1].value == "2" && opts[1].extension);
	assert(str.input == " rest");
}

// Rips a comment that ends with "*/" off the front of `pbstring` and returns
// it split into lines. `pbstring.line` is advanced by the number of line
// breaks inside the comment.
// Throws PBParseException when no closing "*/" is found.
string[] ripComment(ref ParserData pbstring) {
	string text = pbstring.input;
	// When the text opens with "/*" the closing '/' cannot come before
	// index 3 (as in "/**/"); this keeps "/*/" from being taken as a
	// complete comment.
	size_t i = 1;
	if (text.length >= 2 && text[0..2] == "/*")
		i = 3;
	bool closed = false;
	for(;i<text.length;i++) {
		if(text[i] == '/' && text[i-1] == '*') {
			closed = true;
			break;
		}
	}
	if(!closed)
		throw new PBParseException("Comment","Unterminated multi-line comment.", pbstring.line);
	// take the closing '/' with us
	i++;
	auto tmp = text[0..i];
	pbstring.input = text[i .. $];
	string[] ret;
	version(D_Version2)
		ret = tmp.splitLines();
	else
		ret = tmp.splitlines();
	pbstring.line += ret.length - 1;
	return ret;
}

unittest {
	auto c = ParserData("/* a\n b */ rest");
	auto lines = ripComment(c);
	assert(lines.length == 2);
	assert(c.input == " rest");
	assert(c.line == 2);

	c = ParserData("/*/ x */");
	lines = ripComment(c);
	assert(lines.length == 1);
	assert(lines[0] == "/*/ x */");
	assert(c.input == "");

	bool threw = false;
	try {
		c = ParserData("/* never closed");
		ripComment(c);
	} catch (PBParseException e) {
		threw = true;
	}
	assert(threw);
}