1 // this file implements the structures and lexer for the protocol buffer format
2 // required to parse a protocol buffer file or tree and generate
3 // code to read and write the specified format
4 module dprotobuf.pbgeneral;
5 version(D_Version2) {
6 	import std.algorithm;
7 	import std.range;
8 } else
9 	import dprotobuf.d1support;
10 
11 import std.stdio;
12 import std..string;
13 import std.uni;
14 import std.conv;
15 
// Kinds of top-level or message-level elements the lexer can classify.
// Ordinals are spelled out explicitly; they start at 1, so 0 is never a
// valid element type.
enum PBTypes {
	PB_Package      = 1,
	PB_Enum         = 2,
	PB_Message      = 3,
	PB_Option       = 4,
	PB_Extension    = 5,
	PB_Extend       = 6,
	PB_Service      = 7,
	PB_Import       = 8,
	PB_Optional     = 9,
	PB_Required     = 10,
	PB_Repeated     = 11,
	PB_Comment      = 12,
	PB_MultiComment = 13,
}
31 
// Character classes understood by isValidChar/stripValidChars; each one
// selects the set of characters that may appear in a token of that kind.
enum CClass {
	MultiIdentifier = 0,
	Identifier      = 1,
	Numeric         = 2,
	Comment         = 3,
	Value           = 4,
}
40 
// Checks a dotted identifier: every '.'-separated segment must be
// non-empty and must pass validIdentifier. The input must not be empty.
bool validateMultiIdentifier(string ident)
in {
	assert(ident.length);
} body {
	foreach(segment; split(ident,".")) {
		if (segment.length == 0 || !validIdentifier(segment))
			return false;
	}
	return true;
}
52 
// Exception raised for any problem found while parsing a definition.
// The message is "<location>: <problem> Line:<inputLine>"; the individual
// parts are also kept in locus, error and lineNumber.
class PBParseException:Exception {
	string locus;
	string error;
	size_t lineNumber;
	this(string location,string problem, size_t inputLine, string file = __FILE__, size_t line = __LINE__) {
		// build the text once so both language versions share it
		string description = location~": "~problem~" Line:"~to!(string)(inputLine);
		version(D_Version2) {
			super(description, file, line);
		} else {
			super(description);
		}

		lineNumber = inputLine;
		error = problem;
		locus = location;
	}
}
69 
// The text still to be parsed together with a line counter for error
// reporting.
struct ParserData {
	// remaining, not yet consumed input
	string input;
	// line counter; starts at 1
	size_t line = 1;

	version(D_Version2) {
		// D2: forward string operations (length, indexing, slicing, ...)
		// to input
		mixin("alias input this;");
	} else {
		// D1: no alias this, so the operations used by the parser are
		// spelled out by hand.

		// number of characters left in input
		size_t length() {
			return input.length;
		}

		// slice of the input that keeps the current line counter
		ParserData opSlice(size_t left, size_t right) {
			return ParserData(input[left..right], line);
		}

		// value of $ inside index/slice expressions
		size_t opDollar() {
			return input.length;
		}

		// character at position i of input
		char opIndex(size_t i) {
			return input[i];
		}
	}
}
94 
// Collects comment text together with some position bookkeeping.
// NOTE(review): the code that fills line, lastElementLine and
// lastElementType is not in this file section; the field descriptions
// below are inferred from the names and should be confirmed.
struct CommentManager {
	// collected comment lines
	string[] comments;
	// presumably the input line the comments belong to
	size_t line;
	// presumably the line of the most recently parsed element
	size_t lastElementLine;
	// presumably the type of the most recently parsed element
	PBTypes lastElementType;

	version(D_Version2) {
		// D2: forward array operations (such as ~=) to comments
		mixin("alias comments this;");
	} else {
		// D1: only appending is supported directly on the manager
		void opCatAssign(string v) {
			comments ~= v;
		}
	}
}
109 
// Classifies the element that starts at the beginning of pbstring.
// Comment openers are recognised first; otherwise the leading run of
// non-whitespace characters is treated as a keyword.
// Throws PBParseException for "service" (unsupported) and for any
// keyword that is not known. The input must not be empty.
PBTypes typeNextElement(in ParserData pbstring)
in {
	assert(pbstring.length);
} body {
	// comment openers need not be followed by whitespace, so they are
	// matched on their first two characters before keyword scanning
	if (pbstring.length>1) {
		auto opener = pbstring.input[0..2];
		if(opener == "//")
			return PBTypes.PB_Comment;
		if(opener == "/*")
			return PBTypes.PB_MultiComment;
	}

	// find the end of the leading word
	int end=0;
	while(end<pbstring.length) {
		version(D_Version2) {
			if (isWhite(pbstring[end]))
				break;
		} else {
			if (iswhite(pbstring[end]))
				break;
		}
		end++;
	}
	auto type = pbstring.input[0..end];

	switch(type) {
	case "package":    return PBTypes.PB_Package;
	case "enum":       return PBTypes.PB_Enum;
	case "message":    return PBTypes.PB_Message;
	case "repeated":   return PBTypes.PB_Repeated;
	case "required":   return PBTypes.PB_Required;
	case "optional":   return PBTypes.PB_Optional;
	case "option":     return PBTypes.PB_Option;
	case "import":     return PBTypes.PB_Import;
	case "extensions": return PBTypes.PB_Extension;
	case "extend":     return PBTypes.PB_Extend;
	case "service":
		throw new PBParseException("Protocol Buffer Definition",capitalize(type)~" definitions are not currently supported.", pbstring.line);
	default:
		throw new PBParseException("Protocol Buffer Definition","Unknown element type "~type~".", pbstring.line);
	}
}
155 
// this will rip off the next token
//
// Consumes the longest prefix of pbstring whose characters are all valid
// for the character class cc, removes it from pbstring and returns it.
// The returned token may be empty when the first character is not valid
// for cc. The input must not be empty.
//
// Throws PBParseException when the input starts with '_' or when a
// non-comment token ends with '_'. Nothing is consumed in that case.
string stripValidChars(CClass cc,ref ParserData pbstring)
in {
	assert(pbstring.length);
} body {
	if(pbstring[0] == '_')
		throw new PBParseException("Next Token","Identifier cannot begin or end with underscore", pbstring.line);
	size_t i=0;
	for(;i<pbstring.length && isValidChar(cc,pbstring[i]);i++){}
	string tmp = pbstring.input[0..i];
	// The trailing-underscore rule is checked on the token itself, not on
	// the last character of the whole remaining input. Comments are free
	// text, so the rule does not apply to them.
	if(cc != CClass.Comment && tmp.length && tmp[$-1] == '_')
		throw new PBParseException("Next Token","Identifier cannot begin or end with underscore", pbstring.line);
	pbstring = pbstring[i..pbstring.length];
	return tmp;
}
169 
unittest {
	// a line comment is taken up to, but not including, the line break
	auto data = ParserData("// Filly\n");
	auto token = stripValidChars(CClass.Comment, data);
	assert(token == "// Filly");
	assert(data.input == "\n");
}
175 
176 
// allowed characters vary by type
//
// Numeric:         0-9
// Identifier:      Numeric plus a-z, A-Z and '_'
// MultiIdentifier: Identifier plus '.'
// Value:           MultiIdentifier plus '-'
// Comment:         anything except '\n', '\r' and '\f'
bool isValidChar(CClass cc,char pc) {
	bool digit = pc >= '0' && pc <= '9';
	bool identChar = digit
		|| pc == '_'
		|| (pc >= 'a' && pc <= 'z')
		|| (pc >= 'A' && pc <= 'Z');

	switch(cc) {
	case CClass.Comment:
		return !(pc == '\n' || pc == '\r' || pc == '\f');
	case CClass.Numeric:
		return digit;
	case CClass.Identifier:
		return identChar;
	case CClass.MultiIdentifier:
		return identChar || pc == '.';
	case CClass.Value:
		return identChar || pc == '.' || pc == '-';
	default:
		break;
	}

	assert(false, "Missing switch case");
}
205 
// An identifier is accepted unless its first character is a decimal
// digit; no other character is inspected. The input must not be empty.
bool validIdentifier(string ident)
in {
	assert(ident.length);
} body {
	char first = ident[0];
	bool startsWithDigit = first >= '0' && first <= '9';
	return !startsWithDigit;
}
213 
// Removes leading whitespace from s and returns the trimmed copy.
//
// s is taken by value, so the caller's ParserData is not modified and the
// return value must be used. The line counter of the result is advanced
// once per line break skipped; "\n", "\r" and "\r\n" each count as a
// single break.
ParserData stripLWhite(ParserData s) {
	size_t i;

	for (i = 0; i < s.length; i++)
	{
		version(D_Version2) {
			if (!isWhite(s[i]))
				break;
		} else {
			if (!iswhite(s[i]))
				break;
		}
		if (s[i] == '\n')
			s.line++;
		if (s[i] == '\r') {
			s.line++;
			// "\r\n" is one line break: step over the '\n' so it is not
			// counted a second time (bounds check comes first)
			if(i+1 < s.length && s[i+1] == '\n')
				i++;
		}
	}
	s.input = s.input[i .. $];
	return s;
}
unittest {
	// leading whitespace including one line break
	auto multiLine = stripLWhite(ParserData("  \n\tasdf "));
	assert(multiLine.input == "asdf ");
	assert(multiLine.line == 2);

	// leading whitespace without a line break
	auto singleLine = stripLWhite(ParserData("  \tasdf"));
	assert(singleLine.input == "asdf");
}
242 
unittest {
	writefln("unittest ProtocolBuffer.pbgeneral");

	debug writefln("Checking validIdentifier...");
	assert(validIdentifier("asdf"));
	assert(!validIdentifier("8asdf"));

	// stripValidChars goes through isValidChar, so this covers both
	debug writefln("Checking stripValidChars...");
	auto data = ParserData("asdf1 yarrr");
	auto token = stripValidChars(CClass.Identifier,data);
	assert(token == "asdf1");
	assert(data.input == " yarrr");

	data = ParserData("as2f.ya7rr -adfbads25737");
	token = stripValidChars(CClass.MultiIdentifier,data);
	assert(token == "as2f.ya7rr");
	assert(data.input == " -adfbads25737");
	debug writefln("");
}
258 
// One parsed option, as produced by ripOption.
struct PBOption {
	// option name as read from the input
	string name;
	// NOTE(review): never assigned in the visible parsing code; presumably
	// meant for the part following "(ext)." - confirm
	string subident;
	// option value; quoted values keep their surrounding quotes
	string value;
	// true when the option name was written in parentheses
	bool extension = false;
}
265 
// TODO: actually do something with options
//
// Rips one option of the form `name = value` (or `(name).rest = value`
// for extensions) off the front of pbstring and returns it.
//
// terms lists the characters accepted as terminator after the value. The
// terminator is checked but left in pbstring so the caller can inspect it.
// Quoted values are returned including their quotes.
//
// Throws PBParseException for a missing name, a missing ')' after an
// extension name, a missing '=', a missing value, or a missing or
// unexpected terminator.
PBOption ripOption(ref ParserData pbstring,string terms = ";") {
	// we need to pull apart the option and stuff it in a struct
	PBOption pbopt;
	if (pbstring.length && pbstring[0] == '(') {
		pbopt.extension = true;
		pbstring = pbstring[1..pbstring.length];
	}
	pbstring = stripLWhite(pbstring);
	// stripValidChars requires non-empty input, so report truncation here
	if (!pbstring.length) throw new PBParseException("Option Parse","Malformed option: Option name not found.", pbstring.line);
	pbopt.name = stripValidChars(CClass.MultiIdentifier,pbstring);
	if (!pbopt.name.length) throw new PBParseException("Option Parse","Malformed option: Option name not found.", pbstring.line);
	if (pbopt.extension) {
		pbstring = stripLWhite(pbstring);
		// rip off trailing ), making sure it really is one
		if (!pbstring.length || pbstring[0] != ')') throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing ) after extension name.", pbstring.line);
		pbstring = pbstring[1..pbstring.length];
		// check for more portions of the identifier
		if (pbstring.length && pbstring[0] == '.') {
			// rip off the leading .
			pbstring = pbstring[1..pbstring.length];
			if (!pbstring.length) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Option name not found.", pbstring.line);
			// rip the continuation of the identifier
			// NOTE(review): this replaces the extension name read above
			// instead of storing the continuation in pbopt.subident -
			// kept as is, confirm which one callers expect
			pbopt.name = stripValidChars(CClass.MultiIdentifier,pbstring);
		}
	}
	pbstring = stripLWhite(pbstring);
	// expect next char must be =
	if (!pbstring.input.skipOver("=")) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing = after option name.", pbstring.line);
	pbstring = stripLWhite(pbstring);
	if (!pbstring.length) throw new PBParseException("Option Parse("~pbopt.name~")","Malformed option: Missing value after =.", pbstring.line);
	// the remaining text between here and the terminator is our value
	if (pbstring[0] == '"')
		pbopt.value = ripQuotedValue(pbstring);
	else
		// take care of non-quoted values
		pbopt.value = stripValidChars(CClass.Value,pbstring);
	pbstring = stripLWhite(pbstring);
	// leave the terminator in the string in case the caller wants to look at it
	checkOptionTerminator(pbstring, terms, pbopt.name);
	return pbopt;
}

// Verifies that pbstring starts with one of the characters in terms;
// throws PBParseException otherwise. Nothing is consumed.
private void checkOptionTerminator(ref ParserData pbstring, string terms, string optionName) {
	if (!pbstring.length) throw new PBParseException("Option Parse("~optionName~")","Malformed option: Missing terminator.", pbstring.line);
	version(D_Version2) {
		if (terms.find(pbstring[0]).empty) throw new PBParseException("Option Parse("~optionName~")","Malformed option: Bad terminator("~pbstring[0]~")", pbstring.line);
	} else
		if (terms.find(pbstring[0]) == -1) throw new PBParseException("Option Parse("~optionName~")","Malformed option: Bad terminator("~pbstring[0]~")", pbstring.line);
}
314 
unittest {
	// quoted value: the quotes stay part of the value
	auto data = ParserData("java_package = \"Fish\";");
	auto option = ripOption(data);
	assert(option.name == "java_package");
	assert(option.value == "\"Fish\"");

	// bare word values
	data = ParserData("java_multiple_files = true;");
	option = ripOption(data);
	assert(option.name == "java_multiple_files");
	assert(option.value == "true");

	data = ParserData("java_generate_equals_and_hash = true;");
	option = ripOption(data);
	assert(option.name == "java_generate_equals_and_hash");
	assert(option.value == "true");

	data = ParserData("optimize_for = LITE_RUNTIME;");
	option = ripOption(data);
	assert(option.name == "optimize_for");
	assert(option.value == "LITE_RUNTIME");

	// custom terminator set; the terminator is left in the input
	data = ParserData("default = -1];");
	option = ripOption(data, "]");
	assert(option.name == "default");
	assert(option.value == "-1");
	assert(data.input[0] == ']'); // option blocks may term with comma
}
342 
// Rips a double-quoted value off the front of pbstring and returns it
// including both quotes. The first character is taken to be the opening
// quote and is not inspected.
//
// The line counter is advanced for every line break inside the value;
// "\n", "\r" and "\r\n" each count as one break.
//
// Throws PBParseException when no closing quote is found; pbstring's
// input is left unconsumed in that case.
string ripQuotedValue(ref ParserData pbstring) {
	size_t startLine = pbstring.line;
	size_t x;
	// bounds check first, then look at the character
	for(x = 1;x < pbstring.length && pbstring[x] != '"';x++) {
		if(pbstring[x] == '\n')
			pbstring.line++;
		if(pbstring[x] == '\r') {
			pbstring.line++;
			// "\r\n" is a single line break, skip the '\n'
			if(x+1 < pbstring.length && pbstring[x+1] == '\n')
				x++;
		}
	}
	if(x >= pbstring.length)
		throw new PBParseException("Quoted Value","Unterminated quoted value.", startLine);
	// inc to take the quotes with us
	x++;
	string tmp = pbstring.input[0..x];
	pbstring = pbstring[x..pbstring.length];
	return tmp;
}
360 
// this rips line-specific options from the string
//
// Expects pbstring to start at the opening '[' of an option list. Each
// round drops one leading character (the '[' or a separating ',') and
// parses one option with ripOption; the closing ']' is consumed at the
// end if present.
PBOption[]ripOptions(ref ParserData pbstring) {
	PBOption[] options;
	for(;;) {
		if(!pbstring.length || pbstring[0] == ']')
			break;
		// drop the leading [ or the ',' between two options
		pbstring = pbstring[1..pbstring.length];
		options ~= ripOption(pbstring,",]");
		// no output while running at compile time
		if(!__ctfe) {
			debug writefln("Pulled option %s with value %s",options[$-1].name,options[$-1].value);
		}
	}
	// rip off the trailing ]
	pbstring.input.skipOver("]");
	return options;
}
375 
// Rips a comment that is closed by "*/" off the front of pbstring and
// returns its text split into lines. The line counter is advanced by the
// number of returned lines minus one.
//
// Throws PBParseException when the closing "*/" is missing; nothing is
// consumed in that case.
string[] ripComment(ref ParserData pbstring) {
	string[] ret;
	size_t i = 0;
	do {
		i++;
		// advance to the next '/', which may be the end of the closer
		for(;i<pbstring.length && pbstring[i] != '/';i++){}
		// ran out of input without finding "*/"
		if(i >= pbstring.length)
			throw new PBParseException("Comment Parse","Unterminated multi-line comment.", pbstring.line);
	} while(pbstring[i-1] != '*');
	// include the closing '/'
	i++;
	auto tmp = pbstring.input[0..i];
	pbstring = pbstring[i..pbstring.length];
	version(D_Version2)
		ret = tmp.splitLines();
	else
		ret = tmp.splitlines();
	pbstring.line += ret.length - 1;
	return ret;
}