1 /**
2  * This module provides the functions needed to convert to/from
3  * the Protocol Buffer binary format.
4  */
5 module dprotobuf.wireformat;
6 
// Compatibility layer: when D_Version2 is defined the standard std.conv is
// used; otherwise minimal stand-ins for the helpers this code base relies on
// are defined locally on top of std.string.
version(D_Version2) {
	import std.conv;
} else {
	import std.string;

	/// Parse a decimal string into an int (delegates to atoi).
	int to(T)(string v) {
		return atoi(v);
	}

	/// Render an int as its string form.
	string to(T)(int v) {
		return toString(v);
	}

	/// Render an unsigned value as a string in the given radix.
	string to(T, S)(ulong v, S redix) {
		return toString(v, redix);
	}

	/// True when the array has no elements.
	bool empty(T)(T[] v) {
		return !v.length;
	}

	/// Last element of the array; the array must not be empty.
	T back(T)(T[] arr) {
		return arr[$-1];
	}

	/// Drop the last element of the array in place.
	void popBack(T)(ref T[] arr) {
		arr = arr[0..$-1];
	}

	/**
	 * If str starts with c, remove that prefix from str and return true;
	 * otherwise leave str untouched and return false.
	 */
	bool skipOver(ref string str, string c) {
		if(str.length < c.length) return false;

		if(str[0..c.length] == c) {
			str = str[c.length..$];
			return true;
		}
		return false;
	}
}
45 
46 import std.stdio;
47 
/**
 * Numeric value of every wire type.
 *
 * undecided is used for custom types: given only a type name, the
 * wire type cannot be determined until it is known whether the type
 * is an enum or a struct.
 */
enum WireType : byte {
	undecided    = -1,
	varint       = 0,
	fixed64      = 1,
	lenDelimited = 2,
	startGroup   = 3, // Deprecated
	endGroup     = 4, // Deprecated
	fixed32      = 5
}
64 
/**
 * Encode a value as a varint, prefixed with the header for the given field.
 *
 * The narrower overloads widen their argument to long and forward to the
 * long overload, which emits the header followed by the bare varint.
 */
ubyte[]toVarint(bool input,int field) {
	long widened = input ? 1 : 0;
	return toVarint(widened,field);
}
/// ditto
ubyte[]toVarint(uint input,int field) {
	// zero-extends
	long widened = input;
	return toVarint(widened,field);
}
/// ditto
ubyte[]toVarint(int input,int field) {
	// sign-extends
	long widened = input;
	return toVarint(widened,field);
}
/// ditto
ubyte[]toVarint(ulong input,int field) {
	// reinterprets the bit pattern; values above long.max become negative
	long widened = cast(long)input;
	return toVarint(widened,field);
}
/// ditto
ubyte[]toVarint(long input,int field) {
	// header first, then the header-less varint
	return genHeader(field,WireType.varint)~toVarint(input);
}
90 
/**
 * Encode a varint without a header.
 *
 * The value is emitted least-significant group first, seven bits per byte,
 * with the top bit of every byte except the last set as a continuation flag.
 * Negative values are encoded from their 64-bit two's-complement pattern and
 * therefore always occupy ten bytes, the last of which is 0x01.
 *
 * Params:
 *     input = value to encode
 * Returns:
 *     the encoded bytes (between one and ten)
 */
ubyte[] toVarint(long input) {
	// Operate on the unsigned bit pattern: an arithmetic shift on a negative
	// long would smear the sign bit into the final group (0x7F instead of
	// 0x01), which is not the canonical encoding.
	ulong bits = cast(ulong)input;
	ubyte[] ret;
	do {
		ubyte group = cast(ubyte)(bits & 0b1111111);
		bits >>= 7;
		// more groups follow, so flag continuation
		if (bits != 0) group |= cast(ubyte)(1<<7);
		ret ~= group;
	} while (bits != 0);
	return ret;
}
119 
/**
 * Decode a varint from the front of input into the requested type.
 *
 * The consumed bytes are removed from input. The value is assembled in a
 * long and then range-checked against T before being cast.
 *
 * Throws:
 *     Exception if no byte with a clear top bit is found, or if the decoded
 *     value does not fit in T.
 */
T fromVarint(T)(ref ubyte[] input)
in {
	assert(input.length);
} body {
	// Locate the terminating byte: the first one whose top bit is clear.
	size_t last = 0;
	while (last < input.length && (input[last]>>7)) {
		last++;
	}
	if (last == input.length) throw new Exception(
		"Found no end to varint ubyte string starting with: "~
		to!(string)(cast(ulong)input[0],16u)~" "~
		(input.length>1?to!(string)(cast(ulong)input[1],16u):"")~" "~
		(input.length>2?to!(string)(cast(ulong)input[2],16u):""));

	// Split the varint off the front of the caller's buffer.
	ubyte[] encoded = input[0..last+1];
	input = input[last+1..$];

	// Groups are stored least-significant first, so fold from the back.
	long decoded = 0;
	foreach_reverse (piece; encoded) {
		decoded = (decoded << 7) | (piece & 0b1111111);
	}

	if (decoded > T.max || decoded < T.min) {
		throw new Exception("Integer parse is not within the valid range.");
	}
	return cast(T)decoded;
}
164 
/**
 * Extract the wire type stored in the low three bits of a header.
 *
 * The result is not verified to be a known wire type, as future wire
 * types could be introduced.
 */
WireType getWireType(int header) {
	int typeBits = header & 0b111;
	return cast(WireType)typeBits;
}
174 
/**
 * Extract the field number from a header, i.e. everything above the low
 * three wire-type bits.
 *
 * An unsigned shift is used so that a header whose bit 31 is set (field
 * numbers of 2^28 and above) yields the positive field number rather than a
 * sign-extended negative value.
 */
int getFieldNumber(int header) {
	return header>>>3;
}
181 
/**
 * Encode a header: the field number shifted left by three bits, combined
 * with the wire type, written as a varint.
 *
 * The shift is performed in 64-bit arithmetic. Done in int, field numbers of
 * 2^28 and above would overflow into the sign bit and be sign-extended into
 * a ten byte varint.
 *
 * Params:
 *     field    = field number
 *     wiretype = wire type placed in the low three bits
 * Returns:
 *     the varint-encoded header bytes
 */
ubyte[] genHeader(int field, WireType wiretype) {
	long key = (cast(long)field<<3)|wiretype;
	return toVarint(key);
}
188 
unittest {
	writefln("unittest ProtocolBuffer.pbhelper.toVarint");

	// bool: one header byte followed by one data byte holding 1
	debug writefln("toVarint(bool)...");
	ubyte[]encoded = toVarint(true,5);
	ubyte expected;
	debug writefln("length");
	assert(encoded.length == 2);
	debug writefln("header");
	assert(getFieldNumber(encoded[0]) == 5);
	expected = cast(ubyte)0b00000001;
	debug writefln("first data ubyte(%b): %b",expected,encoded[1]);
	assert(encoded[1] == expected);

	// int: 300 needs two data bytes after the header
	debug writefln("toVarint(int)...");
	encoded = toVarint(300,12);
	debug writefln("length");
	assert(encoded.length == 3);
	expected = cast(ubyte)0b10101100;
	debug writefln("first data ubyte(%b): %b",expected,encoded[1]);
	assert(encoded[1] == expected);
	expected = cast(ubyte)0b00000010;
	debug writefln("second data ubyte(%b): %b",expected,encoded[2]);
	assert(encoded[2] == expected);

	// decode the same bytes again once the header has been sliced off
	debug writefln("long fromVarint...");
	expected = encoded[0];
	encoded = encoded[1..$];
	long decoded = fromVarint!(long)(encoded);
	assert(decoded == 300);

	// round-trip the extremes of each integer type
	debug writefln("Checking max/min edges...");
	encoded = toVarint(ulong.max,5)[1..$];
	assert(ulong.max == fromVarint!(ulong)(encoded));

	encoded = toVarint(long.min,5)[1..$];
	assert(long.min == fromVarint!(long)(encoded));

	encoded = toVarint(int.min,5)[1..$];
	assert(int.min == fromVarint!(int)(encoded));

	encoded = toVarint(uint.max,5)[1..$];
	uint roundTripped = fromVarint!(uint)(encoded);
	debug writefln("%d should be %d",roundTripped,uint.max);
	assert(uint.max == roundTripped);
	// decoding must have consumed every byte
	assert(encoded.length == 0);
	debug writefln("");
}
239 
/**
 * ZigZag-encode a signed value and write it as a varint with a header for
 * the given field.
 *
 * ZigZag maps signed values onto unsigned ones (0, -1, 1, -2, ... become
 * 0, 1, 2, 3, ...) so that small magnitudes produce short varints.
 */
ubyte[]toSInt(int input,int field) {
	// The 32-bit zigzag value is unsigned. Passing it on as an int would
	// sign-extend results with the top bit set into a ten byte varint.
	uint zigzag = cast(uint)((input<<1)^(input>>31));
	return toVarint(zigzag,field);
}
/// ditto
ubyte[]toSInt(long input,int field) {
	return toVarint((input<<1)^(input>>63),field);
}
247 
/**
 * Decode a ZigZag-encoded varint from the front of input.
 *
 * Only int and long are supported. For int, both the canonical unsigned
 * 32-bit encoding and a sign-extended 64-bit encoding of the same 32 bits
 * are accepted.
 *
 * Throws:
 *     Exception if the varint is malformed or, for int, does not fit in
 *     32 bits.
 */
T fromSInt(T)(ref ubyte[]input) {
	static assert(is(T == int) || is(T == long),
		"fromSInt only works with types int or long.");

	// Decode at full width; the zigzag value is really unsigned, so it may
	// exceed T.max.
	long raw = fromVarint!(long)(input);
	static if (is(T == int)) {
		if (raw > uint.max || raw < int.min) {
			throw new Exception("Integer parse is not within the valid range.");
		}
	}
	T tmp = cast(T)raw;
	// The shift must be unsigned: an arithmetic shift would copy the top bit
	// of the zigzag value back in and corrupt large magnitudes.
	return (tmp>>>1)^-(tmp&1);
}
256 
unittest {
	writefln("unittest ProtocolBuffer.pbhelper.toSInt");

	// zero zigzags to zero: header byte plus a single 0 data byte
	debug writefln("toSInt(int)...");
	ubyte[]encoded = toSInt(0,12);
	debug writefln("length");
	assert(encoded.length == 2);
	ubyte expected = cast(ubyte)0b0;
	debug writefln("first ubyte(%b): %b",expected,encoded[1]);
	assert(encoded[1] == expected);
	expected = cast(ubyte)(12<<3);
	debug writefln("header ubyte(%b): %b",expected,encoded[0]);
	assert(encoded[0] == expected);

	// -2 zigzags to 3
	debug writefln("toSInt(long)...");
	encoded = toSInt(cast(long)-2,12);
	debug writefln("length");
	assert(encoded.length == 2);
	expected = cast(ubyte)0b11;
	debug writefln("first ubyte(%b): %b",expected,encoded[1]);
	assert(encoded[1] == expected);

	// decode the payload again after dropping the header byte
	debug writefln("fromSInt(long)...");
	encoded = encoded[1..$];
	assert(-2 == fromSInt!(long)(encoded));
	assert(encoded.length == 0);
	debug writefln("");
}
285 
/**
 * Encode a fixed-size numeric value, prefixed with a header.
 *
 * Valid for uint, float, ulong, and double. The value's bytes are written
 * least-significant first; eight byte types use the fixed64 wire type and
 * everything else fixed32.
 *
 * Params:
 *     input = value to encode
 *     field = field number for the header
 * Returns:
 *     header bytes followed by T.sizeof data bytes
 */
ubyte[]toByteBlob(T)(T input,int field) {
	ubyte[] raw = (cast(ubyte*)&input)[0..T.sizeof];
	ubyte[] payload = new ubyte[T.sizeof];
	version (BigEndian) {
		// Copy in reverse by hand; this avoids the array .reverse property.
		foreach (i, ref b; payload) b = raw[T.sizeof-1-i];
	} else {
		payload[] = raw[];
	}
	return genHeader(field,T.sizeof==8?WireType.fixed64:WireType.fixed32)
	       ~payload;
}
299 
/**
 * Decode a fixed-size numeric value from the front of input.
 *
 * Reads T.sizeof bytes stored least-significant first and removes them from
 * input. The caller's buffer contents are never modified.
 */
T fromByteBlob(T)(ref ubyte[]input)
in {
	assert(input.length >= T.sizeof);
} body {
	T ret;
	ubyte[] raw = input[0..T.sizeof];
	input = input[T.sizeof..$];
	ubyte[] dest = (cast(ubyte*)&ret)[0..T.sizeof];
	version (BigEndian) {
		// Copy in reverse instead of reversing raw in place: raw is a slice
		// of the caller's buffer and must not be altered.
		foreach (i, ref b; dest) b = raw[T.sizeof-1-i];
	} else {
		dest[] = raw[];
	}
	return ret;
}
312 
unittest {
	writefln("unittest ProtocolBuffer.pbhelper.byteblobs");
	// encode a double, drop the one-byte header, and decode it again
	ubyte[]encoded = toByteBlob!(double)(1.542,cast(ubyte)5);
	ubyte[]payload = encoded[1..$];
	assert(1.542 == fromByteBlob!(double)(payload));
	// all eight data bytes must have been consumed
	assert(payload.length == 0);
	debug writefln("");
}
320 
/**
 * Encode an array as a length-delimited field: header, byte length as a
 * varint, then the raw bytes of the array.
 *
 * Params:
 *     input = elements to write; their memory is emitted as-is
 *     field = field number for the header
 * Returns:
 *     the encoded field
 */
ubyte[]toByteString(T)(T[]input,int field) {
	// The length prefix counts bytes, not elements; the two differ whenever
	// T.sizeof > 1, so measure the payload after reinterpreting it.
	ubyte[]payload = cast(ubyte[])input;
	return genHeader(field,WireType.lenDelimited)~toVarint(payload.length)~payload;
}
330 
/**
 * Decode a length-delimited array from the front of input.
 *
 * Reads a varint byte length, then returns that many bytes of input
 * reinterpreted as T[] and removes them from input. The result is a slice
 * of the original buffer, not a copy.
 *
 * Throws:
 *     Exception if the length exceeds the remaining input.
 */
T[]fromByteString(T:T[])(ref ubyte[]input) {
	uint len = fromVarint!(uint)(input);
	if (input.length < len) {
		throw new Exception("String length exceeds length of input ubyte array.");
	}
	ubyte[]payload = input[0..len];
	input = input[len..$];
	return cast(T[])payload;
}
341 
unittest {
	writefln("unittest ProtocolBuffer.pbhelper.byteblobs");

	// round-trip a string; field 15 still has a one-byte header to drop
	string original = "My toast has been stolen!";
	ubyte[]encoded = toByteString(original,cast(ubyte)15);
	ubyte[]payload = encoded[1..$];
	assert(original == fromByteString!(string)(payload));
	assert(payload.length == 0);
	debug writefln("");

	// three back-to-back byte strings: 3 bytes, 0 bytes, 1 byte
	ubyte[] stream = [0x03, // Length
	    0x05, 0x06, 0x07,
	    0x00, // Length
	    0x01, // Length
	    0x08];
	ubyte[] first = fromByteString!(ubyte[])(stream);
	assert(first == cast(ubyte[])[0x05, 0x06, 0x07]);
	ubyte[] second = fromByteString!(ubyte[])(stream);
	assert(second == cast(ubyte[])[]);
	ubyte[] third = fromByteString!(ubyte[])(stream);
	assert(third == cast(ubyte[])[0x08]);
}
359 
/**
 * Remove an unknown field's data from the front of input.
 *
 * The field header is expected to have been consumed already; wiretype says
 * how the remaining data is laid out.
 *
 * Params:
 *     input    = buffer positioned at the field's data; advanced past it
 *     wiretype = numeric wire type of the field (0, 1, 2 or 5)
 * Returns:
 *     The data of field: a re-encoded varint for wire type 0, the raw eight
 *     or four bytes for wire types 1 and 5, and the length varint followed
 *     by the payload for wire type 2.
 * Throws:
 *     Exception if the wire type is not handled or the input is too short.
 */
ubyte[]ripUField(ref ubyte[]input,int wiretype) {
	switch(wiretype) {
	case 0:
		// snag a varint
		return toVarint(fromVarint!(long)(input));
	case 1:
		// snag a 64bit chunk
		if (input.length < 8) {
			throw new Exception("Input too short for a 64bit field.");
		}
		ubyte[]tmp = input[0..8];
		input = input[8..$];
		return tmp;
	case 2:
		// snag a length delimited chunk
		auto blen = fromVarint!(long)(input);
		if (blen < 0 || cast(ulong)blen > input.length) {
			throw new Exception("Length delimited field exceeds length of input ubyte array.");
		}
		size_t len = cast(size_t)blen;
		ubyte[]tmp = input[0..len];
		// consume the payload as well, so the caller resumes at the next field
		input = input[len..$];
		return toVarint(blen)~tmp;
	case 5:
		// snag a 32bit chunk
		if (input.length < 4) {
			throw new Exception("Input too short for a 32bit field.");
		}
		ubyte[]tmp = input[0..4];
		input = input[4..$];
		return tmp;
	default:
		// unsupported wire type
		throw new Exception("Can't deal with wiretype "~to!(string)(wiretype));
	}
	assert(0);
}
392 
/**
 * Encode a packed repeated field.
 *
 * Every element is serialized with serializer, its header is stripped, and
 * the concatenated payloads are wrapped in a single length-delimited field.
 *
 * Params:
 *     packed = elements to encode
 *     field  = field number for the header
 * Returns:
 *     the encoded field, or null when packed is empty
 */
ubyte[]toPacked(T:T[],alias serializer)(in T[] packed,int field) {
	// zero length packed repeated fields serialize to nothing
	if (!packed.length) return null;
	ubyte[]header = genHeader(field,WireType.lenDelimited);
	// A header is a varint and takes more than one byte for field numbers of
	// 16 and above. Its length is set by the field number alone, since the
	// wire type only occupies the low three bits, so this header's length
	// tells how much to strip from each element.
	size_t headerLen = header.length;
	ubyte[]ret;
	foreach(pack;packed) {
		// serialize everything, but leave off the header bytes for all of them
		ret ~= serializer(pack,field)[headerLen..$];
	}
	// now that everything is serialized, prepend the header and the varint
	// byte length
	return header~toVarint(ret.length)~ret;
}
409 
/**
 * Decode a packed repeated field.
 *
 * The field header must already have been removed from input. Reads the
 * varint byte length, carves that many bytes off input, and applies
 * deserializer to them repeatedly until none remain.
 *
 * Throws:
 *     Exception if the stated length exceeds the remaining input.
 */
T[]fromPacked(T,alias deserializer)(ref ubyte[]input) {
	// byte length of the packed payload
	auto len = fromVarint!(uint)(input);
	if (len > input.length) throw new Exception("A repeated packed field specifies a length longer than available data.");

	// split our chunk off the front of the caller's buffer
	ubyte[]own = input[0..len];
	input = input[len..$];

	// each call consumes one element from the chunk
	T[]ret;
	while(own.length != 0) {
		ret ~= cast(T) deserializer(own);
	}
	return ret;
}
425 
unittest {
	writefln("unittest ProtocolBuffer.pbhelper.packed_fields");

	// field 4, three varints packed into a six byte payload
	int[]values = [3,270,86942];
	ubyte[]expected = cast(ubyte[])[0x22,0x6,0x3,0x8e,0x2,0x9e,0xa7,0x5];
	ubyte[]encoded = toPacked!(int[],toVarint)(values,cast(ubyte)4);
	assert(encoded.length == 8);

	// dump both byte strings in hex when built with -debug
	version(D_Version2) {
		mixin("import std.algorithm, std.range, std.string;");
		debug writeln(map!((a) { return format("%x", a); })(expected));
		debug writeln(map!((a) { return format("%x", a); })(encoded));
	} else {
		debug writefln("%x",expected);
		debug writefln("%x",encoded);
	}
	assert(encoded == expected);

	// drop the one-byte header and decode the elements again
	encoded = encoded[1..$];
	int[]decoded = fromPacked!(int,fromVarint!(int))(encoded);
	assert(values == decoded);
	debug writefln("");
}