/**
 * This module provides the functions needed to convert to/from
 * the Protocol Buffer binary format.
 */
module dprotobuf.wireformat;

version(D_Version2) {
    import std.conv;
} else {
    // Fallback definitions used when not compiling as D version 2. They
    // provide the names `to`, `empty`, `back`, `popBack` and `skipOver`
    // (only `to` is used in this module; the others are presumably used by
    // importers of this module -- verify before removing any of them).
    import std.string;

    /// Parse an integer from a string.
    int to(T)(string v) {
        return atoi(v);
    }

    /// Format an integer as a string.
    string to(T)(int v) {
        return toString(v);
    }

    /// Format an integer as a string in the given radix.
    string to(T, S)(ulong v, S radix) {
        return toString(v, radix);
    }

    /// True when the array has no elements.
    bool empty(T)(T[] v) {
        return !v.length;
    }

    /// Last element of the array (the array must not be empty).
    T back(T)(T[] arr) {
        return arr[$-1];
    }

    /// Drop the last element of the array (the array must not be empty).
    void popBack(T)(ref T[] arr) {
        arr = arr[0..$-1];
    }

    /// If str starts with c, remove that prefix from str and return true.
    bool skipOver(ref string str, string c) {
        if(str.length < c.length) return false;

        if(str[0..c.length] == c) {
            str = str[c.length..$];
            return true;
        }
        return false;
    }
}

import std.stdio;

/**
 * Stores the integer value for each wire type.
 *
 * Undecided is used on custom types; given a type name
 * a wire type can not be determined until identifying
 * if the type is an enum or struct.
 */
enum WireType : byte {
    varint,
    fixed64,
    lenDelimited,
    startGroup, // Deprecated
    endGroup, // Deprecated
    fixed32,
    undecided = -1
}

/**
 * Encode a value as a varint, prefixed with the header for the given field.
 *
 * Every overload widens its argument to long and forwards to the long
 * overload, which prepends genHeader(field, WireType.varint).
 */
ubyte[] toVarint(bool input, int field) {
    return toVarint(cast(long)(input ? 1 : 0), field);
}
/// ditto
ubyte[] toVarint(uint input, int field) {
    return toVarint(cast(long)input, field);
}
/// ditto
ubyte[] toVarint(int input, int field) {
    return toVarint(cast(long)input, field);
}
/// ditto
ubyte[] toVarint(ulong input, int field) {
    return toVarint(cast(long)input, field);
}
/// ditto
ubyte[] toVarint(long input, int field) {
    // tack on the header and the varint
    return genHeader(field, WireType.varint) ~ toVarint(input);
}

/**
 * Encode a varint without a header.
 *
 * The value is treated as its 64-bit two's-complement bit pattern and is
 * emitted seven bits at a time, least significant group first. Every byte
 * except the last has its top bit set. Negative values therefore always
 * occupy ten bytes, the last of which is 0x01.
 */
ubyte[] toVarint(long input) {
    // 64 bits / 7 bits per byte rounds up to at most 10 bytes.
    ubyte[10] buf;
    size_t used = 0;
    // Shift the unsigned bit pattern so no sign bits are smeared into the
    // final byte (an arithmetic shift would leave 0x7F there for negatives).
    ulong bits = cast(ulong)input;
    do {
        ubyte chunk = cast(ubyte)(bits & 0x7F);
        bits >>>= 7;
        if (bits) chunk |= 0x80; // more groups follow
        buf[used++] = chunk;
    } while (bits);
    return buf[0..used].dup;
}

/**
 * Decodes a varint to the requested type.
 *
 * The bytes making up the varint are removed from the front of input.
 *
 * Throws:
 *     Exception if input is empty, if no terminating byte (top bit clear)
 *     is found, if the varint is longer than ten bytes, or if the decoded
 *     value does not fit in a T narrower than 64 bits.
 */
T fromVarint(T)(ref ubyte[] input) {
    if (!input.length) {
        throw new Exception("Cannot decode a varint from empty input.");
    }

    // Find the terminating byte: the first one with an unset upper bit.
    size_t last = 0;
    while (last < input.length && (input[last] & 0x80)) {
        last++;
    }
    if (last == input.length) throw new Exception(
        "Found no end to varint ubyte string starting with: "~
        to!(string)(cast(ulong)input[0],16u)~" "~
        (input.length>1?to!(string)(cast(ulong)input[1],16u):"")~" "~
        (input.length>2?to!(string)(cast(ulong)input[2],16u):""));
    if (last >= 10) {
        // More than ten bytes cannot be represented in 64 bits.
        throw new Exception("Varint is longer than the maximum of 10 bytes.");
    }

    // We have the whole varint; detach it from the input.
    ubyte[] raw = input[0..last+1];
    input = input[last+1..$];

    // Reassemble from the most significant group down. Bits above bit 63
    // in a ten byte varint are discarded by the shift, so encodings whose
    // final byte carries extra set bits are still accepted.
    long output = 0;
    foreach_reverse (b; raw) {
        output = (output << 7) | (b & 0x7F);
    }

    // Every 64-bit pattern is a valid long/ulong; only narrower targets
    // need a range check.
    static if (T.sizeof < long.sizeof) {
        if (output > T.max || output < T.min) {
            throw new Exception("Integer parse is not within the valid range.");
        }
    }
    return cast(T)output;
}

/**
 * Provide the specified wiretype from the header.
 *
 * Does not verify the type is known as future wire types
 * could be introduced.
 */
WireType getWireType(int header) {
    return cast(WireType)(header & 0b111);
}

/**
 * Provide the specified field number from the header.
 */
int getFieldNumber(int header) {
    return header >> 3;
}

/**
 * Encodes a header: the field number shifted left three bits, combined with
 * the wire type, written as a varint.
 */
ubyte[] genHeader(int field, WireType wiretype) {
    return toVarint((field << 3) | wiretype);
}

unittest {
    writefln("unittest ProtocolBuffer.pbhelper.toVarint");
    debug writefln("toVarint(bool)...");
    ubyte[]tmp = toVarint(true,5);
    ubyte cmp;
    debug writefln("length");
    assert(tmp.length == 2);
    debug writefln("header");
    assert(getFieldNumber(tmp[0]) == 5);
    cmp = cast(ubyte)0b00000001;
    debug writefln("first data ubyte(%b): %b",cmp,tmp[1]);
    assert(tmp[1] == cmp);
    debug writefln("toVarint(int)...");
    tmp = toVarint(300,12);
    debug writefln("length");
    assert(tmp.length == 3);
    cmp = cast(ubyte)0b10101100;
    debug writefln("first data ubyte(%b): %b",cmp,tmp[1]);
    assert(tmp[1] == cmp);
    cmp = cast(ubyte)0b00000010;
    debug writefln("second data ubyte(%b): %b",cmp,tmp[2]);
    assert(tmp[2] == cmp);
    debug writefln("long fromVarint...");
    // use last value with the header ripped off
    cmp = tmp[0];
    tmp = tmp[1..$];
    long ret = fromVarint!(long)(tmp);
    assert(ret == 300);

    debug writefln("Checking max/min edges...");
    tmp = toVarint(ulong.max,5);
    tmp = tmp[1..$];
    assert(ulong.max == fromVarint!(ulong)(tmp));

    tmp = toVarint(long.min,5);
    tmp = tmp[1..$];
    assert(long.min == fromVarint!(long)(tmp));

    tmp = toVarint(int.min,5);
    tmp = tmp[1..$];
    assert(int.min == fromVarint!(int)(tmp));

    tmp = toVarint(uint.max,5);
    tmp = tmp[1..$];
    uint uitmp = fromVarint!(uint)(tmp);
    debug writefln("%d should be %d",uitmp,uint.max);
    assert(uint.max == uitmp);
    assert(tmp.length == 0);

    // Negative values use the canonical ten byte form ending in 0x01.
    assert(toVarint(cast(long)-1) == cast(ubyte[])
        [0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x01]);
    assert(toVarint(cast(long)0) == cast(ubyte[])[0x00]);
    assert(toVarint(cast(long)128) == cast(ubyte[])[0x80,0x01]);

    // The legacy ten byte form with a 0x7F terminator still decodes.
    tmp = cast(ubyte[])[0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x7F];
    assert(fromVarint!(long)(tmp) == -1);
    assert(tmp.length == 0);
    debug writefln("");
}

/**
 * Zig-zag encode a signed value and write it as a varint with a header.
 *
 * The zig-zag result is an unsigned quantity; the int overload passes it on
 * as uint so that it is never sign-extended to ten bytes.
 */
ubyte[] toSInt(int input, int field) {
    return toVarint(cast(uint)((input << 1) ^ (input >> 31)), field);
}
/// ditto
ubyte[] toSInt(long input, int field) {
    return toVarint((input << 1) ^ (input >> 63), field);
}

/**
 * Decode a zig-zag encoded varint, removing it from the front of input.
 *
 * Only instantiable with int or long.
 */
T fromSInt(T)(ref ubyte[] input) {
    static assert(is(T == int) || is(T == long),
        "fromSInt only works with types int or long.");

    // Decode through the unsigned type of the same width so the shift
    // below is logical rather than arithmetic.
    static if (is(T == int))
        alias uint U;
    else
        alias ulong U;

    U raw = fromVarint!(U)(input);
    // All ones when the low (sign) bit is set, all zeros otherwise.
    U mask = cast(U)0 - (raw & 1);
    return cast(T)((raw >>> 1) ^ mask);
}

unittest {
    writefln("unittest ProtocolBuffer.pbhelper.toSInt");
    debug writefln("toSInt(int)...");
    ubyte[]tmp = toSInt(0,12);
    debug writefln("length");
    assert(tmp.length == 2);
    ubyte cmp = cast(ubyte)0b0;
    debug writefln("first ubyte(%b): %b",cmp,tmp[1]);
    assert(tmp[1] == cmp);
    cmp = cast(ubyte)(12<<3);
    debug writefln("header ubyte(%b): %b",cmp,tmp[0]);
    assert(tmp[0] == cmp);

    debug writefln("toSInt(long)...");
    tmp = toSInt(cast(long)-2,12);
    debug writefln("length");
    assert(tmp.length == 2);
    cmp = cast(ubyte)0b11;
    debug writefln("first ubyte(%b): %b",cmp,tmp[1]);
    assert(tmp[1] == cmp);

    debug writefln("fromSInt(long)...");
    // slice off header for reuse
    tmp = tmp[1..$];
    assert(-2 == fromSInt!(long)(tmp));
    assert(tmp.length == 0);

    // Extremes must round trip and stay within five bytes for 32-bit values.
    tmp = toSInt(int.min,1)[1..$];
    assert(tmp == cast(ubyte[])[0xFF,0xFF,0xFF,0xFF,0x0F]);
    assert(int.min == fromSInt!(int)(tmp));
    tmp = toSInt(int.max,1)[1..$];
    assert(tmp == cast(ubyte[])[0xFE,0xFF,0xFF,0xFF,0x0F]);
    assert(int.max == fromSInt!(int)(tmp));
    tmp = toSInt(long.max,1)[1..$];
    assert(long.max == fromSInt!(long)(tmp));
    tmp = toSInt(long.min,1)[1..$];
    assert(long.min == fromSInt!(long)(tmp));
    debug writefln("");
}

/**
 * Reverse the order of the bytes in place.
 *
 * Used to convert between host and wire byte order on big-endian targets.
 */
private void swapByteOrder(ubyte[] bytes) {
    if (bytes.length < 2) return;
    size_t lo = 0;
    size_t hi = bytes.length - 1;
    while (lo < hi) {
        ubyte t = bytes[lo];
        bytes[lo] = bytes[hi];
        bytes[hi] = t;
        lo++;
        hi--;
    }
}

/**
 * Fixed sized numeric types.
 *
 * Valid for uint, float, ulong, and double
 *
 * The value's bytes are written in little-endian order after a header whose
 * wire type is fixed64 for eight byte types and fixed32 otherwise.
 */
ubyte[] toByteBlob(T)(T input, int field) {
    ubyte[] payload = (cast(ubyte*)&input)[0..T.sizeof].dup;
    version (BigEndian) { swapByteOrder(payload); }
    return genHeader(field, T.sizeof == 8 ? WireType.fixed64 : WireType.fixed32)
        ~ payload;
}

/**
 * Read a fixed sized value, removing T.sizeof bytes from the front of input.
 *
 * Throws:
 *     Exception if fewer than T.sizeof bytes are available.
 */
T fromByteBlob(T)(ref ubyte[] input) {
    if (input.length < T.sizeof) {
        throw new Exception("Not enough data to read a fixed size value.");
    }
    T ret;
    ubyte[] dest = (cast(ubyte*)&ret)[0..T.sizeof];
    dest[] = input[0..T.sizeof];
    input = input[T.sizeof..$];
    // Swap the local copy only, so the caller's buffer is never modified.
    version (BigEndian) { swapByteOrder(dest); }
    return ret;
}

unittest {
    writefln("unittest ProtocolBuffer.pbhelper.byteblobs");
    ubyte[]tmp = toByteBlob!(double)(1.542,cast(ubyte)5)[1..$];
    assert(1.542 == fromByteBlob!(double)(tmp));
    assert(tmp.length == 0);

    ubyte[] order = [1, 2, 3];
    swapByteOrder(order);
    assert(order == cast(ubyte[])[3, 2, 1]);
    debug writefln("");
}

/**
 * Handle strings
 *
 * Writes a length delimited field: header, byte length as a varint, then
 * the raw bytes of input. The length prefix counts bytes, not elements.
 */
ubyte[] toByteString(T)(T[] input, int field) {
    ubyte[] payload = cast(ubyte[])input;
    return genHeader(field, WireType.lenDelimited)
        ~ toVarint(payload.length) ~ payload;
}

/**
 * Read a length delimited value (length varint followed by that many
 * bytes), removing it from the front of input.
 *
 * The result is a reinterpreting slice of input's memory, not a copy.
 *
 * Throws:
 *     Exception if the stated length exceeds the remaining input.
 */
T[] fromByteString(T:T[])(ref ubyte[] input) {
    uint len = fromVarint!(uint)(input);
    if (len > input.length) {
        throw new Exception("String length exceeds length of input ubyte array.");
    }
    T[] ret = cast(T[])input[0..len];
    input = input[len..$];
    return ret;
}

unittest {
    writefln("unittest ProtocolBuffer.pbhelper.byteblobs");
    string test = "My toast has been stolen!";
    ubyte[]tmp = toByteString(test,cast(ubyte)15)[1..$];
    assert(test == fromByteString!(string)(tmp));
    assert(tmp.length == 0);
    debug writefln("");

    ubyte[] data = [0x03, // Length
                    0x05, 0x06, 0x07,
                    0x00, // Length
                    0x01, // Length
                    0x08];
    assert(fromByteString!(ubyte[])(data) == cast(ubyte[])[0x05, 0x06, 0x07]);
    assert(fromByteString!(ubyte[])(data) == cast(ubyte[])[]);
    assert(fromByteString!(ubyte[])(data) == cast(ubyte[])[0x08]);

    // The length prefix is in bytes even for wider element types.
    short[] wide = [1, 2];
    tmp = toByteString(wide, 1);
    assert(tmp.length == 6);
    assert(tmp[1] == 4);
}

/**
 * Remove count bytes from the front of input and return them.
 *
 * Throws:
 *     Exception if fewer than count bytes are available.
 */
private ubyte[] takeBytes(ref ubyte[] input, size_t count) {
    if (input.length < count) {
        throw new Exception("Unknown field is longer than the available data.");
    }
    ubyte[] chunk = input[0..count];
    input = input[count..$];
    return chunk;
}

/**
 * Remove unknown field from input.
 *
 * The header is expected to have been consumed already; wiretype selects
 * how much data follows.
 *
 * Returns:
 *     The data of field: the re-encoded varint for wire type 0, the raw
 *     eight or four bytes for wire types 1 and 5, and the length varint
 *     followed by the payload for wire type 2.
 *
 * Throws:
 *     Exception for any other wire type or when input is too short.
 */
ubyte[] ripUField(ref ubyte[] input, int wiretype) {
    switch(wiretype) {
    case 0:
        // snag a varint
        return toVarint(fromVarint!(long)(input));
    case 1:
        // snag a 64bit chunk
        return takeBytes(input, 8);
    case 2:
        // snag a length delimited chunk
        auto blen = fromVarint!(ulong)(input);
        if (blen > input.length) {
            throw new Exception("Unknown field is longer than the available data.");
        }
        ubyte[] payload = takeBytes(input, cast(size_t)blen);
        return toVarint(cast(long)blen) ~ payload;
    case 5:
        // snag a 32bit chunk
        return takeBytes(input, 4);
    default:
        // unsupported or unknown wire type
        throw new Exception("Can't deal with wiretype "~to!(string)(wiretype));
    }
    assert(0);
}

unittest {
    // A length delimited field must be consumed from the input.
    ubyte[] data = [0x02, 0xAA, 0xBB, 0x07];
    assert(ripUField(data, 2) == cast(ubyte[])[0x02, 0xAA, 0xBB]);
    assert(data == cast(ubyte[])[0x07]);
    assert(ripUField(data, 0) == cast(ubyte[])[0x07]);
    assert(data.length == 0);
}

/**
 * Handle packed fields.
 *
 * Each element is passed to serializer(element, field); the header that
 * serializer emits is stripped and the remaining bytes are concatenated
 * into one length delimited field. An empty array serializes to null.
 */
ubyte[] toPacked(T:T[], alias serializer)(in T[] packed, int field) {
    // zero length packed repeated fields serialize to nothing
    if (!packed.length) return null;

    // The header is a varint of (field << 3) | wiretype, so its length is
    // fixed by the field number alone and exceeds one byte once
    // field >= 16.
    auto headerLen = genHeader(field, WireType.varint).length;

    ubyte[] payload;
    foreach(pack; packed) {
        // serialize everything, but leave off the header bytes for all of them
        payload ~= serializer(pack, field)[headerLen..$];
    }
    // now that everything is serialized, grab the length, convert to varint,
    // and tack on a header
    return genHeader(field, WireType.lenDelimited)
        ~ toVarint(payload.length) ~ payload;
}

/**
 * Decode a packed field whose header has already been removed.
 *
 * Reads the length varint, detaches that many bytes from input and runs
 * deserializer over them until they are exhausted.
 *
 * Throws:
 *     Exception if the stated length exceeds the remaining input.
 */
T[] fromPacked(T, alias deserializer)(ref ubyte[] input) {
    T[] ret;
    // it's assumed that the field is already ripped off
    // grab the length to be decoded
    auto len = fromVarint!(uint)(input);
    if (input.length < len) throw new Exception("A repeated packed field specifies a length longer than available data.");
    // detach the chunk that belongs to this field and decode all of it
    ubyte[] own = input[0..len];
    input = input[len..$];
    while(own.length) {
        ret ~= cast(T) deserializer(own);
    }
    return ret;
}

unittest {
    writefln("unittest ProtocolBuffer.pbhelper.packed_fields");
    int[]test = [3,270,86942];
    ubyte[]cmp = cast(ubyte[])[0x22,0x6,0x3,0x8e,0x2,0x9e,0xa7,0x5];
    ubyte[]tmp = toPacked!(int[],toVarint)(test,cast(ubyte)4);
    assert(tmp.length == 8);
    version(D_Version2) {
        mixin("import std.algorithm, std.range, std.string;");
        debug writeln(map!((a) { return format("%x", a); })(cmp));
        debug writeln(map!((a) { return format("%x", a); })(tmp));
    } else {
        debug writefln("%x",cmp);
        debug writefln("%x",tmp);
    }
    assert(tmp == cmp);
    // rip off header ubyte
    tmp = tmp[1..$];
    int[]test2 = fromPacked!(int,fromVarint!(int))(tmp);
    assert(test == test2);

    // Field numbers of 16 and above have a two byte header.
    int[] small = [1, 2];
    tmp = toPacked!(int[],toVarint)(small, 16);
    assert(tmp == cast(ubyte[])[0x82, 0x01, 0x02, 0x01, 0x02]);
    tmp = tmp[2..$];
    assert(fromPacked!(int,fromVarint!(int))(tmp) == small);
    debug writefln("");
}