SVN checkout 11/12/2010
[monav:monav.git] / plugins / osmimporter / pbfreader.h
1 /*
2 Copyright 2010  Christian Vetter veaac.fdirct@gmail.com
3
4 This file is part of MoNav.
5
6 MoNav is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 MoNav is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with MoNav.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifndef PBFREADER_H
21 #define PBFREADER_H
22
23 #include "ientityreader.h"
24 #include "protobuff definitions/fileformat.pb.h"
25 #include "protobuff definitions/osmformat.pb.h"
26 #include "utils/qthelpers.h"
27 #include <QHash>
28 #include <QFile>
29 #include <string>
30 #include <zlib.h>
31 #include <bzlib.h>
32 #include "lzma/LzmaDec.h"
33
// divisor applied to the scaled integer coordinates of a node to obtain the final coordinate value ( 10^9 )
34 #define NANO ( 1000.0 * 1000.0 * 1000.0 )
// largest BlockHeader size in bytes ( 64 KiB ) the reader accepts before rejecting the file
35 #define MAX_BLOCK_HEADER_SIZE ( 64 * 1024 )
// largest Blob size in bytes ( 32 MiB ) the reader accepts before rejecting the file
36 #define MAX_BLOB_SIZE ( 32 * 1024 * 1024 )
37
38 class PBFReader : public IEntityReader {
39
40 protected:
41
42         enum Mode {
43                 ModeNode, ModeWay, ModeRelation, ModeDense
44         };
45
46 public:
47
48         PBFReader()
49         {
50                 GOOGLE_PROTOBUF_VERIFY_VERSION;
51         }
52
53         virtual bool open( QString filename )
54         {
55                 m_file.setFileName( filename );
56
57                 if ( !openQFile( &m_file, QIODevice::ReadOnly ) )
58                         return false;
59
60                 if ( !readBlockHeader() )
61                         return false;
62
63                 if ( m_blockHeader.type() != "OSMHeader" ) {
64                         qCritical() << "OSMHeader missing, found" << m_blockHeader.type().data() << "instead";
65                         return false;
66                 }
67
68                 if ( !readBlob() )
69                         return false;
70
71                 if ( !m_headerBlock.ParseFromArray( m_buffer.data(), m_buffer.size() ) ) {
72                         qCritical() << "failed to parse HeaderBlock";
73                         return false;
74                 }
75                 for ( int i = 0; i < m_headerBlock.required_features_size(); i++ ) {
76                         const std::string& feature = m_headerBlock.required_features( i );
77                         bool supported = false;
78                         if ( feature == "OsmSchema-V0.6" )
79                                 supported = true;
80                         else if ( feature == "DenseNodes" )
81                                 supported = true;
82
83                         if ( !supported ) {
84                                 qCritical() << "required feature not supported:" << feature.data();
85                                 return false;
86                         }
87                 }
88                 m_loadBlock = true;
89                 return true;
90         }
91
92         virtual void setNodeTags( QStringList tags )
93         {
94                 for ( int i = 0; i < tags.size(); i++ )
95                         m_nodeTags.insert( tags[i], i );
96         }
97
98         virtual void setWayTags( QStringList tags )
99         {
100                 for ( int i = 0; i < tags.size(); i++ )
101                         m_wayTags.insert( tags[i], i );
102         }
103
104         virtual void setRelationTags( QStringList tags )
105         {
106                 for ( int i = 0; i < tags.size(); i++ )
107                         m_relationTags.insert( tags[i], i );
108         }
109
110         virtual EntityType getEntitiy( Node* node, Way* way, Relation* relation )
111         {
112                 if ( m_loadBlock ) {
113                         if ( !readNextBlock() )
114                                 return EntityNone;
115                         loadBlock();
116                         loadGroup();
117                 }
118
119                 switch ( m_mode ) {
120                 case ModeNode:
121                         parseNode( node );
122                         return EntityNode;
123                 case ModeWay:
124                         parseWay( way );
125                         return EntityWay;
126                 case ModeRelation:
127                         parseRelation( relation );
128                         return EntityRelation;
129                 case ModeDense:
130                         parseDense( node );
131                         return EntityNode;
132                 }
133
134                 return EntityNone;
135         }
136
137         virtual ~PBFReader()
138         {
139         }
140
141 protected:
142
143         int convertNetworkByteOrder( char data[4] )
144         {
145                 return ( ( ( unsigned ) data[0] ) << 24 ) | ( ( ( unsigned ) data[1] ) << 16 ) | ( ( ( unsigned ) data[2] ) << 8 ) | ( unsigned ) data[3];
146         }
147
148         void parseNode( IEntityReader::Node* node )
149         {
150                 node->tags.clear();
151
152                 const PBF::Node& inputNode = m_primitiveBlock.primitivegroup( m_currentGroup ).nodes( m_currentEntity );
153                 node->id = inputNode.id();
154                 node->coordinate.latitude = ( ( double ) inputNode.lat() * m_primitiveBlock.granularity() + m_primitiveBlock.lat_offset() ) / NANO;
155                 node->coordinate.longitude = ( ( double ) inputNode.lon() * m_primitiveBlock.granularity() + m_primitiveBlock.lon_offset() ) / NANO;
156                 for ( int tag = 0; tag < inputNode.keys_size(); tag++ ) {
157                         int tagID = m_nodeTagIDs[inputNode.keys( tag )];
158                         if ( tagID == -1 )
159                                 continue;
160                         Tag newTag;
161                         newTag.key = tagID;
162                         newTag.value = QString::fromUtf8( m_primitiveBlock.stringtable().s( inputNode.vals( tag ) ).data() );
163                         node->tags.push_back( newTag );
164                 }
165
166                 m_currentEntity++;
167                 if ( m_currentEntity >= m_primitiveBlock.primitivegroup( m_currentGroup ).nodes_size() ) {
168                         m_currentEntity = 0;
169                         m_currentGroup++;
170                         if ( m_currentGroup >= m_primitiveBlock.primitivegroup_size() )
171                                 m_loadBlock = true;
172                         else
173                                 loadGroup();
174                 }
175         }
176
177         void parseWay( IEntityReader::Way* way )
178         {
179                 way->tags.clear();
180                 way->nodes.clear();
181
182                 const PBF::Way& inputWay = m_primitiveBlock.primitivegroup( m_currentGroup ).ways( m_currentEntity );
183                 way->id = inputWay.id();
184                 for ( int tag = 0; tag < inputWay.keys_size(); tag++ ) {
185                         int tagID = m_wayTagIDs[inputWay.keys( tag )];
186                         if ( tagID == -1 )
187                                 continue;
188                         Tag newTag;
189                         newTag.key = tagID;
190                         newTag.value = QString::fromUtf8( m_primitiveBlock.stringtable().s( inputWay.vals( tag ) ).data() );
191                         way->tags.push_back( newTag );
192                 }
193
194                 long long lastRef = 0;
195                 for ( int i = 0; i < inputWay.refs_size(); i++ ) {
196                         lastRef += inputWay.refs( i );
197                         way->nodes.push_back( lastRef );
198                 }
199
200                 m_currentEntity++;
201                 if ( m_currentEntity >= m_primitiveBlock.primitivegroup( m_currentGroup ).ways_size() ) {
202                         m_currentEntity = 0;
203                         m_currentGroup++;
204                         if ( m_currentGroup >= m_primitiveBlock.primitivegroup_size() )
205                                 m_loadBlock = true;
206                         else
207                                 loadGroup();
208                 }
209         }
210
211         void parseRelation( IEntityReader::Relation* relation )
212         {
213                 relation->tags.clear();
214                 relation->members.clear();
215
216                 const PBF::Relation& inputRelation = m_primitiveBlock.primitivegroup( m_currentGroup ).relations( m_currentEntity );
217                 relation->id = inputRelation.id();
218                 for ( int tag = 0; tag < inputRelation.keys_size(); tag++ ) {
219                         int tagID = m_relationTagIDs[inputRelation.keys( tag )];
220                         if ( tagID == -1 )
221                                 continue;
222                         Tag newTag;
223                         newTag.key = tagID;
224                         newTag.value = QString::fromUtf8( m_primitiveBlock.stringtable().s( inputRelation.vals( tag ) ).data() );
225                         relation->tags.push_back( newTag );
226                 }
227
228                 long long lastRef = 0;
229                 for ( int i = 0; i < inputRelation.types_size(); i++ ) {
230                         RelationMember member;
231                         switch ( inputRelation.types( i ) ) {
232                         case PBF::Relation::NODE:
233                                 member.type = RelationMember::Node;
234                                 break;
235                         case PBF::Relation::WAY:
236                                 member.type = RelationMember::Way;
237                                 break;
238                         case PBF::Relation::RELATION:
239                                 member.type = RelationMember::Relation;
240                         }
241                         lastRef += inputRelation.memids( i );
242                         member.ref = lastRef;
243                         member.role = m_primitiveBlock.stringtable().s( inputRelation.roles_sid( i ) ).data();
244                         relation->members.push_back( member );
245                 }
246
247                 m_currentEntity++;
248                 if ( m_currentEntity >= m_primitiveBlock.primitivegroup( m_currentGroup ).relations_size() ) {
249                         m_currentEntity = 0;
250                         m_currentGroup++;
251                         if ( m_currentGroup >= m_primitiveBlock.primitivegroup_size() )
252                                 m_loadBlock = true;
253                         else
254                                 loadGroup();
255                 }
256         }
257
258         void parseDense( IEntityReader::Node* node )
259         {
260                 node->tags.clear();
261
262                 const PBF::DenseNodes& dense = m_primitiveBlock.primitivegroup( m_currentGroup ).dense();
263                 m_lastDenseID += dense.id( m_currentEntity );
264                 m_lastDenseLatitude += dense.lat( m_currentEntity );
265                 m_lastDenseLongitude += dense.lon( m_currentEntity );
266                 node->id = m_lastDenseID;
267                 node->coordinate.latitude = ( ( double ) m_lastDenseLatitude * m_primitiveBlock.granularity() + m_primitiveBlock.lat_offset() ) / NANO;
268                 node->coordinate.longitude = ( ( double ) m_lastDenseLongitude * m_primitiveBlock.granularity() + m_primitiveBlock.lon_offset() ) / NANO;
269
270                 while ( true ){
271                         if ( m_lastDenseTag >= dense.keys_vals_size() )
272                                 break;
273
274                         int tagValue = dense.keys_vals( m_lastDenseTag );
275                         if ( tagValue == 0 ) {
276                                 m_lastDenseTag++;
277                                 break;
278                         }
279
280                         int tagID = m_nodeTagIDs[tagValue];
281
282                         if ( tagID == -1 ) {
283                                 m_lastDenseTag += 2;
284                                 continue;
285                         }
286
287                         Tag newTag;
288                         newTag.key = tagID;
289                         newTag.value = QString::fromUtf8( m_primitiveBlock.stringtable().s( dense.keys_vals( m_lastDenseTag + 1 ) ).data() );
290                         node->tags.push_back( newTag );
291                         m_lastDenseTag += 2;
292                 }
293
294                 m_currentEntity++;
295                 if ( m_currentEntity >= dense.id_size() ) {
296                         m_currentEntity = 0;
297                         m_currentGroup++;
298                         if ( m_currentGroup >= m_primitiveBlock.primitivegroup_size() )
299                                 m_loadBlock = true;
300                         else
301                                 loadGroup();
302                 }
303         }
304
305         void loadGroup()
306         {
307                 const PBF::PrimitiveGroup& group = m_primitiveBlock.primitivegroup( m_currentGroup );
308                 if ( group.nodes_size() != 0 ) {
309                         m_mode = ModeNode;
310                 } else if ( group.ways_size() != 0 ) {
311                         m_mode = ModeWay;
312                 } else if ( group.relations_size() != 0 ) {
313                         m_mode = ModeRelation;
314                 } else if ( group.has_dense() )  {
315                         m_mode = ModeDense;
316                         m_lastDenseID = 0;
317                         m_lastDenseTag = 0;
318                         m_lastDenseLatitude = 0;
319                         m_lastDenseLongitude = 0;
320                         assert( group.dense().id_size() != 0 );
321                 } else
322                         assert( false );
323         }
324
325         void loadBlock()
326         {
327                 m_loadBlock = false;
328                 m_currentGroup = 0;
329                 m_currentEntity = 0;
330                 int stringCount = m_primitiveBlock.stringtable().s_size();
331                 // precompute all strings that match a necessary tag
332                 m_nodeTagIDs.resize( m_primitiveBlock.stringtable().s_size() );
333                 for ( int i = 1; i < stringCount; i++ )
334                         m_nodeTagIDs[i] = m_nodeTags.value( m_primitiveBlock.stringtable().s( i ).data(), -1 );
335                 m_wayTagIDs.resize( m_primitiveBlock.stringtable().s_size() );
336                 for ( int i = 1; i < stringCount; i++ )
337                         m_wayTagIDs[i] = m_wayTags.value( m_primitiveBlock.stringtable().s( i ).data(), -1 );
338                 m_relationTagIDs.resize( m_primitiveBlock.stringtable().s_size() );
339                 for ( int i = 1; i < stringCount; i++ )
340                         m_relationTagIDs[i] = m_relationTags.value( m_primitiveBlock.stringtable().s( i ).data(), -1 );
341         }
342
343         bool readNextBlock()
344         {
345                 if ( !readBlockHeader() )
346                         return false;
347
348                 if ( m_blockHeader.type() != "OSMData" ) {
349                         qCritical() << "invalid block type, found" << m_blockHeader.type().data() << "instead of OSMData";
350                         return false;
351                 }
352
353                 if ( !readBlob() )
354                         return false;
355
356                 if ( !m_primitiveBlock.ParseFromArray( m_buffer.data(), m_buffer.size() ) ) {
357                         qCritical() << "failed to parse PrimitiveBlock";
358                         return false;
359                 }
360                 return true;
361         }
362
363         bool readBlockHeader()
364         {
365                 char sizeData[4];
366                 if ( m_file.read( sizeData, 4 * sizeof( char ) ) != 4 * sizeof( char ) )
367                         return false; // end of stream?
368
369                 int size = convertNetworkByteOrder( sizeData );
370                 if ( size > MAX_BLOCK_HEADER_SIZE || size < 0 ) {
371                         qCritical() << "BlockHeader size invalid:" << size;
372                         return false;
373                 }
374                 m_buffer.resize( size );
375                 int readBytes = m_file.read( m_buffer.data(), size );
376                 if ( readBytes != size ) {
377                         qCritical() << "failed to read BlockHeader";
378                         return false;
379                 }
380                 if ( !m_blockHeader.ParseFromArray( m_buffer.constData(), size ) ) {
381                         qCritical() << "failed to parse BlockHeader";
382                         return false;
383                 }
384                 return true;
385         }
386
387         bool readBlob()
388         {
389                 int size = m_blockHeader.datasize();
390                 if ( size < 0 || size > MAX_BLOB_SIZE ) {
391                         qCritical() << "invalid Blob size:" << size;
392                         return false;
393                 }
394                 m_buffer.resize( size );
395                 int readBytes = m_file.read( m_buffer.data(), size );
396                 if ( readBytes != size ) {
397                         qCritical() << "failed to read Blob";
398                         return false;
399                 }
400                 if ( !m_blob.ParseFromArray( m_buffer.constData(), size ) ) {
401                         qCritical() << "failed to parse blob";
402                         return false;
403                 }
404
405                 if ( m_blob.has_raw() ) {
406                         const std::string& data = m_blob.raw();
407                         m_buffer.resize( data.size() );
408                         for ( unsigned i = 0; i < data.size(); i++ )
409                                 m_buffer[i] = data[i];
410                 } else if ( m_blob.has_zlib_data() ) {
411                         if ( !unpackZlib() )
412                                 return false;
413                 } else if ( m_blob.has_bzip2_data() ) {
414                         if ( !unpackBzip2() )
415                                 return false;
416                 } else if ( m_blob.has_lzma_data() ) {
417                         if ( !unpackLzma() )
418                                 return false;
419                 } else {
420                         qCritical() << "Blob contains no data";
421                         return false;
422                 }
423
424                 return true;
425         }
426
427         bool unpackZlib()
428         {
429                 m_buffer.resize( m_blob.raw_size() );
430                 z_stream compressedStream;
431                 compressedStream.next_in = ( unsigned char* ) m_blob.zlib_data().data();
432                 compressedStream.avail_in = m_blob.zlib_data().size();
433                 compressedStream.next_out = ( unsigned char* ) m_buffer.data();
434                 compressedStream.avail_out = m_blob.raw_size();
435                 compressedStream.zalloc = Z_NULL;
436                 compressedStream.zfree = Z_NULL;
437                 compressedStream.opaque = Z_NULL;
438                 int ret = inflateInit( &compressedStream );
439                 if ( ret != Z_OK ) {
440                         qCritical() << "failed to init zlib stream";
441                         return false;
442                 }
443                 ret = inflate( &compressedStream, Z_FINISH );
444                 if ( ret != Z_STREAM_END ) {
445                         qCritical() << "failed to inflate zlib stream";
446                         return false;
447                 }
448                 ret = inflateEnd( &compressedStream );
449                 if ( ret != Z_OK ) {
450                         qCritical() << "failed to deinit zlib stream";
451                         return false;
452                 }
453                 return true;
454         }
455
456         bool unpackBzip2()
457         {
458                 unsigned size = m_blob.raw_size();
459                 m_buffer.resize( size );
460                 m_bzip2Buffer.resize( m_blob.bzip2_data().size() );
461                 for ( unsigned i = 0; i < m_blob.bzip2_data().size(); i++ )
462                         m_bzip2Buffer[i] = m_blob.bzip2_data()[i];
463                 int ret = BZ2_bzBuffToBuffDecompress( m_buffer.data(), &size, m_bzip2Buffer.data(), m_bzip2Buffer.size(), 0, 0 );
464                 if ( ret != BZ_OK ) {
465                         qCritical() << "failed to unpack bzip2 stream";
466                         return false;
467                 }
468                 return true;
469         }
470
471         static void *SzAlloc( void *p, size_t size)
472         {
473                 p = p;
474                 return malloc( size );
475         }
476
477         static void SzFree( void *p, void *address)
478         {
479                 p = p;
480                 free( address );
481         }
482
483         bool unpackLzma()
484         {
485                 ISzAlloc alloc = { SzAlloc, SzFree };
486                 ELzmaStatus status;
487                 SizeT destinationLength = m_blob.raw_size();
488                 SizeT sourceLength = m_blob.lzma_data().size() - LZMA_PROPS_SIZE + 8;
489                 int ret = LzmaDecode(
490                                 ( unsigned char* ) m_buffer.data(),
491                                 &destinationLength,
492                                 ( const unsigned char* ) m_blob.lzma_data().data() + LZMA_PROPS_SIZE + 8,
493                                 &sourceLength,
494                                 ( const unsigned char* ) m_blob.lzma_data().data(),
495                                 LZMA_PROPS_SIZE + 8,
496                                 LZMA_FINISH_END,
497                                 &status,
498                                 &alloc );
499
500                 if ( ret != SZ_OK )
501                         return false;
502
503                 return true;
504         }
505
506         PBF::BlockHeader m_blockHeader;
507         PBF::Blob m_blob;
508
509         PBF::HeaderBlock m_headerBlock;
510         PBF::PrimitiveBlock m_primitiveBlock;
511
512         int m_currentGroup;
513         int m_currentEntity;
514         bool m_loadBlock;
515
516         Mode m_mode;
517
518         QHash< QString, int > m_nodeTags;
519         QHash< QString, int > m_wayTags;
520         QHash< QString, int > m_relationTags;
521
522         std::vector< int > m_nodeTagIDs;
523         std::vector< int > m_wayTagIDs;
524         std::vector< int > m_relationTagIDs;
525
526         long long m_lastDenseID;
527         long long m_lastDenseLatitude;
528         long long m_lastDenseLongitude;
529         int m_lastDenseTag;
530
531         QFile m_file;
532         QByteArray m_buffer;
533         QByteArray m_bzip2Buffer;
534
535 };
536
537 #endif // PBFREADER_H