SVN checkout 11/12/2010
[monav:monav.git] / plugins / unicodetournamenttrie / unicodetournamenttrie.cpp
1 /*
2 Copyright 2010  Christian Vetter veaac.fdirct@gmail.com
3
4 This file is part of MoNav.
5
6 MoNav is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 MoNav is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with MoNav.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "unicodetournamenttrie.h"
21 #include "utils/qthelpers.h"
22 #include "utils/edgeconnector.h"
23 #include "interfaces/iimporter.h"
24
25 #include <algorithm>
26 #include <QMultiHash>
27 #include <QList>
28 #include <limits>
29
30 UnicodeTournamentTrie::UnicodeTournamentTrie()
31 {
32         m_settingsDialog = NULL;
33 }
34
35 UnicodeTournamentTrie::~UnicodeTournamentTrie()
36 {
37         if ( m_settingsDialog != NULL )
38                 delete m_settingsDialog;
39 }
40
41 QString UnicodeTournamentTrie::GetName()
42 {
43         return "Unicode Tournament Trie";
44 }
45
46 int UnicodeTournamentTrie::GetFileFormatVersion()
47 {
48         return 1;
49 }
50
51 UnicodeTournamentTrie::Type UnicodeTournamentTrie::GetType()
52 {
53         return AddressLookup;
54 }
55
56 QWidget* UnicodeTournamentTrie::GetSettings()
57 {
58         if ( m_settingsDialog == NULL )
59                 m_settingsDialog = new UTTSettingsDialog();
60         return m_settingsDialog;
61 }
62
63 bool UnicodeTournamentTrie::LoadSettings( QSettings* settings )
64 {
65         if ( m_settingsDialog == NULL )
66                 m_settingsDialog = new UTTSettingsDialog();
67         return m_settingsDialog->loadSettings( settings );
68 }
69
70 bool UnicodeTournamentTrie::SaveSettings( QSettings* settings )
71 {
72         if ( m_settingsDialog == NULL )
73                 m_settingsDialog = new UTTSettingsDialog();
74         return m_settingsDialog->saveSettings( settings );
75 }
76
77 bool UnicodeTournamentTrie::Preprocess( IImporter* importer, QString dir )
78 {
79         if ( m_settingsDialog == NULL )
80                 m_settingsDialog = new UTTSettingsDialog();
81
82         UTTSettingsDialog::Settings settings;
83         if ( !m_settingsDialog->getSettings( &settings ) )
84                 return false;
85         QString filename = fileInDirectory( dir, "Unicode Tournament Trie" );
86
87         QFile subTrieFile( filename + "_sub" );
88         QFile mainTrieFile( filename + "_main" );
89         QFile wayFile( filename + "_ways" );
90
91         if ( !openQFile( &subTrieFile, QIODevice::WriteOnly ) )
92                 return false;
93         if ( !openQFile( &mainTrieFile, QIODevice::WriteOnly ) )
94                 return false;
95         if ( !openQFile( &wayFile, QIODevice::WriteOnly ) )
96                 return false;
97
98         std::vector< IImporter::Place > inputPlaces;
99         std::vector< IImporter::Address > inputAddress;
100         std::vector< UnsignedCoordinate > inputWayBuffer;
101         std::vector< QString > inputWayNames;
102         if ( !importer->GetAddressData( &inputPlaces, &inputAddress, &inputWayBuffer, &inputWayNames ) )
103                 return false;
104
105         Timer time;
106
107         std::vector< PlaceImportance > importanceOrder;
108         importanceOrder.reserve( inputPlaces.size() );
109         for ( std::vector< IImporter::Place >::const_iterator i = inputPlaces.begin(), e = inputPlaces.end(); i != e; ++i ) {
110                 PlaceImportance temp;
111                 temp.population  = i->population;
112                 temp.name = i->name;
113                 switch ( i->type ) {
114                 case IImporter::Place::City:
115                         {
116                                 temp.type = 6;
117                                 break;
118                         }
119                 case IImporter::Place::Town:
120                         {
121                                 temp.type = 5;
122                                 break;
123                         }
124                 case IImporter::Place::Suburb:
125                         {
126                                 temp.type = 0;
127                                 break;
128                         }
129                 case IImporter::Place::Village:
130                         {
131                                 temp.type = 4;
132                                 break;
133                         }
134                 case IImporter::Place::Hamlet:
135                         {
136                                 temp.type = 3;
137                                 break;
138                         }
139                 case IImporter::Place::None:
140                         {
141                                 temp.type = 0;
142                                 break;
143                         }
144                 }
145                 importanceOrder.push_back( temp );
146         }
147
148         std::sort( importanceOrder.begin(), importanceOrder.end() );
149         QHash< QString, unsigned > importance;
150         for ( int i = 0; i < ( int ) importanceOrder.size(); i++ )
151                 importance[importanceOrder[i].name] = i;
152         std::vector< PlaceImportance >().swap( importanceOrder );
153
154         std::sort( inputAddress.begin(), inputAddress.end() );
155         qDebug() << "Unicode Tournament Trie: sorted addresses by importance:" << time.restart() << "ms";
156
157         std::vector< UnsignedCoordinate > wayBuffer;
158         std::vector< utt::Node > trie( 1 );
159         unsigned address = 0;
160         for ( unsigned place = 0; place < inputPlaces.size(); place++ ) {
161
162                 // skip suburbs
163                 if ( inputPlaces[place].type == IImporter::Place::Suburb ) {
164                         while ( address < inputAddress.size() && inputAddress[address].nearPlace == place )
165                                 ++address;
166                 }
167
168                 utt::Data data;
169                 data.start = subTrieFile.pos();
170
171                 // write city information in front of the trie
172                 utt::CityData cityData;
173                 cityData.coordinate = inputPlaces[place].coordinate;
174                 char* buffer = new char[cityData.GetSize()];
175                 cityData.Write( buffer );
176                 subTrieFile.write( buffer, cityData.GetSize() );
177                 delete[] buffer;
178
179                 // build address name index
180                 QMultiHash< unsigned, unsigned > addressByName;
181                 for ( ; address < inputAddress.size(); address++ ) {
182                         if ( inputAddress[address].nearPlace != place )
183                                 break;
184                         addressByName.insert( inputAddress[address].name, address );
185                 }
186
187                 // compute way lengths
188                 QList< unsigned > uniqueNames = addressByName.uniqueKeys();
189                 std::vector< std::pair< double, unsigned > > wayLengths;
190                 for ( unsigned name = 0; name < ( unsigned ) uniqueNames.size(); name++ ) {
191                         QList< unsigned > segments = addressByName.values( uniqueNames[name] );
192                         double distance = 0;
193                         for( unsigned segment = 0; segment < ( unsigned ) segments.size(); segment++ ) {
194                                 const IImporter::Address segmentAddress = inputAddress[segment];
195                                 for ( unsigned coord = 1; coord < segmentAddress.pathLength; ++coord ) {
196                                         GPSCoordinate sourceGPS = inputWayBuffer[segmentAddress.pathID + coord - 1].ToProjectedCoordinate().ToGPSCoordinate();
197                                         GPSCoordinate targetGPS = inputWayBuffer[segmentAddress.pathID + coord].ToProjectedCoordinate().ToGPSCoordinate();
198                                         distance += sourceGPS.ApproximateDistance( targetGPS );
199                                 }
200                         }
201                         wayLengths.push_back( std::pair< double, unsigned >( distance, name ) );
202                 }
203
204                 // sort ways by aggregate lengths
205                 std::sort( wayLengths.begin(), wayLengths.end() );
206                 std::vector< unsigned > wayImportance( uniqueNames.size() );
207                 for ( unsigned way = 0; way < wayLengths.size(); way++ )
208                         wayImportance[wayLengths[way].second] = way;
209                 wayLengths.clear();
210
211                 std::vector< utt::Node > subTrie( 1 );
212
213                 for ( unsigned name = 0; name < ( unsigned ) uniqueNames.size(); name++ ) {
214                         QList< unsigned > segments = addressByName.values( uniqueNames[name] );
215
216                         // build edge connector data structures
217                         std::vector< EdgeConnector< UnsignedCoordinate>::Edge > connectorEdges;
218                         std::vector< unsigned > resultSegments;
219                         std::vector< unsigned > resultSegmentDescriptions;
220                         std::vector< bool > resultReversed;
221
222                         for ( unsigned segment = 0; segment < ( unsigned ) segments.size(); segment++ ) {
223                                 const IImporter::Address& segmentAddress = inputAddress[segments[segment]];
224                                 EdgeConnector< UnsignedCoordinate >::Edge newEdge;
225                                 newEdge.source = inputWayBuffer[segmentAddress.pathID];
226                                 newEdge.target = inputWayBuffer[segmentAddress.pathID + segmentAddress.pathLength - 1];
227                                 newEdge.reverseable = true;
228                                 connectorEdges.push_back( newEdge );
229                         }
230
231                         EdgeConnector< UnsignedCoordinate >::run( &resultSegments, &resultSegmentDescriptions, &resultReversed, connectorEdges );
232
233                         // string places with the same name together
234                         unsigned nextID = 0;
235                         for ( unsigned segment = 0; segment < resultSegments.size(); segment++ ) {
236                                 utt::Data subEntry;
237                                 subEntry.start = wayBuffer.size();
238
239                                 for ( unsigned description = 0; description < resultSegments[segment]; description++ ) {
240                                         unsigned segmentID = resultSegmentDescriptions[nextID + description];
241                                         const IImporter::Address& segmentAddress = inputAddress[segments[segmentID]];
242                                         std::vector< UnsignedCoordinate > path;
243                                         for ( unsigned pathID = 0; pathID < segmentAddress.pathLength; pathID++ )
244                                                 path.push_back( inputWayBuffer[pathID + segmentAddress.pathID]);
245                                         if ( resultReversed[segmentID] )
246                                                 std::reverse( path.begin(), path.end() );
247                                         int skipFirst = description == 0 ? 0 : 1;
248                                         assert( skipFirst == 0 || wayBuffer.back() == path.front() );
249                                         wayBuffer.insert( wayBuffer.end(), path.begin() + skipFirst, path.end() );
250                                 }
251
252                                 subEntry.length = wayBuffer.size() - subEntry.start;
253                                 insert( &subTrie, wayImportance[name], inputWayNames[uniqueNames[name]], subEntry );
254
255                                 nextID += resultSegments[segment];
256                         }
257                 }
258
259                 utt::Data cityCenterData;
260                 cityCenterData.start = wayBuffer.size();
261                 wayBuffer.push_back( inputPlaces[place].coordinate );
262                 wayBuffer.push_back( inputPlaces[place].coordinate );
263                 cityCenterData.length = 2;
264                 insert( &subTrie, std::numeric_limits< unsigned >::max(), tr( "City Center" ), cityCenterData );
265
266                 writeTrie( &subTrie, subTrieFile );
267
268                 data.length = subTrieFile.pos() - data.start;
269
270                 assert( importance.contains( inputPlaces[place].name ) );
271                 insert( &trie, importance[inputPlaces[place].name], inputPlaces[place].name, data );
272         }
273         assert( address == inputAddress.size() );
274         qDebug() << "Unicode Tournament Trie: build tries and tournament trees:" << time.restart() << "ms";
275
276         writeTrie( &trie, mainTrieFile );
277         qDebug() << "Unicode Tournament Trie: wrote tries:" << time.restart() << "ms";
278
279         for ( std::vector< UnsignedCoordinate >::const_iterator i = wayBuffer.begin(), e = wayBuffer.end(); i != e; ++i ) {
280                 wayFile.write( ( char* ) &i->x, sizeof( i->x ) );
281                 wayFile.write( ( char* ) &i->y, sizeof( i->y ) );
282         }
283         qDebug() << "Unicode Tournament Trie: wrote ways:" << time.restart() << "ms";
284
285         return true;
286 }
287
288 void UnicodeTournamentTrie::insert( std::vector< utt::Node >* trie, unsigned importance, const QString& name, utt::Data data )
289 {
290         unsigned node = 0;
291         QString lowerName = name.toLower();
292         int position = 0;
293         while ( position < lowerName.length() ) {
294                 bool found = false;
295                 for ( int c = 0; c < ( int ) trie->at( node ).labelList.size(); c++ ) {
296                         utt::Label& label = (*trie)[node].labelList[c];
297                         if ( label.string[0] == lowerName[position] ) {
298                                 int diffPos = 0;
299                                 int minLength = std::min( label.string.length(), lowerName.length() - position );
300                                 for ( ; diffPos < minLength; diffPos++ )
301                                         if ( label.string[diffPos] != lowerName[position + diffPos] )
302                                                 break;
303
304                                 if ( diffPos != label.string.length() ) {
305                                         utt::Label newEdge;
306                                         newEdge.importance = label.importance;
307                                         newEdge.index = label.index;
308                                         newEdge.string = label.string.mid( diffPos );
309
310                                         label.string = label.string.left( diffPos );
311                                         label.index = trie->size();
312                                         node = label.index;
313
314                                         if ( label.importance < importance )
315                                                 label.importance = importance;
316
317                                         trie->push_back( utt::Node() ); //invalidates label reference!!!
318                                         trie->back().labelList.push_back( newEdge );
319                                 } else {
320                                         node = label.index;
321                                         if ( label.importance < importance )
322                                                 label.importance = importance;
323                                 }
324
325                                 position += diffPos;
326                                 found = true;
327                                 break;
328                         }
329                 }
330
331                 if ( position == lowerName.length() )
332                         found = true;
333
334                 if ( !found ) {
335                         utt::Label label;
336                         label.string = lowerName.mid( position );
337                         label.index = trie->size();
338                         label.importance = importance;
339                         (*trie)[node].labelList.push_back( label );
340
341                         node = trie->size();
342                         trie->push_back( utt::Node() );
343                         break;
344                 }
345         }
346
347         (*trie)[node].dataList.push_back( data );
348 }
349
350 void UnicodeTournamentTrie::writeTrie( std::vector< utt::Node >* trie, QFile& file )
351 {
352         if ( trie->size() == 0 )
353                 return;
354
355         size_t position = 0;
356         std::vector< unsigned > index( trie->size() );
357         std::vector< unsigned > stack;
358         std::vector< unsigned > order;
359         stack.push_back( 0 );
360         while ( !stack.empty() ) {
361                 unsigned node = stack.back();
362                 stack.pop_back();
363                 order.push_back( node );
364
365                 index[node] = position;
366                 position += (*trie)[node].GetSize();
367
368                 std::sort( (*trie)[node].labelList.begin(), (*trie)[node].labelList.end() );
369                 for ( int i = (*trie)[node].labelList.size() - 1; i >= 0; i-- )
370                         stack.push_back( (*trie)[node].labelList[i].index );
371         }
372
373         for ( int i = 0; i < ( int ) trie->size(); i++ ) {
374                 for ( int c = 0; c < ( int ) (*trie)[i].labelList.size(); c++ ) {
375                         (*trie)[i].labelList[c].index = index[(*trie)[i].labelList[c].index];
376                 }
377         }
378         assert( order.size() == trie->size() );
379
380         char* buffer = new char[position];
381
382         position = 0;
383         for ( int i = 0; i < ( int ) order.size(); i++ ) {
384                 unsigned node = order[i];
385                 (*trie)[node].Write( buffer + position );
386                 utt::Node testElement;
387                 testElement.Read( buffer + position );
388                 assert( testElement == (*trie)[node] );
389                 position += (*trie)[node].GetSize();
390         }
391         file.write( buffer, position );
392
393         delete[] buffer;
394 }
395
396 Q_EXPORT_PLUGIN2(unicodetournamenttrie, UnicodeTournamentTrie)