Add configuration for ignored fields.
[infos-pratiques:etalage.git] / etalage / conv.py
1 # -*- coding: utf-8 -*-
2
3
4 # Etalage -- Open Data POIs portal
5 # By: Emmanuel Raviart <eraviart@easter-eggs.com>
6 #
7 # Copyright (C) 2011, 2012 Easter-eggs
8 # http://gitorious.org/infos-pratiques/etalage
9 #
10 # This file is part of Etalage.
11 #
12 # Etalage is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU Affero General Public License as
14 # published by the Free Software Foundation, either version 3 of the
15 # License, or (at your option) any later version.
16 #
17 # Etalage is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 # GNU Affero General Public License for more details.
21 #
22 # You should have received a copy of the GNU Affero General Public License
23 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
25
26 """Conversion functions"""
27
28
29 from cStringIO import StringIO
30 import csv
31 import math
32
33 from biryani.baseconv import *
34 from biryani.bsonconv import *
35 from biryani.objectconv import *
36 from biryani.frconv import *
37 from biryani import states
38 from territoria2.conv import split_postal_distribution, str_to_postal_distribution
39
40
# State used by the converters of this module when the caller doesn't provide one.
default_state = states.default_state


def N_(message):
    """Return ``message`` unchanged.

    Used in this module to wrap error message literals; presumably a no-op marker allowing translation tools to
    extract these strings -- TODO confirm.
    """
    return message
43
44
def default_pois_layer_data_bbox(data, state = default_state):
    """Compute the bounding box of the matching POIs and add it to ``data`` when it is missing.

    Return a ``(data, error)`` couple where ``error`` is always ``None``.

    ``data`` is returned untouched when it is ``None`` or when its ``bbox`` item is already set. Otherwise a shallow
    copy of ``data`` is returned, whose ``bbox`` item is a ``[left, bottom, right, top]`` list computed from the
    geolocated POIs matching the categories and term of ``data``:

    * Without territory: the box enclosing every matching POI, or the whole world when there is none.
    * With a territory and a ``competence`` or ``presence`` filter: the box enclosing the center of the territory
      and the POIs selected by this filter.
    * With a territory and no filter: the box enclosing the center of the territory and the POIs present in the
      territory or, when there is none, the POIs competent for it. When there is none either, a box centered on the
      territory whose half-size is derived from ``state.distance``.
    """
    from . import ramdb
    if data is None or data['bbox'] is not None:
        return data, None
    data = data.copy()
    categories_slug = set(state.base_categories_slug or [])
    if data['categories'] is not None:
        categories_slug.update(
            category.slug
            for category in data['categories']
            )
    territory = data['territory']
    pois_by_id = ramdb.pois_by_id

    def find_geolocated_pois(competence_territories_id = None, presence_territory = None):
        # Every search shares the same categories and term; only the territorial criteria vary.
        pois_id_iter = ramdb.iter_pois_id(categories_slug = categories_slug,
            competence_territories_id = competence_territories_id, presence_territory = presence_territory,
            term = data['term'])
        return [
            poi
            for poi in (
                pois_by_id[poi_id]
                for poi_id in pois_id_iter
                )
            if poi.geo is not None
            ]

    if territory is None:
        pois = find_geolocated_pois()
        if not pois:
            data['bbox'] = [-180.0, -90.0, 180.0, 90.0]
            return data, None
        # Without territory, the box is computed from the POIs only.
        latitudes = []
        longitudes = []
    else:
        center_latitude = territory.geo[0]
        center_longitude = territory.geo[1]
        pois_filter = data['filter']
        if pois_filter == 'competence':
            pois = find_geolocated_pois(
                competence_territories_id = ramdb.get_territory_related_territories_id(territory))
        elif pois_filter == 'presence':
            pois = find_geolocated_pois(presence_territory = territory)
        else:
            # When no filter is given, use the bounding box of the territory (ie the bounding box enclosing every POI
            # present in the territory).
            pois = find_geolocated_pois(presence_territory = territory)
            if not pois:
                # When no POI has been found in territory, use the bounding box enclosing every competent POI.
                pois = find_geolocated_pois(
                    competence_territories_id = ramdb.get_territory_related_territories_id(territory))
            if not pois:
                # When no present nor competent POI has been found, compute bounding box using given distance.
                # 6372.8 is presumably the Earth radius, in the same unit as state.distance -- TODO confirm.
                delta = math.degrees(state.distance / 6372.8)
                data['bbox'] = [
                    center_longitude - delta, # left
                    center_latitude - delta, # bottom
                    center_longitude + delta, # right
                    center_latitude + delta, # top
                    ]
                return data, None
        # The center of the territory always belongs to the box.
        latitudes = [center_latitude]
        longitudes = [center_longitude]
    latitudes.extend(poi.geo[0] for poi in pois)
    longitudes.extend(poi.geo[1] for poi in pois)
    data['bbox'] = [min(longitudes), min(latitudes), max(longitudes), max(latitudes)]
    return data, None
170
171
def id_name_dict_list_to_ignored_fields(value, state = default_state):
    """Convert a list of dicts having an ``id`` and an optional ``name`` to a dict of ignored fields.

    The result maps each ID either to the set of names given with it, or to ``None`` as soon as one item gives this
    ID without a name; names are then no longer recorded for this ID.

    Return a ``(ignored_fields, error)`` couple where ``error`` is always ``None``; ``ignored_fields`` is ``None``
    when ``value`` is empty or ``None``.
    """
    if not value:
        return None, None
    ignored_fields = {}
    for id_name_dict in value:
        field_id = id_name_dict['id']
        name = id_name_dict.get('name')
        if name is None:
            # An item without name overrides the names already collected for this ID, instead of being stored as
            # a None member of the names set.
            ignored_fields[field_id] = None
        elif field_id not in ignored_fields:
            ignored_fields[field_id] = set([name])
        elif ignored_fields[field_id] is not None:
            ignored_fields[field_id].add(name)
    return ignored_fields, None
189
190
def id_to_poi(poi_id, state = default_state):
    """Convert a POI ID to the POI stored under this ID in ``ramdb.pois_by_id``.

    Return a ``(poi, None)`` couple on success, ``(None, None)`` when ``poi_id`` is ``None`` and
    ``(poi_id, error_message)`` when no POI has this ID.
    """
    # Explicit relative import, like every other function of this module.
    from . import ramdb
    if poi_id is None:
        return poi_id, None
    poi = ramdb.pois_by_id.get(poi_id)
    if poi is None:
        return poi_id, state._("POI {0} doesn't exist").format(poi_id)
    return poi, None
199
200
def layer_data_to_clusters(data, state = default_state):
    """Group the POIs located in the bounding box of ``data`` into clusters.

    The POIs matching the categories, term and territorial filter of ``data`` are kept when they are geolocated and
    inside ``data['bbox']``. They are sorted by distance from the center of the box, the ``current`` POI (if any)
    being put first. Then each POI either joins the first cluster whose center is close enough, or starts a new
    cluster centered on itself.

    Return a ``(clusters, error)`` couple where ``error`` is always ``None``; ``clusters`` is ``None`` when ``data``
    is ``None``.
    """
    from . import model, ramdb
    if data is None:
        return None, None
    left, bottom, right, top = data['bbox']
    center_latitude = (bottom + top) / 2.0
    center_latitude_cos = math.cos(math.radians(center_latitude))
    center_latitude_sin = math.sin(math.radians(center_latitude))
    center_longitude = (left + right) / 2.0
    categories_slug = set(state.base_categories_slug or [])
    if data['categories'] is not None:
        categories_slug.update(
            category.slug
            for category in data['categories']
            )
    pois_filter = data['filter']
    territory = data['territory']
    related_territories_id = ramdb.get_territory_related_territories_id(territory) if territory is not None else None
    if pois_filter == 'competence':
        competence_territories_id = related_territories_id
        presence_territory = None
    elif pois_filter == 'presence':
        competence_territories_id = None
        presence_territory = territory
    else:
        competence_territories_id = None
        presence_territory = None
    pois_id_iter = ramdb.iter_pois_id(categories_slug = categories_slug,
        competence_territories_id = competence_territories_id, presence_territory = presence_territory,
        term = data['term'])
    pois_by_id = ramdb.pois_by_id
    current = data['current']
    # The current POI is excluded here, because it is inserted at the head of the list below.
    pois_iter = (
        poi
        for poi in (
            pois_by_id[poi_id]
            for poi_id in pois_id_iter
            )
        if poi.geo is not None and bottom <= poi.geo[0] <= top and left <= poi.geo[1] <= right and (
            current is None or poi._id != current._id)
        )

    def distance_from_center(poi):
        # Spherical law of cosines. Rounding errors can push the cosine slightly outside [-1, 1] (typically for a
        # POI located at the center of the map), which would make math.acos raise ValueError, so clamp it first.
        poi_latitude = math.radians(poi.geo[0])
        cosine = math.sin(poi_latitude) * center_latitude_sin \
            + math.cos(poi_latitude) * center_latitude_cos * math.cos(math.radians(poi.geo[1] - center_longitude))
        # 6372.8 is presumably the Earth radius in kilometers -- TODO confirm.
        return 6372.8 * math.acos(max(-1.0, min(1.0, cosine)))

    # Sort on the distance only (POIs themselves are never compared); the sort is stable for equal distances.
    pois = sorted(pois_iter, key = distance_from_center)
    if current is not None:
        pois.insert(0, current)
    # Maximal distances between a POI and the center of its cluster: 1/20 of the box width and 1/15 of its height.
    horizontal_iota = (right - left) / 20.0
    vertical_iota = (top - bottom) / 15.0
    clusters = []
    for poi in pois:
        poi_latitude = poi.geo[0]
        poi_longitude = poi.geo[1]
        for cluster in clusters:
            if abs(poi_latitude - cluster.center_latitude) <= vertical_iota \
                    and abs(poi_longitude - cluster.center_longitude) <= horizontal_iota:
                cluster.count += 1
                if poi_latitude == cluster.center_latitude and poi_longitude == cluster.center_longitude:
                    cluster.center_pois.append(poi)
                # Extend the bounds of the cluster to include the POI.
                if poi_latitude < cluster.bottom:
                    cluster.bottom = poi_latitude
                elif poi_latitude > cluster.top:
                    cluster.top = poi_latitude
                if poi_longitude < cluster.left:
                    cluster.left = poi_longitude
                elif poi_longitude > cluster.right:
                    cluster.right = poi_longitude
                break
        else:
            # No existing cluster is close enough: start a new one, centered on the POI.
            cluster = model.Cluster()
            cluster.competent = False # changed below
            cluster.count = 1
            cluster.bottom = cluster.top = cluster.center_latitude = poi_latitude
            cluster.left = cluster.right = cluster.center_longitude = poi_longitude
            cluster.center_pois = [poi]
            clusters.append(cluster)
        # Here, "cluster" is the cluster containing the POI. Its "competent" attribute only goes from False to None
        # (competence can't be determined for this POI) or to True (POI competent for the territory), and from
        # None to True.
        if cluster.competent is False:
            if related_territories_id is None or poi.competence_territories_id is None:
                cluster.competent = None
            elif not related_territories_id.isdisjoint(poi.competence_territories_id):
                cluster.competent = True
        elif cluster.competent is None and related_territories_id is not None \
                and poi.competence_territories_id is not None \
                and not related_territories_id.isdisjoint(poi.competence_territories_id):
            cluster.competent = True
    return clusters, None
304
305
def params_and_pois_iter_to_csv(params_and_pois_iter, state = default_state):
    """Convert a ``(params, pois_iter)`` couple to the text of a CSV file having one row per POI.

    The couple is received as a single positional argument and unpacked in the body (tuple parameters in the
    signature are not supported by Python 3), so existing callers passing a tuple are unaffected.

    Each POI provides its cells through ``poi.iter_csv_fields(state)``, which yields ``(field_ref, field)`` couples.
    The last item of ``field_ref`` is read as the rank of the field among the fields sharing the same reference;
    the other items identify the column. Columns are created on the fly, in order of first appearance, and are
    labelled with ``field.label``. Rows written before a column was created are not padded.

    Return a ``(csv_text, error)`` couple where ``error`` is always ``None``; ``csv_text`` is ``None`` when the
    couple or its POIs iterator is ``None``.
    """
    if params_and_pois_iter is None:
        return None, None
    params, pois_iter = params_and_pois_iter # params is not used to build the CSV
    if pois_iter is None:
        return None, None

    columns_label = []
    columns_ref = []
    rows = []
    for poi in pois_iter:
        # Index of the last column used in this row, for each column reference.
        columns_index = {}
        row = [None] * len(columns_ref)
        for field_ref, field in poi.iter_csv_fields(state):
            # Detect column number to use for field. Create a new column if needed.
            column_ref = tuple(field_ref[:-1])
            same_ref_columns_count = field_ref[-1]
            if columns_ref.count(column_ref) == same_ref_columns_count:
                column_index = len(columns_ref)
                columns_label.append(field.label) # or u' - '.join(label for label in field_ref[::2])
                columns_ref.append(column_ref)
                row.append(None)
            else:
                # Use the next column having this reference, after the one already used in this row (if any).
                column_index = columns_ref.index(column_ref, columns_index.get(column_ref, -1) + 1)
            columns_index[column_ref] = column_index
            row[column_index] = unicode(field.value).encode('utf-8')
        rows.append(row)

    # The csv module of Python 2 works on byte strings: cells are written encoded in UTF-8 and the whole file is
    # decoded at the end.
    csv_file = StringIO()
    writer = csv.writer(csv_file, delimiter = ',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    writer.writerow([label.encode("utf-8") for label in columns_label])
    writer.writerows(rows)
    return csv_file.getvalue().decode('utf-8'), None
337
338
def params_to_pois_csv(params, state = default_state):
    """Convert request parameters to the CSV text describing the matching POIs.

    Return ``(data, errors)`` when parameters are invalid, ``(None, None)`` when no POI matches them, otherwise the
    result of ``params_and_pois_iter_to_csv``.
    """
    from . import ramdb
    params_to_data = pipe(
        # "category" must be renamed before struct, to be able to use "categories" on errors.
        rename_item('category', 'categories'),
        struct(
            dict(
                categories = uniform_sequence(str_to_slug_to_category),
                filter = str_to_filter,
                term = str_to_slug,
                territory = str_to_postal_distribution_to_geolocated_territory,
                ),
            default = 'ignore',
            keep_missing_values = True,
            ),
        )
    data, errors = params_to_data(params, state = state)
    if errors is not None:
        return data, errors

    # Slugs of the categories that POIs must have: the base ones of the state, plus the requested ones.
    categories_slug = set(state.base_categories_slug or [])
    categories = data['categories']
    if categories is not None:
        for category in categories:
            categories_slug.add(category.slug)

    territory = data['territory']
    related_territories_id = None
    if territory is not None:
        related_territories_id = ramdb.get_territory_related_territories_id(territory)

    # The filter selects which territorial criterion is given to the search (none when there is no filter).
    pois_filter = data['filter']
    competence_territories_id = related_territories_id if pois_filter == 'competence' else None
    presence_territory = territory if pois_filter == 'presence' else None

    pois_id = list(ramdb.iter_pois_id(
        categories_slug = categories_slug,
        competence_territories_id = competence_territories_id,
        presence_territory = presence_territory,
        term = data['term'],
        ))
    if not pois_id:
        return None, None
    pois_iter = (ramdb.pois_by_id[poi_id] for poi_id in pois_id)
    return params_and_pois_iter_to_csv((params, pois_iter), state = state)
385
386
def params_to_pois_directory_data(params, state = default_state):
    """Convert request parameters to the data used by the "directory" mode.

    Same fields as the other ``params_to_pois_*`` converters, except that the territory is required and that the
    name of its class must belong to ``model.communes_kinds``.

    Return the ``(data, errors)`` couple produced by the conversion.
    """
    from . import model

    def is_commune(territory):
        return territory.__class__.__name__ in model.communes_kinds

    str_to_commune = pipe(
        str_to_postal_distribution_to_geolocated_territory,
        test(is_commune, error = N_(u'In "directory" mode, territory must be a commune')),
        test_exists(error = N_(u'In "directory" mode, a commune is required')),
        )
    fields_converter = struct(
        dict(
            categories = uniform_sequence(str_to_slug_to_category),
            filter = str_to_filter,
            term = str_to_slug,
            territory = str_to_commune,
            ),
        default = 'ignore',
        keep_missing_values = True,
        )
    params_to_data = pipe(
        # "category" must be renamed before struct, to be able to use "categories" on errors.
        rename_item('category', 'categories'),
        fields_converter,
        )
    return params_to_data(params, state = state)
407
408
def params_to_pois_layer_data(params, state = default_state):
    """Convert request parameters to the data used to build a layer of POIs.

    Besides the fields shared with the other ``params_to_pois_*`` converters, two fields are handled:

    * ``bbox``: a string of comma-separated numbers, converted to a list of floats in west longitude, south
      latitude, east longitude, north latitude order.
    * ``current``: the ID of a POI, converted to this POI, which must have geographical coordinates.

    Return the ``(data, errors)`` couple produced by the conversion.
    """
    def str_to_coordinate(bound):
        # Build a new converter for a required float between -bound and bound.
        return pipe(
            str_to_float,
            test_between(-bound, bound),
            exists,
            )

    str_to_bbox = pipe(
        function(lambda bbox: bbox.split(u',')),
        struct(
            [
                str_to_coordinate(180), # West longitude
                str_to_coordinate(90), # South latitude
                str_to_coordinate(180), # East longitude
                str_to_coordinate(90), # North latitude
                ],
            ),
        )
    str_to_current_poi = pipe(
        str_to_object_id,
        id_to_poi,
        test(lambda poi: poi.geo is not None, error = N_('POI has no geographical coordinates')),
        )
    fields_converter = struct(
        dict(
            bbox = str_to_bbox,
            categories = uniform_sequence(str_to_slug_to_category),
            current = str_to_current_poi,
            filter = str_to_filter,
            term = str_to_slug,
            territory = str_to_postal_distribution_to_geolocated_territory,
            ),
        default = 'ignore',
        keep_missing_values = True,
        )
    params_to_data = pipe(
        # "category" must be renamed before struct, to be able to use "categories" on errors.
        rename_item('category', 'categories'),
        fields_converter,
        )
    return params_to_data(params, state = state)
459
460
def params_to_pois_list_data(params, state = default_state):
    """Convert request parameters to the data used to build a paginated list of POIs.

    Besides the fields shared with the other ``params_to_pois_*`` converters, ``page`` is converted to an integer
    greater than or equal to 1, defaulting to 1, and is then renamed to ``page_number``.

    Return the ``(data, errors)`` couple produced by the conversion.
    """
    str_to_page_number = pipe(
        str_to_int,
        test_greater_or_equal(1),
        default(1),
        )
    fields_converter = struct(
        dict(
            categories = uniform_sequence(str_to_slug_to_category),
            filter = str_to_filter,
            page = str_to_page_number,
            term = str_to_slug,
            territory = str_to_postal_distribution_to_geolocated_territory,
            ),
        default = 'ignore',
        keep_missing_values = True,
        )
    params_to_data = pipe(
        # "category" must be renamed before struct, to be able to use "categories" on errors.
        rename_item('category', 'categories'),
        fields_converter,
        rename_item('page', 'page_number'),
        )
    return params_to_data(params, state = state)
481
482
def postal_distribution_to_territory(postal_distribution, state = default_state):
    """Convert a postal distribution to the territory registered for it in the RAM database.

    Return ``(territory, None)`` on success, ``(None, None)`` when ``postal_distribution`` is ``None`` and
    ``(postal_distribution, error_message)`` when either the ID of the territory or the territory itself is not
    found.
    """
    from . import ramdb
    if postal_distribution is None:
        return postal_distribution, None
    # Two lookups: postal distribution -> territory ID -> territory. Both failures give the same error.
    territory_id = ramdb.territories_id_by_postal_distribution.get(postal_distribution)
    territory = None if territory_id is None else ramdb.territories_by_id.get(territory_id)
    if territory is None:
        return postal_distribution, state._(u'Unknown territory')
    return territory, None
494
495
def str_to_category_slug(value, state = default_state):
    """Convert a string to a slug and check that it is a key of ``ramdb.categories_by_slug``.

    Return the ``(slug, error)`` couple produced by the conversion.
    """
    from . import ramdb

    def is_category_slug(slug):
        return slug in ramdb.categories_by_slug

    converter = pipe(
        str_to_slug,
        test(is_category_slug, error = N_(u'Invalid category')),
        )
    return converter(value, state = state)
502
503
# Converter of the "filter" parameter: the input is converted to a slug, which must be one of the two filter names
# tested by the functions of this module.
str_to_filter = pipe(str_to_slug, test_in(['competence', 'presence']))
508
509
def _territory_is_geolocated(territory):
    """Tell whether ``territory`` has geographical coordinates."""
    return territory.geo is not None


# Converter of the "territory" parameter: string -> postal distribution -> territory, which must be geolocated.
str_to_postal_distribution_to_geolocated_territory = pipe(
    str_to_postal_distribution,
    postal_distribution_to_territory,
    test(_territory_is_geolocated, error = N_(u'Territory has no geographical coordinates')),
    )
515
516
def str_to_slug_to_category(value, state = default_state):
    """Convert a string to the category having this slug in ``ramdb.categories_by_slug``.

    The category must have every tag listed in ``state.category_tags_slug`` (if any).

    Return the ``(category, error)`` couple produced by the conversion.
    """
    from . import ramdb

    def slug_to_category(slug):
        return ramdb.categories_by_slug[slug]

    def has_required_tags(category):
        # The state is read here, at test time, as the original lambda did.
        category_tags_slug = category.tags_slug or set()
        return category_tags_slug.issuperset(state.category_tags_slug or [])

    converter = pipe(
        str_to_category_slug,
        function(slug_to_category),
        test(has_required_tags, error = N_(u'Missing required tags for category')),
        )
    return converter(value, state = state)
525