Fix for ipv4 tunnel devices:
[opensuse:kernel.git] net/ipv4/ipip.c
/*
 *      Linux NET3:     IP/IP protocol decoder.
 *
 *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
 *
 *      Authors:
 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *      Fixes:
 *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
 *                                      a module taking up 2 pages).
 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *                                      to keep ip_forward happy.
 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
 *                                      I do not want to merge them together.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

        The purpose of this driver is to provide an IP tunnel through
        which you can tunnel network traffic transparently across subnets.

        This was written by looking at Nick Holloway's dummy driver
        Thanks for the great code!

                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95

        Minor tweaks:
                Cleaned up the code a little and added some pre-1.3.0 tweaks.
                dev->hard_header/hard_header_len changed to use no headers.
                Comments/bracketing tweaked.
                Made the tunnels use dev->name not tunnel: when error reporting.
                Added tx_dropped stat

                -Alan Cox       (Alan.Cox@linux.org) 21 March 95

        Reworked:
                Changed to tunnel to destination gateway in addition to the
                        tunnel's pointopoint address
                Almost completely rewritten
                Note:  There is currently no firewall or ICMP handling done.

                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

        When the tunnel_xmit() function is called, the skb contains the
        packet to be sent (plus a great deal of extra info), and dev
        contains the tunnel device that _we_ are.

        When we are passed a packet, we are expected to fill in the
        source address with our source IP address.

        What is the proper way to allocate, copy and free a buffer?
        After you allocate it, it is a "0 length" chunk of memory
        starting at zero.  If you want to add headers to the buffer
        later, you'll have to call "skb_reserve(skb, amount)" with
        the amount of memory you want reserved.  Then, you call
        "skb_put(skb, amount)" with the amount of space you want in
        the buffer.  skb_put() returns a pointer to the top (#0) of
        that buffer.  skb->len is set to the amount of space you have
        "allocated" with skb_put().  You can then write up to skb->len
        bytes to that buffer.  If you need more, you can call skb_put()
        again with the additional amount of space you need.  You can
        find out how much more space you can allocate by calling
        "skb_tailroom(skb)".
        Now, to add header space, call "skb_push(skb, header_len)".
        This creates space at the beginning of the buffer and returns
        a pointer to this new space.  If later you need to strip a
        header from a buffer, call "skb_pull(skb, header_len)".
        skb_headroom() will return how much space is left at the top
        of the buffer (before the main data).  Remember, this headroom
        space must be reserved before the skb_put() function is called.
        */

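/* A minimal sketch of the sequence described above (illustration only, not
 * part of this driver; "payload" and "payload_len" are assumptions of the
 * example, as is the use of GFP_ATOMIC and the bare "return" on failure):
 *
 *      struct sk_buff *skb;
 *      struct iphdr *iph;
 *
 *      skb = alloc_skb(payload_len + sizeof(struct iphdr) + LL_MAX_HEADER,
 *                      GFP_ATOMIC);
 *      if (skb == NULL)
 *              return;
 *      skb_reserve(skb, sizeof(struct iphdr) + LL_MAX_HEADER);
 *      memcpy(skb_put(skb, payload_len), payload, payload_len);
 *      iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));
 *
 * skb_headroom(skb) and skb_tailroom(skb) report how much space remains at
 * the front and the tail of the buffer after these calls.
 */
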
/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>

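/*
 * Tunnels are kept in small hash tables keyed on their endpoint addresses.
 * HASH() folds an IPv4 address into one of HASH_SIZE (16) buckets by
 * xor-ing the address with itself shifted right four bits and keeping the
 * low four bits of the result.
 */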
#define HASH_SIZE  16
#define HASH(addr) (((addr)^((addr)>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);

static struct net_device ipip_fb_tunnel_dev = {
        name:   "tunl0",
        init:   ipip_fb_tunnel_init,
};

static struct ip_tunnel ipip_fb_tunnel = {
        dev:    &ipip_fb_tunnel_dev,
        parms:  { name: "tunl0", }
};

static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

static rwlock_t ipip_lock = RW_LOCK_UNLOCKED;

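/*
 * Look up the tunnel matching an outer header: an exact remote+local match
 * is tried first, then remote-only, then local-only, and finally the
 * wildcard (fallback) tunnel.  Only devices that are up are returned.
 * Callers hold ipip_lock for reading.
 */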
static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
{
        unsigned h0 = HASH(remote);
        unsigned h1 = HASH(local);
        struct ip_tunnel *t;

        for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        for (t = tunnels_r[h0]; t; t = t->next) {
                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        for (t = tunnels_l[h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
                return t;
        return NULL;
}

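/*
 * Pick the hash chain a tunnel belongs to.  prio encodes which endpoints
 * are set (0 = wildcard, 1 = local only, 2 = remote only, 3 = both) and
 * indexes the tunnels[] table above; h is the xor of the endpoint hashes.
 */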
static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
        u32 remote = t->parms.iph.daddr;
        u32 local = t->parms.iph.saddr;
        unsigned h = 0;
        int prio = 0;

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        return &tunnels[prio][h];
}


static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
        struct ip_tunnel **tp;

        for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
                if (t == *tp) {
                        write_lock_bh(&ipip_lock);
                        *tp = t->next;
                        write_unlock_bh(&ipip_lock);
                        break;
                }
        }
}

static void ipip_tunnel_link(struct ip_tunnel *t)
{
        struct ip_tunnel **tp = ipip_bucket(t);

        t->next = *tp;
        write_lock_bh(&ipip_lock);
        *tp = t;
        write_unlock_bh(&ipip_lock);
}

struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
        u32 remote = parms->iph.daddr;
        u32 local = parms->iph.saddr;
        struct ip_tunnel *t, **tp, *nt;
        struct net_device *dev;
        unsigned h = 0;
        int prio = 0;

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
                        return t;
        }
        if (!create)
                return NULL;

        MOD_INC_USE_COUNT;
        dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
        if (dev == NULL) {
                MOD_DEC_USE_COUNT;
                return NULL;
        }
        memset(dev, 0, sizeof(*dev) + sizeof(*t));
        dev->priv = (void*)(dev+1);
        nt = (struct ip_tunnel*)dev->priv;
        nt->dev = dev;
        dev->init = ipip_tunnel_init;
        dev->features |= NETIF_F_DYNALLOC;
        memcpy(&nt->parms, parms, sizeof(*parms));
        nt->parms.name[IFNAMSIZ-1] = '\0';
        strcpy(dev->name, nt->parms.name);
        if (dev->name[0] == 0) {
                int i;
                for (i=1; i<100; i++) {
                        sprintf(dev->name, "tunl%d", i);
                        if (__dev_get_by_name(dev->name) == NULL)
                                break;
                }
                if (i==100)
                        goto failed;
                memcpy(nt->parms.name, dev->name, IFNAMSIZ);
        }
        if (register_netdevice(dev) < 0)
                goto failed;

        dev_hold(dev);
        ipip_tunnel_link(nt);
        /* Do not decrement MOD_USE_COUNT here. */
        return nt;

failed:
        kfree(dev);
        MOD_DEC_USE_COUNT;
        return NULL;
}

static void ipip_tunnel_destructor(struct net_device *dev)
{
        if (dev != &ipip_fb_tunnel_dev) {
                MOD_DEC_USE_COUNT;
        }
}

static void ipip_tunnel_uninit(struct net_device *dev)
{
        if (dev == &ipip_fb_tunnel_dev) {
                write_lock_bh(&ipip_lock);
                tunnels_wc[0] = NULL;
                write_unlock_bh(&ipip_lock);
        } else
                ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
        dev_put(dev);
}

void ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        struct iphdr *iph = (struct iphdr*)skb->data;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        struct ip_tunnel *t;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        read_lock(&ipip_lock);
        t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipip_lock);
        return;
#else
        struct iphdr *iph = (struct iphdr*)dp;
        int hlen = iph->ihl<<2;
        struct iphdr *eiph;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        int rel_type = 0;
        int rel_code = 0;
        int rel_info = 0;
        struct sk_buff *skb2;
        struct rtable *rt;

        if (len < hlen + sizeof(struct iphdr))
                return;
        eiph = (struct iphdr*)(dp + hlen);

        switch (type) {
        default:
                return;
        case ICMP_PARAMETERPROB:
                if (skb->h.icmph->un.gateway < hlen)
                        return;

                /* So... This guy found something strange INSIDE encapsulated
                   packet. Well, he is a fool, but what can we do?
                 */
                rel_type = ICMP_PARAMETERPROB;
                rel_info = skb->h.icmph->un.gateway - hlen;
                break;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* And it is the only really necessary thing :-) */
                        rel_info = ntohs(skb->h.icmph->un.frag.mtu);
                        if (rel_info < hlen+68)
                                return;
                        rel_info -= hlen;
                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
                        if (rel_info > ntohs(eiph->tot_len))
                                return;
                        break;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe, it is just ether pollution. --ANK
                         */
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        /* Prepare fake skb to feed it to icmp_send */
        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (skb2 == NULL)
                return;
        dst_release(skb2->dst);
        skb2->dst = NULL;
        skb_pull(skb2, skb->data - (u8*)eiph);
        skb2->nh.raw = skb2->data;

        /* Try to guess incoming interface */
        if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
                kfree_skb(skb2);
                return;
        }
        skb2->dev = rt->u.dst.dev;

        /* route "incoming" packet */
        if (rt->rt_flags&RTCF_LOCAL) {
                ip_rt_put(rt);
                rt = NULL;
                if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
                    rt->u.dst.dev->type != ARPHRD_IPGRE) {
                        ip_rt_put(rt);
                        kfree_skb(skb2);
                        return;
                }
        } else {
                ip_rt_put(rt);
                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
                    skb2->dst->dev->type != ARPHRD_IPGRE) {
                        kfree_skb(skb2);
                        return;
                }
        }

        /* change mtu on this route */
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                if (rel_info > skb2->dst->pmtu) {
                        kfree_skb(skb2);
                        return;
                }
                skb2->dst->pmtu = rel_info;
                rel_info = htonl(rel_info);
        } else if (type == ICMP_TIME_EXCEEDED) {
                struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
                if (t->parms.iph.ttl) {
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                }
        }

        icmp_send(skb2, rel_type, rel_code, rel_info);
        kfree_skb(skb2);
        return;
#endif
}

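/*
 * ECN decapsulation: iph is the outer header, skb->nh.iph already points
 * at the inner one.  If the outer header carries the CE (congestion
 * experienced) mark and the inner header is not yet marked, propagate CE
 * to the inner header.
 */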
static inline void ipip_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
        if (INET_ECN_is_ce(iph->tos) &&
            INET_ECN_is_not_ce(skb->nh.iph->tos))
                IP_ECN_set_ce(skb->nh.iph);
}

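/*
 * Receive path.  On entry skb->data points at the inner IP header while
 * skb->nh still refers to the outer header; the outer header is used for
 * the tunnel lookup and ECN handling, then the inner header becomes the
 * new network header before the packet is requeued with netif_rx().
 */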
int ipip_rcv(struct sk_buff *skb)
{
        struct iphdr *iph;
        struct ip_tunnel *tunnel;

        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto out;

        iph = skb->nh.iph;
        skb->mac.raw = skb->nh.raw;
        skb->nh.raw = skb->data;
        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
        skb->protocol = __constant_htons(ETH_P_IP);
        skb->pkt_type = PACKET_HOST;

        read_lock(&ipip_lock);
        if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
                tunnel->stat.rx_packets++;
                tunnel->stat.rx_bytes += skb->len;
                skb->dev = tunnel->dev;
                dst_release(skb->dst);
                skb->dst = NULL;
#ifdef CONFIG_NETFILTER
                nf_conntrack_put(skb->nfct);
                skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
                skb->nf_debug = 0;
#endif
#endif
                ipip_ecn_decapsulate(iph, skb);
                netif_rx(skb);
                read_unlock(&ipip_lock);
                return 0;
        }
        read_unlock(&ipip_lock);

        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
out:
        kfree_skb(skb);
        return 0;
}

/* Need this wrapper because NF_HOOK takes the function address */
static inline int do_ip_send(struct sk_buff *skb)
{
        return ip_send(skb);
}

/*
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
        struct net_device_stats *stats = &tunnel->stat;
        struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        u16    df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        struct iphdr  *old_iph = skb->nh.iph;
        struct iphdr  *iph;                     /* Our new IP header */
        int    max_headroom;                    /* The extra header space needed */
        u32    dst = tiph->daddr;
        int    mtu;

        if (tunnel->recursion++) {
                tunnel->stat.collisions++;
                goto tx_error;
        }

        if (skb->protocol != __constant_htons(ETH_P_IP))
                goto tx_error;

        if (tos&1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                if ((rt = (struct rtable*)skb->dst) == NULL) {
                        tunnel->stat.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
                tunnel->stat.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->u.dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                tunnel->stat.collisions++;
                goto tx_error;
        }

        if (tiph->frag_off)
                mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
        else
                mtu = skb->dst ? skb->dst->pmtu : dev->mtu;

        if (mtu < 68) {
                tunnel->stat.collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (skb->dst && mtu < skb->dst->pmtu)
                skb->dst->pmtu = mtu;

        df |= (old_iph->frag_off&__constant_htons(IP_DF));

        if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        skb->h.raw = skb->nh.raw;

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
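        /* Outer headroom needed: the lower device's link-layer header length,
           rounded up to a 16-byte multiple, plus room for the new outer IP
           header that is pushed below. */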
        max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
        }

        skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       skb->nh.iph;
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       rt->rt_dst;
        iph->saddr              =       rt->rt_src;

        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

#ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
        skb->nf_debug = 0;
#endif
#endif

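        /* IPTUNNEL_XMIT() (a macro from <net/ipip.h>) finalises the new outer
           header, hands the skb to the IP output path and updates the tx
           statistics; see the header for its exact contents. */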
        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;

        MOD_INC_USE_COUNT;

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == &ipip_fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(&p, 0);
                }
                if (t == NULL)
                        t = (struct ip_tunnel*)dev->priv;
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)))
                        goto done;
                if (p.iph.ttl)
                        p.iph.frag_off |= __constant_htons(IP_DF);

                t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

                if (dev != &ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL &&
                    t != &ipip_fb_tunnel) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = (struct ip_tunnel*)dev->priv;
                                ipip_tunnel_unlink(t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == &ipip_fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t == &ipip_fb_tunnel)
                                goto done;
                        dev = t->dev;
                }
                err = unregister_netdevice(dev);
                break;

        default:
                err = -EINVAL;
        }

done:
        MOD_DEC_USE_COUNT;
        return err;
}

static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
        return &(((struct ip_tunnel*)dev->priv)->stat);
}

static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

static void ipip_tunnel_init_gen(struct net_device *dev)
{
        struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;

        dev->uninit             = ipip_tunnel_uninit;
        dev->destructor         = ipip_tunnel_destructor;
        dev->hard_start_xmit    = ipip_tunnel_xmit;
        dev->get_stats          = ipip_tunnel_get_stats;
        dev->do_ioctl           = ipip_tunnel_ioctl;
        dev->change_mtu         = ipip_tunnel_change_mtu;

        dev->type               = ARPHRD_TUNNEL;
        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
        dev->mtu                = 1500 - sizeof(struct iphdr);
        dev->flags              = IFF_NOARP;
        dev->iflink             = 0;
        dev->addr_len           = 4;
        memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
}

static int ipip_tunnel_init(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        struct iphdr *iph;

        tunnel = (struct ip_tunnel*)dev->priv;
        iph = &tunnel->parms.iph;

        ipip_tunnel_init_gen(dev);

        if (iph->daddr) {
                struct rtable *rt;
                if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
                        tdev = rt->u.dst.dev;
                        ip_rt_put(rt);
                }
                dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->parms.link);

        if (tdev) {
                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
                dev->mtu = tdev->mtu - sizeof(struct iphdr);
        }
        dev->iflink = tunnel->parms.link;

        return 0;
}

#ifdef MODULE
static int ipip_fb_tunnel_open(struct net_device *dev)
{
        MOD_INC_USE_COUNT;
        return 0;
}

static int ipip_fb_tunnel_close(struct net_device *dev)
{
        MOD_DEC_USE_COUNT;
        return 0;
}
#endif

int __init ipip_fb_tunnel_init(struct net_device *dev)
{
        struct iphdr *iph;

        ipip_tunnel_init_gen(dev);
#ifdef MODULE
        dev->open               = ipip_fb_tunnel_open;
        dev->stop               = ipip_fb_tunnel_close;
#endif

        iph = &ipip_fb_tunnel.parms.iph;
        iph->version            = 4;
        iph->protocol           = IPPROTO_IPIP;
        iph->ihl                = 5;

        dev_hold(dev);
        tunnels_wc[0]           = &ipip_fb_tunnel;
        return 0;
}

static struct inet_protocol ipip_protocol = {
        handler:        ipip_rcv,
        err_handler:    ipip_err,
        protocol:       IPPROTO_IPIP,
        name:           "IPIP"
};

static char banner[] __initdata =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";

int __init ipip_init(void)
{
        printk(banner);

        ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel;
        register_netdev(&ipip_fb_tunnel_dev);
        inet_add_protocol(&ipip_protocol);
        return 0;
}

static void __exit ipip_fini(void)
{
        if ( inet_del_protocol(&ipip_protocol) < 0 )
                printk(KERN_INFO "ipip close: can't remove protocol\n");

        unregister_netdev(&ipip_fb_tunnel_dev);
}

#ifdef MODULE
module_init(ipip_init);
#endif
module_exit(ipip_fini);
MODULE_LICENSE("GPL");