Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

ip_gre.c

Go to the documentation of this file.
00001 /*
00002  *      Linux NET3:     GRE over IP protocol decoder. 
00003  *
00004  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
00005  *
00006  *      This program is free software; you can redistribute it and/or
00007  *      modify it under the terms of the GNU General Public License
00008  *      as published by the Free Software Foundation; either version
00009  *      2 of the License, or (at your option) any later version.
00010  *
00011  */
00012 
00013 #include <linux/config.h>
00014 #include <linux/module.h>
00015 #include <linux/types.h>
00016 #include <linux/sched.h>
00017 #include <linux/kernel.h>
00018 #include <asm/uaccess.h>
00019 #include <linux/skbuff.h>
00020 #include <linux/netdevice.h>
00021 #include <linux/in.h>
00022 #include <linux/tcp.h>
00023 #include <linux/udp.h>
00024 #include <linux/if_arp.h>
00025 #include <linux/mroute.h>
00026 #include <linux/init.h>
00027 #include <linux/in6.h>
00028 #include <linux/inetdevice.h>
00029 #include <linux/igmp.h>
00030 
00031 #include <net/sock.h>
00032 #include <net/ip.h>
00033 #include <net/icmp.h>
00034 #include <net/protocol.h>
00035 #include <net/ipip.h>
00036 #include <net/arp.h>
00037 #include <net/checksum.h>
00038 
00039 #ifdef CONFIG_IPV6
00040 #include <net/ipv6.h>
00041 #include <net/ip6_fib.h>
00042 #include <net/ip6_route.h>
00043 #endif
00044 
00045 /*
00046    Problems & solutions
00047    --------------------
00048 
00049    1. The most important issue is detecting local dead loops.
00050    They would cause complete host lockup in transmit, which
00051    would be "resolved" by stack overflow or, if queueing is enabled,
00052    with infinite looping in net_bh.
00053 
00054    We cannot track such dead loops during route installation,
00055    it is infeasible task. The most general solutions would be
00056    to keep skb->encapsulation counter (sort of local ttl),
00057    and silently drop packet when it expires. It is the best
00058    solution, but it supposes maintaining a new variable in ALL
00059    skb, even if no tunneling is used.
00060 
00061    Current solution: t->recursion lock breaks dead loops. It looks 
00062    like dev->tbusy flag, but I preferred new variable, because
00063    the semantics is different. One day, when hard_start_xmit
00064    will be multithreaded we will have to use skb->encapsulation.
00065 
00066 
00067 
00068    2. Networking dead loops would not kill routers, but would really
00069    kill network. IP hop limit plays role of "t->recursion" in this case,
00070    if we copy it from packet being encapsulated to upper header.
00071    It is very good solution, but it introduces two problems:
00072 
00073    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
00074      do not work over tunnels.
00075    - traceroute does not work. I planned to relay ICMP from tunnel,
00076      so that this problem would be solved and traceroute output
00077      would be even more informative. This idea appeared to be wrong:
00078      only Linux complies to rfc1812 now (yes, guys, Linux is the only
00079      true router now :-)), all routers (at least, in neighbourhood of mine)
00080      return only 8 bytes of payload. It is the end.
00081 
00082    Hence, if we want that OSPF worked or traceroute said something reasonable,
00083    we should search for another solution.
00084 
00085    One of them is to parse packet trying to detect inner encapsulation
00086    made by our node. It is difficult or even impossible, especially,
00087    taking into account fragmentation. To be short, it is not a solution at all.
00088 
00089    Current solution: The solution was UNEXPECTEDLY SIMPLE.
00090    We force DF flag on tunnels with preconfigured hop limit,
00091    that is ALL. :-) Well, it does not remove the problem completely,
00092    but exponential growth of network traffic is changed to linear
00093    (branches, that exceed pmtu are pruned) and tunnel mtu
00094    fastly degrades to value <68, where looping stops.
00095    Yes, it is not good if there exists a router in the loop,
00096    which does not force DF, even when encapsulating packets have DF set.
00097    But it is not our problem! Nobody could accuse us, we made
00098    all that we could make. Even if it is your gated who injected
00099    fatal route to network, even if it were you who configured
00100    fatal static route: you are innocent. :-)
00101 
00102 
00103 
00104    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
00105    practically identical code. It would be good to glue them
00106    together, but it is not very evident, how to make them modular.
00107    sit is integral part of IPv6, ipip and gre are naturally modular.
00108    We could extract common parts (hash table, ioctl etc)
00109    to a separate module (ip_tunnel.c).
00110 
00111    Alexey Kuznetsov.
00112  */
00113 
00114 static int ipgre_tunnel_init(struct device *dev);
00115 
00116 /* Fallback tunnel: no source, no destination, no key, no options */
00117 
00118 static int ipgre_fb_tunnel_init(struct device *dev);
00119 
00120 static struct device ipgre_fb_tunnel_dev = {
00121         NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init,
00122 };
00123 
00124 static struct ip_tunnel ipgre_fb_tunnel = {
00125         NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
00126 };
00127 
00128 /* Tunnel hash table */
00129 
00130 /*
00131    4 hash tables:
00132 
00133    3: (remote,local)
00134    2: (remote,*)
00135    1: (*,local)
00136    0: (*,*)
00137 
00138    We require exact key match i.e. if a key is present in packet
00139    it will match only tunnel with the same key; if it is not present,
00140    it will match only keyless tunnel.
00141 
00142    All keyless packets, if not matched by configured keyless tunnels,
00143    will match fallback tunnel.
00144  */
00145 
/* Number of buckets in each tunnel hash table; must stay a power of two. */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that expressions built from
 * low-precedence operators (e.g. HASH(a|b)) hash the value of the whole
 * expression.  Note that the argument is still evaluated twice, so it
 * must not have side effects.
 */
#define HASH(addr) (((addr)^((addr)>>4))&(HASH_SIZE-1))
00148 
00149 static struct ip_tunnel *tunnels[4][HASH_SIZE];
00150 
00151 #define tunnels_r_l     (tunnels[3])
00152 #define tunnels_r       (tunnels[2])
00153 #define tunnels_l       (tunnels[1])
00154 #define tunnels_wc      (tunnels[0])
00155 
00156 /* Given src, dst and key, find approriate for input tunnel. */
00157 
00158 static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
00159 {
00160         unsigned h0 = HASH(remote);
00161         unsigned h1 = HASH(key);
00162         struct ip_tunnel *t;
00163 
00164         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
00165                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
00166                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
00167                                 return t;
00168                 }
00169         }
00170         for (t = tunnels_r[h0^h1]; t; t = t->next) {
00171                 if (remote == t->parms.iph.daddr) {
00172                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
00173                                 return t;
00174                 }
00175         }
00176         for (t = tunnels_l[h1]; t; t = t->next) {
00177                 if (local == t->parms.iph.saddr ||
00178                      (local == t->parms.iph.daddr && MULTICAST(local))) {
00179                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
00180                                 return t;
00181                 }
00182         }
00183         for (t = tunnels_wc[h1]; t; t = t->next) {
00184                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
00185                         return t;
00186         }
00187         if (ipgre_fb_tunnel_dev.flags&IFF_UP)
00188                 return &ipgre_fb_tunnel;
00189         return NULL;
00190 }
00191 
00192 static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
00193 {
00194         u32 remote = t->parms.iph.daddr;
00195         u32 local = t->parms.iph.saddr;
00196         u32 key = t->parms.i_key;
00197         unsigned h = HASH(key);
00198         int prio = 0;
00199 
00200         if (local)
00201                 prio |= 1;
00202         if (remote && !MULTICAST(remote)) {
00203                 prio |= 2;
00204                 h ^= HASH(remote);
00205         }
00206 
00207         return &tunnels[prio][h];
00208 }
00209 
00210 static void ipgre_tunnel_link(struct ip_tunnel *t)
00211 {
00212         struct ip_tunnel **tp = ipgre_bucket(t);
00213 
00214         t->next = *tp;
00215         wmb();
00216         *tp = t;
00217 }
00218 
00219 static void ipgre_tunnel_unlink(struct ip_tunnel *t)
00220 {
00221         struct ip_tunnel **tp;
00222 
00223         for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
00224                 if (t == *tp) {
00225                         *tp = t->next;
00226                         synchronize_bh();
00227                         break;
00228                 }
00229         }
00230 }
00231 
00232 static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
00233 {
00234         u32 remote = parms->iph.daddr;
00235         u32 local = parms->iph.saddr;
00236         u32 key = parms->i_key;
00237         struct ip_tunnel *t, **tp, *nt;
00238         struct device *dev;
00239         unsigned h = HASH(key);
00240         int prio = 0;
00241 
00242         if (local)
00243                 prio |= 1;
00244         if (remote && !MULTICAST(remote)) {
00245                 prio |= 2;
00246                 h ^= HASH(remote);
00247         }
00248         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
00249                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
00250                         if (key == t->parms.i_key)
00251                                 return t;
00252                 }
00253         }
00254         if (!create)
00255                 return NULL;
00256 
00257         MOD_INC_USE_COUNT;
00258         dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
00259         if (dev == NULL) {
00260                 MOD_DEC_USE_COUNT;
00261                 return NULL;
00262         }
00263         memset(dev, 0, sizeof(*dev) + sizeof(*t));
00264         dev->priv = (void*)(dev+1);
00265         nt = (struct ip_tunnel*)dev->priv;
00266         nt->dev = dev;
00267         dev->name = nt->parms.name;
00268         dev->init = ipgre_tunnel_init;
00269         memcpy(&nt->parms, parms, sizeof(*parms));
00270         if (dev->name[0] == 0) {
00271                 int i;
00272                 for (i=1; i<100; i++) {
00273                         sprintf(dev->name, "gre%d", i);
00274                         if (dev_get(dev->name) == NULL)
00275                                 break;
00276                 }
00277                 if (i==100)
00278                         goto failed;
00279                 memcpy(parms->name, dev->name, IFNAMSIZ);
00280         }
00281         if (register_netdevice(dev) < 0)
00282                 goto failed;
00283 
00284         ipgre_tunnel_link(nt);
00285         /* Do not decrement MOD_USE_COUNT here. */
00286         return nt;
00287 
00288 failed:
00289         kfree(dev);
00290         MOD_DEC_USE_COUNT;
00291         return NULL;
00292 }
00293 
00294 static void ipgre_tunnel_destroy(struct device *dev)
00295 {
00296         ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
00297 
00298         if (dev != &ipgre_fb_tunnel_dev) {
00299                 kfree(dev);
00300                 MOD_DEC_USE_COUNT;
00301         }
00302 }
00303 
00304 
00305 void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
00306 {
00307 #ifndef I_WISH_WORLD_WERE_PERFECT
00308 
00309 /* It is not :-( All the routers (except for Linux) return only
00310    8 bytes of packet payload. It means, that precise relaying of
00311    ICMP in the real Internet is absolutely infeasible.
00312 
00313    Moreover, Cisco "wise men" put GRE key to the third word
00314    in GRE header. It makes impossible maintaining even soft state for keyed
00315    GRE tunnels with enabled checksum. Tell them "thank you".
00316 
00317    Well, I wonder, rfc1812 was written by Cisco employee,
00318    what the hell these idiots break standrads established
00319    by themself???
00320  */
00321 
00322         struct iphdr *iph = (struct iphdr*)dp;
00323         u16          *p = (u16*)(dp+(iph->ihl<<2));
00324         int grehlen = (iph->ihl<<2) + 4;
00325         int type = skb->h.icmph->type;
00326         int code = skb->h.icmph->code;
00327         struct ip_tunnel *t;
00328         u16 flags;
00329 
00330         flags = p[0];
00331         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
00332                 if (flags&(GRE_VERSION|GRE_ROUTING))
00333                         return;
00334                 if (flags&GRE_KEY) {
00335                         grehlen += 4;
00336                         if (flags&GRE_CSUM)
00337                                 grehlen += 4;
00338                 }
00339         }
00340 
00341         /* If only 8 bytes returned, keyed message will be dropped here */
00342         if (len < grehlen)
00343                 return;
00344 
00345         switch (type) {
00346         default:
00347         case ICMP_PARAMETERPROB:
00348                 return;
00349 
00350         case ICMP_DEST_UNREACH:
00351                 switch (code) {
00352                 case ICMP_SR_FAILED:
00353                 case ICMP_PORT_UNREACH:
00354                         /* Impossible event. */
00355                         return;
00356                 case ICMP_FRAG_NEEDED:
00357                         /* Soft state for pmtu is maintained by IP core. */
00358                         return;
00359                 default:
00360                         /* All others are translated to HOST_UNREACH.
00361                            rfc2003 contains "deep thoughts" about NET_UNREACH,
00362                            I believe they are just ether pollution. --ANK
00363                          */
00364                         break;
00365                 }
00366                 break;
00367         case ICMP_TIME_EXCEEDED:
00368                 if (code != ICMP_EXC_TTL)
00369                         return;
00370                 break;
00371         }
00372 
00373         t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
00374         if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
00375                 return;
00376 
00377         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
00378                 return;
00379 
00380         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
00381                 t->err_count++;
00382         else
00383                 t->err_count = 1;
00384         t->err_time = jiffies;
00385         return;
00386 #else
00387         struct iphdr *iph = (struct iphdr*)dp;
00388         struct iphdr *eiph;
00389         u16          *p = (u16*)(dp+(iph->ihl<<2));
00390         int type = skb->h.icmph->type;
00391         int code = skb->h.icmph->code;
00392         int rel_type = 0;
00393         int rel_code = 0;
00394         int rel_info = 0;
00395         u16 flags;
00396         int grehlen = (iph->ihl<<2) + 4;
00397         struct sk_buff *skb2;
00398         struct rtable *rt;
00399 
00400         if (p[1] != __constant_htons(ETH_P_IP))
00401                 return;
00402 
00403         flags = p[0];
00404         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
00405                 if (flags&(GRE_VERSION|GRE_ROUTING))
00406                         return;
00407                 if (flags&GRE_CSUM)
00408                         grehlen += 4;
00409                 if (flags&GRE_KEY)
00410                         grehlen += 4;
00411                 if (flags&GRE_SEQ)
00412                         grehlen += 4;
00413         }
00414         if (len < grehlen + sizeof(struct iphdr))
00415                 return;
00416         eiph = (struct iphdr*)(dp + grehlen);
00417 
00418         switch (type) {
00419         default:
00420                 return;
00421         case ICMP_PARAMETERPROB:
00422                 if (skb->h.icmph->un.gateway < (iph->ihl<<2))
00423                         return;
00424 
00425                 /* So... This guy found something strange INSIDE encapsulated
00426                    packet. Well, he is fool, but what can we do ?
00427                  */
00428                 rel_type = ICMP_PARAMETERPROB;
00429                 rel_info = skb->h.icmph->un.gateway - grehlen;
00430                 break;
00431 
00432         case ICMP_DEST_UNREACH:
00433                 switch (code) {
00434                 case ICMP_SR_FAILED:
00435                 case ICMP_PORT_UNREACH:
00436                         /* Impossible event. */
00437                         return;
00438                 case ICMP_FRAG_NEEDED:
00439                         /* And it is the only really necesary thing :-) */
00440                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
00441                         if (rel_info < grehlen+68)
00442                                 return;
00443                         rel_info -= grehlen;
00444                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
00445                         if (rel_info > ntohs(eiph->tot_len))
00446                                 return;
00447                         break;
00448                 default:
00449                         /* All others are translated to HOST_UNREACH.
00450                            rfc2003 contains "deep thoughts" about NET_UNREACH,
00451                            I believe, it is just ether pollution. --ANK
00452                          */
00453                         rel_type = ICMP_DEST_UNREACH;
00454                         rel_code = ICMP_HOST_UNREACH;
00455                         break;
00456                 }
00457                 break;
00458         case ICMP_TIME_EXCEEDED:
00459                 if (code != ICMP_EXC_TTL)
00460                         return;
00461                 break;
00462         }
00463 
00464         /* Prepare fake skb to feed it to icmp_send */
00465         skb2 = skb_clone(skb, GFP_ATOMIC);
00466         if (skb2 == NULL)
00467                 return;
00468         dst_release(skb2->dst);
00469         skb2->dst = NULL;
00470         skb_pull(skb2, skb->data - (u8*)eiph);
00471         skb2->nh.raw = skb2->data;
00472 
00473         /* Try to guess incoming interface */
00474         if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
00475                 kfree_skb(skb2);
00476                 return;
00477         }
00478         skb2->dev = rt->u.dst.dev;
00479 
00480         /* route "incoming" packet */
00481         if (rt->rt_flags&RTCF_LOCAL) {
00482                 ip_rt_put(rt);
00483                 rt = NULL;
00484                 if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
00485                     rt->u.dst.dev->type != ARPHRD_IPGRE) {
00486                         ip_rt_put(rt);
00487                         kfree_skb(skb2);
00488                         return;
00489                 }
00490         } else {
00491                 ip_rt_put(rt);
00492                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
00493                     skb2->dst->dev->type != ARPHRD_IPGRE) {
00494                         kfree_skb(skb2);
00495                         return;
00496                 }
00497         }
00498 
00499         /* change mtu on this route */
00500         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
00501                 if (rel_info > skb2->dst->pmtu) {
00502                         kfree_skb(skb2);
00503                         return;
00504                 }
00505                 skb2->dst->pmtu = rel_info;
00506                 rel_info = htonl(rel_info);
00507         } else if (type == ICMP_TIME_EXCEEDED) {
00508                 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
00509                 if (t->parms.iph.ttl) {
00510                         rel_type = ICMP_DEST_UNREACH;
00511                         rel_code = ICMP_HOST_UNREACH;
00512                 }
00513         }
00514 
00515         icmp_send(skb2, rel_type, rel_code, rel_info);
00516         kfree_skb(skb2);
00517 #endif
00518 }
00519 
00520 int ipgre_rcv(struct sk_buff *skb, unsigned short len)
00521 {
00522         struct iphdr *iph = skb->nh.iph;
00523         u8     *h = skb->h.raw;
00524         u16    flags = *(u16*)h;
00525         u16    csum = 0;
00526         u32    key = 0;
00527         u32    seqno = 0;
00528         struct ip_tunnel *tunnel;
00529         int    offset = 4;
00530 
00531         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
00532                 /* - Version must be 0.
00533                    - We do not support routing headers.
00534                  */
00535                 if (flags&(GRE_VERSION|GRE_ROUTING))
00536                         goto drop;
00537 
00538                 if (flags&GRE_CSUM) {
00539                         csum = ip_compute_csum(h, len);
00540                         offset += 4;
00541                 }
00542                 if (flags&GRE_KEY) {
00543                         key = *(u32*)(h + offset);
00544                         offset += 4;
00545                 }
00546                 if (flags&GRE_SEQ) {
00547                         seqno = ntohl(*(u32*)(h + offset));
00548                         offset += 4;
00549                 }
00550         }
00551 
00552         if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
00553                 skb->mac.raw = skb->nh.raw;
00554                 skb->nh.raw = skb_pull(skb, h + offset - skb->data);
00555                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
00556                 skb->ip_summed = 0;
00557                 skb->protocol = *(u16*)(h + 2);
00558                 skb->pkt_type = PACKET_HOST;
00559 #ifdef CONFIG_NET_IPGRE_BROADCAST
00560                 if (MULTICAST(iph->daddr)) {
00561                         /* Looped back packet, drop it! */
00562                         if (((struct rtable*)skb->dst)->key.iif == 0)
00563                                 goto drop;
00564                         tunnel->stat.multicast++;
00565                         skb->pkt_type = PACKET_BROADCAST;
00566                 }
00567 #endif
00568 
00569                 if (((flags&GRE_CSUM) && csum) ||
00570                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
00571                         tunnel->stat.rx_crc_errors++;
00572                         tunnel->stat.rx_errors++;
00573                         goto drop;
00574                 }
00575                 if (tunnel->parms.i_flags&GRE_SEQ) {
00576                         if (!(flags&GRE_SEQ) ||
00577                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
00578                                 tunnel->stat.rx_fifo_errors++;
00579                                 tunnel->stat.rx_errors++;
00580                                 goto drop;
00581                         }
00582                         tunnel->i_seqno = seqno + 1;
00583                 }
00584                 tunnel->stat.rx_packets++;
00585                 tunnel->stat.rx_bytes += skb->len;
00586                 skb->dev = tunnel->dev;
00587                 dst_release(skb->dst);
00588                 skb->dst = NULL;
00589                 netif_rx(skb);
00590                 return(0);
00591         }
00592         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
00593 
00594 drop:
00595         kfree_skb(skb);
00596         return(0);
00597 }
00598 
00599 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
00600 {
00601         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
00602         struct net_device_stats *stats = &tunnel->stat;
00603         struct iphdr  *old_iph = skb->nh.iph;
00604         struct iphdr  *tiph;
00605         u8     tos;
00606         u16    df;
00607         struct rtable *rt;                      /* Route to the other host */
00608         struct device *tdev;                    /* Device to other host */
00609         struct iphdr  *iph;                     /* Our new IP header */
00610         int    max_headroom;                    /* The extra header space needed */
00611         int    gre_hlen;
00612         u32    dst;
00613         int    mtu;
00614 
00615         if (tunnel->recursion++) {
00616                 tunnel->stat.collisions++;
00617                 goto tx_error;
00618         }
00619 
00620         if (dev->hard_header) {
00621                 gre_hlen = 0;
00622                 tiph = (struct iphdr*)skb->data;
00623         } else {
00624                 gre_hlen = tunnel->hlen;
00625                 tiph = &tunnel->parms.iph;
00626         }
00627 
00628         if ((dst = tiph->daddr) == 0) {
00629                 /* NBMA tunnel */
00630 
00631                 if (skb->dst == NULL) {
00632                         tunnel->stat.tx_fifo_errors++;
00633                         goto tx_error;
00634                 }
00635 
00636                 if (skb->protocol == __constant_htons(ETH_P_IP)) {
00637                         rt = (struct rtable*)skb->dst;
00638                         if ((dst = rt->rt_gateway) == 0)
00639                                 goto tx_error_icmp;
00640                 }
00641 #ifdef CONFIG_IPV6
00642                 else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
00643                         struct in6_addr *addr6;
00644                         int addr_type;
00645                         struct neighbour *neigh = skb->dst->neighbour;
00646 
00647                         if (neigh == NULL)
00648                                 goto tx_error;
00649 
00650                         addr6 = (struct in6_addr*)&neigh->primary_key;
00651                         addr_type = ipv6_addr_type(addr6);
00652 
00653                         if (addr_type == IPV6_ADDR_ANY) {
00654                                 addr6 = &skb->nh.ipv6h->daddr;
00655                                 addr_type = ipv6_addr_type(addr6);
00656                         }
00657 
00658                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
00659                                 goto tx_error_icmp;
00660 
00661                         dst = addr6->s6_addr32[3];
00662                 }
00663 #endif
00664                 else
00665                         goto tx_error;
00666         }
00667 
00668         tos = tiph->tos;
00669         if (tos&1) {
00670                 if (skb->protocol == __constant_htons(ETH_P_IP))
00671                         tos = old_iph->tos;
00672                 tos &= ~1;
00673         }
00674 
00675         if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
00676                 tunnel->stat.tx_carrier_errors++;
00677                 goto tx_error;
00678         }
00679         tdev = rt->u.dst.dev;
00680 
00681         if (tdev == dev) {
00682                 ip_rt_put(rt);
00683                 tunnel->stat.collisions++;
00684                 goto tx_error;
00685         }
00686 
00687         df = tiph->frag_off;
00688         mtu = rt->u.dst.pmtu - tunnel->hlen;
00689 
00690         if (skb->protocol == __constant_htons(ETH_P_IP)) {
00691                 if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
00692                         skb->dst->pmtu = mtu;
00693 
00694                 df |= (old_iph->frag_off&__constant_htons(IP_DF));
00695 
00696                 if ((old_iph->frag_off&__constant_htons(IP_DF)) &&
00697                     mtu < ntohs(old_iph->tot_len)) {
00698                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
00699                         ip_rt_put(rt);
00700                         goto tx_error;
00701                 }
00702         }
00703 #ifdef CONFIG_IPV6
00704         else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
00705                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
00706 
00707                 if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= IPV6_MIN_MTU) {
00708                         if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
00709                             rt6->rt6i_dst.plen == 128) {
00710                                 rt6->rt6i_flags |= RTF_MODIFIED;
00711                                 skb->dst->pmtu = mtu;
00712                         }
00713                 }
00714 
00715                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
00716                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
00717                         ip_rt_put(rt);
00718                         goto tx_error;
00719                 }
00720         }
00721 #endif
00722 
00723         if (tunnel->err_count > 0) {
00724                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
00725                         tunnel->err_count--;
00726 
00727                         dst_link_failure(skb);
00728                 } else
00729                         tunnel->err_count = 0;
00730         }
00731 
00732         skb->h.raw = skb->nh.raw;
00733 
00734         max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen;
00735 
00736         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
00737                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
00738                 if (!new_skb) {
00739                         ip_rt_put(rt);
00740                         stats->tx_dropped++;
00741                         dev_kfree_skb(skb);
00742                         tunnel->recursion--;
00743                         return 0;
00744                 }
00745                 if (skb->sk)
00746                         skb_set_owner_w(new_skb, skb->sk);
00747                 dev_kfree_skb(skb);
00748                 skb = new_skb;
00749         }
00750 
00751         skb->nh.raw = skb_push(skb, gre_hlen);
00752         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
00753         dst_release(skb->dst);
00754         skb->dst = &rt->u.dst;
00755 
00756         /*
00757          *      Push down and install the IPIP header.
00758          */
00759 
00760         iph                     =       skb->nh.iph;
00761         iph->version            =       4;
00762         iph->ihl                =       sizeof(struct iphdr) >> 2;
00763         iph->frag_off           =       df;
00764         iph->protocol           =       IPPROTO_GRE;
00765         iph->tos                =       tos;
00766         iph->daddr              =       rt->rt_dst;
00767         iph->saddr              =       rt->rt_src;
00768 
00769         if ((iph->ttl = tiph->ttl) == 0) {
00770                 if (skb->protocol == __constant_htons(ETH_P_IP))
00771                         iph->ttl = old_iph->ttl;
00772 #ifdef CONFIG_IPV6
00773                 else if (skb->protocol == __constant_htons(ETH_P_IPV6))
00774                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
00775 #endif
00776                 else
00777                         iph->ttl = ip_statistics.IpDefaultTTL;
00778         }
00779 
00780         ((u16*)(iph+1))[0] = tunnel->parms.o_flags;
00781         ((u16*)(iph+1))[1] = skb->protocol;
00782 
00783         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
00784                 u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
00785 
00786                 if (tunnel->parms.o_flags&GRE_SEQ) {
00787                         ++tunnel->o_seqno;
00788                         *ptr = htonl(tunnel->o_seqno);
00789                         ptr--;
00790                 }
00791                 if (tunnel->parms.o_flags&GRE_KEY) {
00792                         *ptr = tunnel->parms.o_key;
00793                         ptr--;
00794                 }
00795                 if (tunnel->parms.o_flags&GRE_CSUM) {
00796                         *ptr = 0;
00797                         *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
00798                 }
00799         }
00800 
00801         iph->tot_len            =       htons(skb->len);
00802         iph->id                 =       htons(ip_id_count++);
00803         ip_send_check(iph);
00804 
00805         stats->tx_bytes += skb->len;
00806         stats->tx_packets++;
00807         ip_send(skb);
00808         tunnel->recursion--;
00809         return 0;
00810 
00811 tx_error_icmp:
00812         dst_link_failure(skb);
00813 
00814 tx_error:
00815         stats->tx_errors++;
00816         dev_kfree_skb(skb);
00817         tunnel->recursion--;
00818         return 0;
00819 }
00820 
00821 static int
00822 ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
00823 {
00824         int err = 0;
00825         struct ip_tunnel_parm p;
00826         struct ip_tunnel *t;
00827 
00828         MOD_INC_USE_COUNT;
00829 
00830         switch (cmd) {
00831         case SIOCGETTUNNEL:
00832                 t = NULL;
00833                 if (dev == &ipgre_fb_tunnel_dev) {
00834                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
00835                                 err = -EFAULT;
00836                                 break;
00837                         }
00838                         t = ipgre_tunnel_locate(&p, 0);
00839                 }
00840                 if (t == NULL)
00841                         t = (struct ip_tunnel*)dev->priv;
00842                 memcpy(&p, &t->parms, sizeof(p));
00843                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
00844                         err = -EFAULT;
00845                 break;
00846 
00847         case SIOCADDTUNNEL:
00848         case SIOCCHGTUNNEL:
00849                 err = -EPERM;
00850                 if (!capable(CAP_NET_ADMIN))
00851                         goto done;
00852 
00853                 err = -EFAULT;
00854                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
00855                         goto done;
00856 
00857                 err = -EINVAL;
00858                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
00859                     p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) ||
00860                     ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
00861                         goto done;
00862                 if (p.iph.ttl)
00863                         p.iph.frag_off |= __constant_htons(IP_DF);
00864 
00865                 if (!(p.i_flags&GRE_KEY))
00866                         p.i_key = 0;
00867                 if (!(p.o_flags&GRE_KEY))
00868                         p.o_key = 0;
00869 
00870                 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
00871 
00872                 if (dev != &ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL &&
00873                     t != &ipgre_fb_tunnel) {
00874                         if (t != NULL) {
00875                                 if (t->dev != dev) {
00876                                         err = -EEXIST;
00877                                         break;
00878                                 }
00879                         } else {
00880                                 unsigned nflags=0;
00881 
00882                                 t = (struct ip_tunnel*)dev->priv;
00883 
00884                                 if (MULTICAST(p.iph.daddr))
00885                                         nflags = IFF_BROADCAST;
00886                                 else if (p.iph.daddr)
00887                                         nflags = IFF_POINTOPOINT;
00888 
00889                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
00890                                         err = -EINVAL;
00891                                         break;
00892                                 }
00893                                 start_bh_atomic();
00894                                 ipgre_tunnel_unlink(t);
00895                                 t->parms.iph.saddr = p.iph.saddr;
00896                                 t->parms.iph.daddr = p.iph.daddr;
00897                                 t->parms.i_key = p.i_key;
00898                                 t->parms.o_key = p.o_key;
00899                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
00900                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
00901                                 ipgre_tunnel_link(t);
00902                                 end_bh_atomic();
00903                                 netdev_state_change(dev);
00904                         }
00905                 }
00906 
00907                 if (t) {
00908                         err = 0;
00909                         if (cmd == SIOCCHGTUNNEL) {
00910                                 t->parms.iph.ttl = p.iph.ttl;
00911                                 t->parms.iph.tos = p.iph.tos;
00912                                 t->parms.iph.frag_off = p.iph.frag_off;
00913                         }
00914                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
00915                                 err = -EFAULT;
00916                 } else
00917                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
00918                 break;
00919 
00920         case SIOCDELTUNNEL:
00921                 err = -EPERM;
00922                 if (!capable(CAP_NET_ADMIN))
00923                         goto done;
00924 
00925                 if (dev == &ipgre_fb_tunnel_dev) {
00926                         err = -EFAULT;
00927                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
00928                                 goto done;
00929                         err = -ENOENT;
00930                         if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
00931                                 goto done;
00932                         err = -EPERM;
00933                         if (t == &ipgre_fb_tunnel)
00934                                 goto done;
00935                 }
00936                 err = unregister_netdevice(dev);
00937                 break;
00938 
00939         default:
00940                 err = -EINVAL;
00941         }
00942 
00943 done:
00944         MOD_DEC_USE_COUNT;
00945         return err;
00946 }
00947 
00948 static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev)
00949 {
00950         return &(((struct ip_tunnel*)dev->priv)->stat);
00951 }
00952 
00953 static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu)
00954 {
00955         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
00956         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
00957                 return -EINVAL;
00958         dev->mtu = new_mtu;
00959         return 0;
00960 }
00961 
00962 #ifdef CONFIG_NET_IPGRE_BROADCAST
00963 /* Nice toy. Unfortunately, useless in real life :-)
00964    It allows constructing a virtual multiprotocol broadcast "LAN"
00965    over the Internet, provided multicast routing is set up.
00966 
00967 
00968    I have no idea whether this bicycle was invented before me,
00969    so I had to set ARPHRD_IPGRE to a random value.
00970    I have the impression that Cisco could make something similar,
00971    but this feature is apparently missing in IOS<=11.2(8).
00972    
00973    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
00974    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
00975 
00976    ping -t 255 224.66.66.66
00977 
00978    If nobody answers, mbone does not work.
00979 
00980    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
00981    ip addr add 10.66.66.<somewhat>/24 dev Universe
00982    ifconfig Universe up
00983    ifconfig Universe add fe80::<Your_real_addr>/10
00984    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
00985    ftp 10.66.66.66
00986    ...
00987    ftp fec0:6666:6666::193.233.7.65
00988    ...
00989 
00990  */
00991 
00992 static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type,
00993                         void *daddr, void *saddr, unsigned len)
00994 {
00995         struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
00996         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
00997         u16 *p = (u16*)(iph+1);
00998 
00999         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
01000         p[0]            = t->parms.o_flags;
01001         p[1]            = htons(type);
01002 
01003         /*
01004          *      Set the source hardware address. 
01005          */
01006          
01007         if (saddr)
01008                 memcpy(&iph->saddr, saddr, 4);
01009 
01010         if (daddr) {
01011                 memcpy(&iph->daddr, daddr, 4);
01012                 return t->hlen;
01013         }
01014         if (iph->daddr && !MULTICAST(iph->daddr))
01015                 return t->hlen;
01016         
01017         return -t->hlen;
01018 }
01019 
01020 static int ipgre_open(struct device *dev)
01021 {
01022         struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
01023 
01024         MOD_INC_USE_COUNT;
01025         if (MULTICAST(t->parms.iph.daddr)) {
01026                 struct rtable *rt;
01027                 if (ip_route_output(&rt, t->parms.iph.daddr,
01028                                     t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), 
01029                                     t->parms.link)) {
01030                         MOD_DEC_USE_COUNT;
01031                         return -EADDRNOTAVAIL;
01032                 }
01033                 dev = rt->u.dst.dev;
01034                 ip_rt_put(rt);
01035                 if (dev->ip_ptr == NULL) {
01036                         MOD_DEC_USE_COUNT;
01037                         return -EADDRNOTAVAIL;
01038                 }
01039                 t->mlink = dev->ifindex;
01040                 ip_mc_inc_group(dev->ip_ptr, t->parms.iph.daddr);
01041         }
01042         return 0;
01043 }
01044 
01045 static int ipgre_close(struct device *dev)
01046 {
01047         struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
01048         if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
01049                 dev = dev_get_by_index(t->mlink);
01050                 if (dev && dev->ip_ptr)
01051                         ip_mc_dec_group(dev->ip_ptr, t->parms.iph.daddr);
01052         }
01053         MOD_DEC_USE_COUNT;
01054         return 0;
01055 }
01056 
01057 #endif
01058 
01059 static void ipgre_tunnel_init_gen(struct device *dev)
01060 {
01061         struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
01062 
01063         dev->destructor         = ipgre_tunnel_destroy;
01064         dev->hard_start_xmit    = ipgre_tunnel_xmit;
01065         dev->get_stats          = ipgre_tunnel_get_stats;
01066         dev->do_ioctl           = ipgre_tunnel_ioctl;
01067         dev->change_mtu         = ipgre_tunnel_change_mtu;
01068 
01069         dev_init_buffers(dev);
01070 
01071         dev->type               = ARPHRD_IPGRE;
01072         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
01073         dev->mtu                = 1500 - sizeof(struct iphdr) - 4;
01074         dev->flags              = IFF_NOARP;
01075         dev->iflink             = 0;
01076         dev->addr_len           = 4;
01077         memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
01078         memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
01079 }
01080 
01081 static int ipgre_tunnel_init(struct device *dev)
01082 {
01083         struct device *tdev = NULL;
01084         struct ip_tunnel *tunnel;
01085         struct iphdr *iph;
01086         int hlen = LL_MAX_HEADER;
01087         int mtu = 1500;
01088         int addend = sizeof(struct iphdr) + 4;
01089 
01090         tunnel = (struct ip_tunnel*)dev->priv;
01091         iph = &tunnel->parms.iph;
01092 
01093         ipgre_tunnel_init_gen(dev);
01094 
01095         /* Guess output device to choose reasonable mtu and hard_header_len */
01096 
01097         if (iph->daddr) {
01098                 struct rtable *rt;
01099                 if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
01100                         tdev = rt->u.dst.dev;
01101                         ip_rt_put(rt);
01102                 }
01103 
01104                 dev->flags |= IFF_POINTOPOINT;
01105 
01106 #ifdef CONFIG_NET_IPGRE_BROADCAST
01107                 if (MULTICAST(iph->daddr)) {
01108                         if (!iph->saddr)
01109                                 return -EINVAL;
01110                         dev->flags = IFF_BROADCAST;
01111                         dev->hard_header = ipgre_header;
01112                         dev->open = ipgre_open;
01113                         dev->stop = ipgre_close;
01114                 }
01115 #endif
01116         }
01117 
01118         if (!tdev && tunnel->parms.link)
01119                 tdev = dev_get_by_index(tunnel->parms.link);
01120 
01121         if (tdev) {
01122                 hlen = tdev->hard_header_len;
01123                 mtu = tdev->mtu;
01124         }
01125         dev->iflink = tunnel->parms.link;
01126 
01127         /* Precalculate GRE options length */
01128         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
01129                 if (tunnel->parms.o_flags&GRE_CSUM)
01130                         addend += 4;
01131                 if (tunnel->parms.o_flags&GRE_KEY)
01132                         addend += 4;
01133                 if (tunnel->parms.o_flags&GRE_SEQ)
01134                         addend += 4;
01135         }
01136         dev->hard_header_len = hlen + addend;
01137         dev->mtu = mtu - addend;
01138         tunnel->hlen = addend;
01139         return 0;
01140 }
01141 
01142 #ifdef MODULE
01143 static int ipgre_fb_tunnel_open(struct device *dev)
01144 {
01145         MOD_INC_USE_COUNT;
01146         return 0;
01147 }
01148 
01149 static int ipgre_fb_tunnel_close(struct device *dev)
01150 {
01151         MOD_DEC_USE_COUNT;
01152         return 0;
01153 }
01154 #endif
01155 
01156 __initfunc(int ipgre_fb_tunnel_init(struct device *dev))
01157 {
01158         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
01159         struct iphdr *iph;
01160 
01161         ipgre_tunnel_init_gen(dev);
01162 #ifdef MODULE
01163         dev->open               = ipgre_fb_tunnel_open;
01164         dev->stop               = ipgre_fb_tunnel_close;
01165 #endif
01166 
01167         iph = &ipgre_fb_tunnel.parms.iph;
01168         iph->version            = 4;
01169         iph->protocol           = IPPROTO_GRE;
01170         iph->ihl                = 5;
01171         tunnel->hlen            = sizeof(struct iphdr) + 4;
01172 
01173         tunnels_wc[0]           = &ipgre_fb_tunnel;
01174         return 0;
01175 }
01176 
01177 
01178 static struct inet_protocol ipgre_protocol = {
01179   ipgre_rcv,             /* GRE handler          */
01180   ipgre_err,             /* TUNNEL error control */
01181   0,                    /* next                 */
01182   IPPROTO_GRE,          /* protocol ID          */
01183   0,                    /* copy                 */
01184   NULL,                 /* data                 */
01185   "GRE"                 /* name                 */
01186 };
01187 
01188 
01189 /*
01190  *      And now the modules code and kernel interface.
01191  */
01192 
01193 #ifdef MODULE
01194 int init_module(void) 
01195 #else
01196 __initfunc(int ipgre_init(void))
01197 #endif
01198 {
01199         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
01200 
01201         ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel;
01202         ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name;
01203 #ifdef MODULE
01204         register_netdev(&ipgre_fb_tunnel_dev);
01205 #else
01206         register_netdevice(&ipgre_fb_tunnel_dev);
01207 #endif
01208 
01209         inet_add_protocol(&ipgre_protocol);
01210         return 0;
01211 }
01212 
01213 #ifdef MODULE
01214 
01215 void cleanup_module(void)
01216 {
01217         if ( inet_del_protocol(&ipgre_protocol) < 0 )
01218                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
01219 
01220         unregister_netdev(&ipgre_fb_tunnel_dev);
01221 }
01222 
01223 #endif