| 20 |
20 |
#include <linux/udp.h>
|
| 21 |
21 |
#include <linux/icmp.h>
|
| 22 |
22 |
#include <linux/if_arp.h>
|
|
23 |
#include <linux/etherdevice.h>
|
| 23 |
24 |
#include <linux/seq_file.h>
|
| 24 |
25 |
#include <linux/netfilter_arp.h>
|
| 25 |
26 |
#include <linux/netfilter/x_tables.h>
|
| ... | ... | |
| 28 |
29 |
#include <net/netfilter/nf_conntrack.h>
|
| 29 |
30 |
#include <net/net_namespace.h>
|
| 30 |
31 |
#include <net/checksum.h>
|
|
32 |
#include <net/xfrm.h>
|
| 31 |
33 |
|
| 32 |
|
#define CLUSTERIP_VERSION "0.8"
|
|
34 |
#define CLUSTERIP_VERSION "0.9"
|
| 33 |
35 |
|
| 34 |
36 |
MODULE_LICENSE("GPL");
|
| 35 |
37 |
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
|
| ... | ... | |
| 103 |
105 |
}
|
| 104 |
106 |
|
| 105 |
107 |
static struct clusterip_config *
|
| 106 |
|
__clusterip_config_find(__be32 clusterip)
|
|
108 |
clusterip_config_find(__be32 clusterip)
|
| 107 |
109 |
{
|
| 108 |
110 |
struct clusterip_config *c;
|
| 109 |
111 |
|
| ... | ... | |
| 115 |
117 |
return NULL;
|
| 116 |
118 |
}
|
| 117 |
119 |
|
|
120 |
static struct clusterip_config *
|
|
121 |
clusterip_config_find_mac(u_int8_t clustermac[])
|
|
122 |
{
|
|
123 |
struct clusterip_config *c;
|
|
124 |
|
|
125 |
list_for_each_entry(c, &clusterip_configs, list) {
|
|
126 |
if (memcmp(c->clustermac, clustermac, ETH_ALEN) == 0)
|
|
127 |
return c;
|
|
128 |
}
|
|
129 |
|
|
130 |
return NULL;
|
|
131 |
}
|
|
132 |
|
| 118 |
133 |
static inline struct clusterip_config *
|
| 119 |
134 |
clusterip_config_find_get(__be32 clusterip, int entry)
|
| 120 |
135 |
{
|
| 121 |
136 |
struct clusterip_config *c;
|
| 122 |
137 |
|
| 123 |
138 |
read_lock_bh(&clusterip_lock);
|
| 124 |
|
c = __clusterip_config_find(clusterip);
|
|
139 |
c = clusterip_config_find(clusterip);
|
| 125 |
140 |
if (!c) {
|
| 126 |
141 |
read_unlock_bh(&clusterip_lock);
|
| 127 |
142 |
return NULL;
|
| ... | ... | |
| 134 |
149 |
return c;
|
| 135 |
150 |
}
|
| 136 |
151 |
|
|
152 |
static inline struct clusterip_config *
|
|
153 |
clusterip_config_find_get_mac(u_int8_t clustermac[])
|
|
154 |
{
|
|
155 |
struct clusterip_config *c;
|
|
156 |
|
|
157 |
read_lock_bh(&clusterip_lock);
|
|
158 |
c = clusterip_config_find_mac(clustermac);
|
|
159 |
if (!c) {
|
|
160 |
read_unlock_bh(&clusterip_lock);
|
|
161 |
return NULL;
|
|
162 |
}
|
|
163 |
atomic_inc(&c->refcount);
|
|
164 |
read_unlock_bh(&clusterip_lock);
|
|
165 |
|
|
166 |
return c;
|
|
167 |
}
|
|
168 |
|
| 137 |
169 |
static void
|
| 138 |
170 |
clusterip_config_init_nodelist(struct clusterip_config *c,
|
| 139 |
171 |
const struct ipt_clusterip_tgt_info *i)
|
| ... | ... | |
| 302 |
334 |
return NF_DROP;
|
| 303 |
335 |
}
|
| 304 |
336 |
|
| 305 |
|
/* special case: ICMP error handling. conntrack distinguishes between
|
| 306 |
|
* error messages (RELATED) and information requests (see below) */
|
| 307 |
|
if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
|
| 308 |
|
(ctinfo == IP_CT_RELATED ||
|
| 309 |
|
ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY))
|
|
337 |
switch (ip_hdr(skb)->protocol) {
|
|
338 |
case IPPROTO_ICMP:
|
|
339 |
/* ICMP error handling: conntrack distinguishes between error
|
|
340 |
* messages (RELATED) and information requests (see below)*/
|
|
341 |
if (ctinfo == IP_CT_RELATED ||
|
|
342 |
ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
|
|
343 |
return XT_CONTINUE;
|
|
344 |
break;
|
|
345 |
case IPPROTO_ESP:
|
|
346 |
case IPPROTO_AH:
|
|
347 |
case IPPROTO_IPIP:
|
|
348 |
/* responsibility for IPsec is handled in xfrm input hook */
|
| 310 |
349 |
return XT_CONTINUE;
|
|
350 |
case IPPROTO_UDP: {
|
|
351 |
/* UDP 4500 with an SPI is encapsulated ESP */
|
|
352 |
const struct iphdr *iph = ip_hdr(skb);
|
|
353 |
const u_int16_t *halfs = (const void *)iph+iph->ihl*4;
|
|
354 |
|
|
355 |
if (halfs[1] == htons(4500) && (halfs[4] || halfs[5]))
|
|
356 |
return XT_CONTINUE;
|
|
357 |
break;
|
|
358 |
}
|
|
359 |
default:
|
|
360 |
break;
|
|
361 |
}
|
| 311 |
362 |
|
| 312 |
363 |
/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
|
| 313 |
364 |
* TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
|
| ... | ... | |
| 516 |
567 |
|
| 517 |
568 |
/* normally the linux kernel always replies to arp queries of
|
| 518 |
569 |
* addresses on different interfaces. However, in the CLUSTERIP case
|
| 519 |
|
* this wouldn't work, since we didn't subscribe the mcast group on
|
| 520 |
|
* other interfaces */
|
|
570 |
* this wouldn't work. We need the multicast MAC to identify packets
|
|
571 |
* to pass to forwarding, so drop that ARP. */
|
| 521 |
572 |
if (c->dev != out) {
|
| 522 |
|
pr_debug("CLUSTERIP: not mangling arp reply on different "
|
| 523 |
|
"interface: cip'%s'-skb'%s'\n",
|
| 524 |
|
c->dev->name, out->name);
|
| 525 |
573 |
clusterip_config_put(c);
|
| 526 |
|
return NF_ACCEPT;
|
|
574 |
return NF_DROP;
|
| 527 |
575 |
}
|
| 528 |
576 |
|
| 529 |
577 |
/* mangle reply hardware address */
|
| ... | ... | |
| 547 |
595 |
};
|
| 548 |
596 |
|
| 549 |
597 |
/***********************************************************************
|
|
598 |
* IPSEC FORWARDING HOOKS
|
|
599 |
***********************************************************************/
|
|
600 |
|
|
601 |
static unsigned int
|
|
602 |
cip_pre_routing_hook(unsigned int hook,
|
|
603 |
struct sk_buff *skb,
|
|
604 |
const struct net_device *in,
|
|
605 |
const struct net_device *out,
|
|
606 |
int (*okfn)(struct sk_buff *))
|
|
607 |
{
|
|
608 |
if (skb_mac_header(skb) < skb->head ||
|
|
609 |
skb_mac_header(skb) + ETH_HLEN > skb->data ||
|
|
610 |
!is_multicast_ether_addr(eth_hdr(skb)->h_dest))
|
|
611 |
return NF_ACCEPT;
|
|
612 |
|
|
613 |
/* if we receive a packet for a CLUSTERIP multicast address,
|
|
614 |
* we let it pass through ip_forward. */
|
|
615 |
if (clusterip_config_find_mac(eth_hdr(skb)->h_dest))
|
|
616 |
skb->pkt_type = PACKET_HOST;
|
|
617 |
|
|
618 |
return NF_ACCEPT;
|
|
619 |
}
|
|
620 |
|
|
621 |
/* Hook registration record for cip_pre_routing_hook: PF_INET,
 * NF_INET_PRE_ROUTING, priority -1. */
static struct nf_hook_ops cip_pre_routing_ops __read_mostly = {
	.pf		= PF_INET,
	.hooknum	= NF_INET_PRE_ROUTING,
	.priority	= -1,
	.owner		= THIS_MODULE,
	.hook		= cip_pre_routing_hook,
};
|
|
628 |
|
|
629 |
static inline u_int32_t
|
|
630 |
clusterip_hashfn_xfrm(const struct xfrm_state *x,
|
|
631 |
const struct clusterip_config *config)
|
|
632 |
{
|
|
633 |
unsigned long hashval;
|
|
634 |
|
|
635 |
hashval = jhash_2words(ntohs(x->id.spi), ntohl(x->id.daddr.a4),
|
|
636 |
config->hash_initval);
|
|
637 |
return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
|
|
638 |
}
|
|
639 |
|
|
640 |
/* a node that is not responsible for an SA still processes every n-th
 * packet of it (n = SEQ_UPDATE_INTERVAL) */
|
|
641 |
#define SEQ_UPDATE_INTERVAL 16
|
|
642 |
|
|
643 |
static unsigned int
|
|
644 |
cip_xfrm_in_hook(unsigned int hook,
|
|
645 |
struct sk_buff *skb,
|
|
646 |
const struct net_device *in,
|
|
647 |
const struct net_device *out,
|
|
648 |
int (*okfn)(struct sk_buff *))
|
|
649 |
{
|
|
650 |
struct clusterip_config *c;
|
|
651 |
struct xfrm_state *x;
|
|
652 |
u_int32_t hash;
|
|
653 |
__be32 seq;
|
|
654 |
unsigned int res = NF_DROP;
|
|
655 |
|
|
656 |
x = skb->sp->xvec[skb->sp->len - 1];
|
|
657 |
|
|
658 |
switch (x->id.proto) {
|
|
659 |
case IPPROTO_ESP:
|
|
660 |
case IPPROTO_AH:
|
|
661 |
break;
|
|
662 |
case IPPROTO_IPIP:
|
|
663 |
case IPPROTO_COMP:
|
|
664 |
/* FIXME: Accept IPCOMP if packet was encrypted only */
|
|
665 |
default:
|
|
666 |
return NF_ACCEPT;
|
|
667 |
}
|
|
668 |
|
|
669 |
c = clusterip_config_find_get(x->id.daddr.a4, 0);
|
|
670 |
if (!c)
|
|
671 |
return NF_ACCEPT;
|
|
672 |
|
|
673 |
/* process every n-th packet to update sequence counter, but drop it */
|
|
674 |
hash = clusterip_hashfn_xfrm(x, c);
|
|
675 |
seq = XFRM_SKB_CB(skb)->seq.input;
|
|
676 |
if (clusterip_responsible(c, hash))
|
|
677 |
res = NF_ACCEPT;
|
|
678 |
else if (ntohl(seq) % SEQ_UPDATE_INTERVAL == 0) {
|
|
679 |
if (x->type->input(x, skb) > 0) {
|
|
680 |
spin_lock(&x->lock);
|
|
681 |
|
|
682 |
if (x->props.replay_window)
|
|
683 |
xfrm_replay_advance(x, seq);
|
|
684 |
|
|
685 |
spin_unlock(&x->lock);
|
|
686 |
}
|
|
687 |
}
|
|
688 |
clusterip_config_put(c);
|
|
689 |
return res;
|
|
690 |
}
|
|
691 |
|
|
692 |
/* Hook registration record for cip_xfrm_in_hook: PF_INET,
 * NF_INET_XFRM_IN, priority -1. */
static struct nf_hook_ops cip_xfrm_in_ops __read_mostly = {
	.pf		= PF_INET,
	.hooknum	= NF_INET_XFRM_IN,
	.priority	= -1,
	.owner		= THIS_MODULE,
	.hook		= cip_xfrm_in_hook,
};
|
|
699 |
|
|
700 |
static unsigned int
|
|
701 |
cip_xfrm_out_hook(unsigned int hook,
|
|
702 |
struct sk_buff *skb,
|
|
703 |
const struct net_device *in,
|
|
704 |
const struct net_device *out,
|
|
705 |
int (*okfn)(struct sk_buff *))
|
|
706 |
{
|
|
707 |
struct clusterip_config *c;
|
|
708 |
struct xfrm_state *x;
|
|
709 |
u_int32_t hash;
|
|
710 |
unsigned int res = NF_DROP;
|
|
711 |
|
|
712 |
x = skb_dst(skb)->xfrm;
|
|
713 |
|
|
714 |
switch (x->id.proto) {
|
|
715 |
case IPPROTO_ESP:
|
|
716 |
case IPPROTO_AH:
|
|
717 |
break;
|
|
718 |
case IPPROTO_IPIP:
|
|
719 |
case IPPROTO_COMP:
|
|
720 |
/* FIXME: Skip IPCOMP processing if we are not responsible */
|
|
721 |
default:
|
|
722 |
return NF_ACCEPT;
|
|
723 |
}
|
|
724 |
|
|
725 |
c = clusterip_config_find_get(x->props.saddr.a4, 0);
|
|
726 |
if (!c)
|
|
727 |
return NF_ACCEPT;
|
|
728 |
|
|
729 |
hash = clusterip_hashfn_xfrm(x, c);
|
|
730 |
if (clusterip_responsible(c, hash))
|
|
731 |
res = NF_ACCEPT;
|
|
732 |
|
|
733 |
clusterip_config_put(c);
|
|
734 |
return res;
|
|
735 |
}
|
|
736 |
|
|
737 |
/* Hook registration record for cip_xfrm_out_hook: PF_INET,
 * NF_INET_XFRM_OUT, priority -1. */
static struct nf_hook_ops cip_xfrm_out_ops __read_mostly = {
	.pf		= PF_INET,
	.hooknum	= NF_INET_XFRM_OUT,
	.priority	= -1,
	.owner		= THIS_MODULE,
	.hook		= cip_xfrm_out_hook,
};
|
|
744 |
|
|
745 |
/***********************************************************************
|
| 550 |
746 |
* PROC DIR HANDLING
|
| 551 |
747 |
***********************************************************************/
|
| 552 |
748 |
|
| ... | ... | |
| 703 |
899 |
if (ret < 0)
|
| 704 |
900 |
goto cleanup_target;
|
| 705 |
901 |
|
|
902 |
ret = nf_register_hook(&cip_pre_routing_ops);
|
|
903 |
if (ret < 0)
|
|
904 |
goto cleanup_arp;
|
|
905 |
|
|
906 |
ret = nf_register_hook(&cip_xfrm_in_ops);
|
|
907 |
if (ret < 0)
|
|
908 |
goto cleanup_pre;
|
|
909 |
|
|
910 |
ret = nf_register_hook(&cip_xfrm_out_ops);
|
|
911 |
if (ret < 0)
|
|
912 |
goto cleanup_xfrm_in;
|
|
913 |
|
| 706 |
914 |
#ifdef CONFIG_PROC_FS
|
| 707 |
915 |
clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
|
| 708 |
916 |
if (!clusterip_procdir) {
|
| ... | ... | |
| 718 |
926 |
|
| 719 |
927 |
#ifdef CONFIG_PROC_FS
|
| 720 |
928 |
cleanup_hook:
|
| 721 |
|
nf_unregister_hook(&cip_arp_ops);
|
|
929 |
nf_unregister_hook(&cip_xfrm_out_ops);
|
| 722 |
930 |
#endif /* CONFIG_PROC_FS */
|
|
931 |
cleanup_xfrm_in:
|
|
932 |
nf_unregister_hook(&cip_xfrm_in_ops);
|
|
933 |
cleanup_pre:
|
|
934 |
nf_unregister_hook(&cip_pre_routing_ops);
|
|
935 |
cleanup_arp:
|
|
936 |
nf_unregister_hook(&cip_arp_ops);
|
| 723 |
937 |
cleanup_target:
|
| 724 |
938 |
xt_unregister_target(&clusterip_tg_reg);
|
| 725 |
939 |
return ret;
|
| ... | ... | |
| 733 |
947 |
remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
|
| 734 |
948 |
#endif
|
| 735 |
949 |
nf_unregister_hook(&cip_arp_ops);
|
|
950 |
nf_unregister_hook(&cip_pre_routing_ops);
|
|
951 |
nf_unregister_hook(&cip_xfrm_in_ops);
|
|
952 |
nf_unregister_hook(&cip_xfrm_out_ops);
|
| 736 |
953 |
xt_unregister_target(&clusterip_tg_reg);
|
| 737 |
954 |
}
|
| 738 |
955 |
|
| 739 |
|
-
|