0002-Extended-the-CLUSTERIP-module-to-use-it-on-a-IPSec-g.patch

Martin Willi, 23.07.2010 10:53

Download (9.9 kB)

b/net/ipv4/netfilter/ipt_CLUSTERIP.c
20 20
#include <linux/udp.h>
21 21
#include <linux/icmp.h>
22 22
#include <linux/if_arp.h>
23
#include <linux/etherdevice.h>
23 24
#include <linux/seq_file.h>
24 25
#include <linux/netfilter_arp.h>
25 26
#include <linux/netfilter/x_tables.h>
......
28 29
#include <net/netfilter/nf_conntrack.h>
29 30
#include <net/net_namespace.h>
30 31
#include <net/checksum.h>
32
#include <net/xfrm.h>
31 33

  
32
#define CLUSTERIP_VERSION "0.8"
34
#define CLUSTERIP_VERSION "0.9"
33 35

  
34 36
MODULE_LICENSE("GPL");
35 37
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
......
103 105
}
104 106

  
105 107
static struct clusterip_config *
106
__clusterip_config_find(__be32 clusterip)
108
clusterip_config_find(__be32 clusterip)
107 109
{
108 110
	struct clusterip_config *c;
109 111

  
......
115 117
	return NULL;
116 118
}
117 119

  
120
static struct clusterip_config *
121
clusterip_config_find_mac(u_int8_t clustermac[])
122
{
123
	struct clusterip_config *c;
124

  
125
	list_for_each_entry(c, &clusterip_configs, list) {
126
		if (memcmp(c->clustermac, clustermac, ETH_ALEN) == 0)
127
			return c;
128
	}
129

  
130
	return NULL;
131
}
132

  
118 133
static inline struct clusterip_config *
119 134
clusterip_config_find_get(__be32 clusterip, int entry)
120 135
{
121 136
	struct clusterip_config *c;
122 137

  
123 138
	read_lock_bh(&clusterip_lock);
124
	c = __clusterip_config_find(clusterip);
139
	c = clusterip_config_find(clusterip);
125 140
	if (!c) {
126 141
		read_unlock_bh(&clusterip_lock);
127 142
		return NULL;
......
134 149
	return c;
135 150
}
136 151

  
152
static inline struct clusterip_config *
153
clusterip_config_find_get_mac(u_int8_t clustermac[])
154
{
155
	struct clusterip_config *c;
156

  
157
	read_lock_bh(&clusterip_lock);
158
	c = clusterip_config_find_mac(clustermac);
159
	if (!c) {
160
		read_unlock_bh(&clusterip_lock);
161
		return NULL;
162
	}
163
	atomic_inc(&c->refcount);
164
	read_unlock_bh(&clusterip_lock);
165

  
166
	return c;
167
}
168

  
137 169
static void
138 170
clusterip_config_init_nodelist(struct clusterip_config *c,
139 171
			       const struct ipt_clusterip_tgt_info *i)
......
302 334
		return NF_DROP;
303 335
	}
304 336

  
305
	/* special case: ICMP error handling. conntrack distinguishes between
306
	 * error messages (RELATED) and information requests (see below) */
307
	if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
308
	    (ctinfo == IP_CT_RELATED ||
309
	     ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY))
337
	switch (ip_hdr(skb)->protocol) {
338
	case IPPROTO_ICMP:
339
		/* ICMP error handling: conntrack distinguishes between error
340
		 * messages (RELATED) and information requests (see below)*/
341
		if (ctinfo == IP_CT_RELATED ||
342
		    ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
343
			return XT_CONTINUE;
344
		break;
345
	case IPPROTO_ESP:
346
	case IPPROTO_AH:
347
	case IPPROTO_IPIP:
348
		/* responsibility for IPsec is handled in xfrm input hook */
310 349
		return XT_CONTINUE;
350
	case IPPROTO_UDP: {
351
		/* UDP 4500 with an SPI is encapsulated ESP */
352
		const struct iphdr *iph = ip_hdr(skb);
353
		const u_int16_t *halfs = (const void *)iph+iph->ihl*4;
354

  
355
		if (halfs[1] == htons(4500) && (halfs[4] || halfs[5]))
356
			return XT_CONTINUE;
357
		break;
358
	}
359
	default:
360
		break;
361
	}
311 362

  
312 363
	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
313 364
	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
......
516 567

  
517 568
	/* normally the linux kernel always replies to arp queries of
518 569
	 * addresses on different interfaces.  However, in the CLUSTERIP case
519
	 * this wouldn't work, since we didn't subscribe the mcast group on
520
	 * other interfaces */
570
	 * this wouldn't work. We need the multicast MAC to identify packets
571
	 * to pass to forwarding, so drop that ARP. */
521 572
	if (c->dev != out) {
522
		pr_debug("CLUSTERIP: not mangling arp reply on different "
523
			 "interface: cip'%s'-skb'%s'\n",
524
			 c->dev->name, out->name);
525 573
		clusterip_config_put(c);
526
		return NF_ACCEPT;
574
		return NF_DROP;
527 575
	}
528 576

  
529 577
	/* mangle reply hardware address */
......
547 595
};
548 596

  
549 597
/***********************************************************************
598
 * IPSEC FORWARDING HOOKS
599
 ***********************************************************************/
600

  
601
static unsigned int
602
cip_pre_routing_hook(unsigned int hook,
603
		     struct sk_buff *skb,
604
		     const struct net_device *in,
605
		     const struct net_device *out,
606
		     int (*okfn)(struct sk_buff *))
607
{
608
	if (skb_mac_header(skb) < skb->head ||
609
	    skb_mac_header(skb) + ETH_HLEN > skb->data ||
610
	    !is_multicast_ether_addr(eth_hdr(skb)->h_dest))
611
		return NF_ACCEPT;
612

  
613
	/* if we receive a packet for a CLUSTERIP multicast address,
614
	 * we let it pass through ip_forward. */
615
	if (clusterip_config_find_mac(eth_hdr(skb)->h_dest))
616
		skb->pkt_type = PACKET_HOST;
617

  
618
	return NF_ACCEPT;
619
}
620

  
621
/* Registers cip_pre_routing_hook() on the IPv4 PRE_ROUTING hook point
 * with priority -1. */
static struct nf_hook_ops cip_pre_routing_ops __read_mostly = {
	.owner		= THIS_MODULE,
	.pf		= PF_INET,
	.hooknum	= NF_INET_PRE_ROUTING,
	.priority	= -1,
	.hook		= cip_pre_routing_hook,
};
628

  
629
static inline u_int32_t
630
clusterip_hashfn_xfrm(const struct xfrm_state *x,
631
		      const struct clusterip_config *config)
632
{
633
	unsigned long hashval;
634

  
635
	hashval = jhash_2words(ntohs(x->id.spi), ntohl(x->id.daddr.a4),
636
			       config->hash_initval);
637
	return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
638
}
639

  
640
/* of the packets we are not responsible for, process every n-th one */
641
#define SEQ_UPDATE_INTERVAL 16
642

  
643
static unsigned int
644
cip_xfrm_in_hook(unsigned int hook,
645
		 struct sk_buff *skb,
646
		 const struct net_device *in,
647
		 const struct net_device *out,
648
		 int (*okfn)(struct sk_buff *))
649
{
650
	struct clusterip_config *c;
651
	struct xfrm_state *x;
652
	u_int32_t hash;
653
	__be32 seq;
654
	unsigned int res = NF_DROP;
655

  
656
	x = skb->sp->xvec[skb->sp->len - 1];
657

  
658
	switch (x->id.proto) {
659
	case IPPROTO_ESP:
660
	case IPPROTO_AH:
661
		break;
662
	case IPPROTO_IPIP:
663
	case IPPROTO_COMP:
664
		/* FIXME: Accept IPCOMP if packet was encrypted only */
665
	default:
666
		return NF_ACCEPT;
667
	}
668

  
669
	c = clusterip_config_find_get(x->id.daddr.a4, 0);
670
	if (!c)
671
		return NF_ACCEPT;
672

  
673
	/* process every n-th packet to update sequence counter, but drop it */
674
	hash = clusterip_hashfn_xfrm(x, c);
675
	seq = XFRM_SKB_CB(skb)->seq.input;
676
	if (clusterip_responsible(c, hash))
677
		res = NF_ACCEPT;
678
	else if (ntohl(seq) % SEQ_UPDATE_INTERVAL == 0) {
679
		if (x->type->input(x, skb) > 0) {
680
			spin_lock(&x->lock);
681

  
682
			if (x->props.replay_window)
683
				xfrm_replay_advance(x, seq);
684

  
685
			spin_unlock(&x->lock);
686
		}
687
	}
688
	clusterip_config_put(c);
689
	return res;
690
}
691

  
692
/* Registers cip_xfrm_in_hook() on the IPv4 NF_INET_XFRM_IN hook point
 * with priority -1. */
static struct nf_hook_ops cip_xfrm_in_ops __read_mostly = {
	.owner		= THIS_MODULE,
	.pf		= PF_INET,
	.hooknum	= NF_INET_XFRM_IN,
	.priority	= -1,
	.hook		= cip_xfrm_in_hook,
};
699

  
700
static unsigned int
701
cip_xfrm_out_hook(unsigned int hook,
702
		  struct sk_buff *skb,
703
		  const struct net_device *in,
704
		  const struct net_device *out,
705
		  int (*okfn)(struct sk_buff *))
706
{
707
	struct clusterip_config *c;
708
	struct xfrm_state *x;
709
	u_int32_t hash;
710
	unsigned int res = NF_DROP;
711

  
712
	x = skb_dst(skb)->xfrm;
713

  
714
	switch (x->id.proto) {
715
	case IPPROTO_ESP:
716
	case IPPROTO_AH:
717
		break;
718
	case IPPROTO_IPIP:
719
	case IPPROTO_COMP:
720
		/* FIXME: Skip IPCOMP processing if we are not responsible */
721
	default:
722
		return NF_ACCEPT;
723
	}
724

  
725
	c = clusterip_config_find_get(x->props.saddr.a4, 0);
726
	if (!c)
727
		return NF_ACCEPT;
728

  
729
	hash = clusterip_hashfn_xfrm(x, c);
730
	if (clusterip_responsible(c, hash))
731
		res = NF_ACCEPT;
732

  
733
	clusterip_config_put(c);
734
	return res;
735
}
736

  
737
/* Registers cip_xfrm_out_hook() on the IPv4 NF_INET_XFRM_OUT hook point
 * with priority -1. */
static struct nf_hook_ops cip_xfrm_out_ops __read_mostly = {
	.owner		= THIS_MODULE,
	.pf		= PF_INET,
	.hooknum	= NF_INET_XFRM_OUT,
	.priority	= -1,
	.hook		= cip_xfrm_out_hook,
};
744

  
745
/***********************************************************************
550 746
 * PROC DIR HANDLING
551 747
 ***********************************************************************/
552 748

  
......
703 899
	if (ret < 0)
704 900
		goto cleanup_target;
705 901

  
902
	ret = nf_register_hook(&cip_pre_routing_ops);
903
	if (ret < 0)
904
		goto cleanup_arp;
905

  
906
	ret = nf_register_hook(&cip_xfrm_in_ops);
907
	if (ret < 0)
908
		goto cleanup_pre;
909

  
910
	ret = nf_register_hook(&cip_xfrm_out_ops);
911
	if (ret < 0)
912
		goto cleanup_xfrm_in;
913

  
706 914
#ifdef CONFIG_PROC_FS
707 915
	clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
708 916
	if (!clusterip_procdir) {
......
718 926

  
719 927
#ifdef CONFIG_PROC_FS
720 928
cleanup_hook:
721
	nf_unregister_hook(&cip_arp_ops);
929
	nf_unregister_hook(&cip_xfrm_out_ops);
722 930
#endif /* CONFIG_PROC_FS */
931
cleanup_xfrm_in:
932
	nf_unregister_hook(&cip_xfrm_in_ops);
933
cleanup_pre:
934
	nf_unregister_hook(&cip_pre_routing_ops);
935
cleanup_arp:
936
	nf_unregister_hook(&cip_arp_ops);
723 937
cleanup_target:
724 938
	xt_unregister_target(&clusterip_tg_reg);
725 939
	return ret;
......
733 947
	remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
734 948
#endif
735 949
	nf_unregister_hook(&cip_arp_ops);
950
	nf_unregister_hook(&cip_pre_routing_ops);
951
	nf_unregister_hook(&cip_xfrm_in_ops);
952
	nf_unregister_hook(&cip_xfrm_out_ops);
736 953
	xt_unregister_target(&clusterip_tg_reg);
737 954
}
738 955

  
739
-