Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index f82a495..ee92634 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -4,6 +4,7 @@
depends on INET
depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
select IP_SCTP
+ select SRCU
help
A general purpose distributed lock manager for kernel or userspace
applications.
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index b7e288c..18a8ffc 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -73,6 +73,7 @@ struct dlm_cluster {
unsigned int cl_log_debug;
unsigned int cl_log_info;
unsigned int cl_protocol;
+ unsigned int cl_mark;
unsigned int cl_timewarn_cs;
unsigned int cl_waitwarn_us;
unsigned int cl_new_rsb_count;
@@ -99,6 +100,7 @@ enum {
CLUSTER_ATTR_LOG_DEBUG,
CLUSTER_ATTR_LOG_INFO,
CLUSTER_ATTR_PROTOCOL,
+ CLUSTER_ATTR_MARK,
CLUSTER_ATTR_TIMEWARN_CS,
CLUSTER_ATTR_WAITWARN_US,
CLUSTER_ATTR_NEW_RSB_COUNT,
@@ -126,7 +128,7 @@ static ssize_t cluster_cluster_name_store(struct config_item *item,
CONFIGFS_ATTR(cluster_, cluster_name);
static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
- int *info_field, int check_zero,
+ int *info_field, int (*check_cb)(unsigned int x),
const char *buf, size_t len)
{
unsigned int x;
@@ -138,8 +140,11 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
if (rc)
return rc;
- if (check_zero && !x)
- return -EINVAL;
+ if (check_cb) {
+ rc = check_cb(x);
+ if (rc)
+ return rc;
+ }
*cl_field = x;
*info_field = x;
@@ -147,13 +152,13 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
return len;
}
-#define CLUSTER_ATTR(name, check_zero) \
+#define CLUSTER_ATTR(name, check_cb) \
static ssize_t cluster_##name##_store(struct config_item *item, \
const char *buf, size_t len) \
{ \
struct dlm_cluster *cl = config_item_to_cluster(item); \
return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
- check_zero, buf, len); \
+ check_cb, buf, len); \
} \
static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \
{ \
@@ -162,19 +167,36 @@ static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \
} \
CONFIGFS_ATTR(cluster_, name);
-CLUSTER_ATTR(tcp_port, 1);
-CLUSTER_ATTR(buffer_size, 1);
-CLUSTER_ATTR(rsbtbl_size, 1);
-CLUSTER_ATTR(recover_timer, 1);
-CLUSTER_ATTR(toss_secs, 1);
-CLUSTER_ATTR(scan_secs, 1);
-CLUSTER_ATTR(log_debug, 0);
-CLUSTER_ATTR(log_info, 0);
-CLUSTER_ATTR(protocol, 0);
-CLUSTER_ATTR(timewarn_cs, 1);
-CLUSTER_ATTR(waitwarn_us, 0);
-CLUSTER_ATTR(new_rsb_count, 0);
-CLUSTER_ATTR(recover_callbacks, 0);
+static int dlm_check_zero(unsigned int x)
+{
+ if (!x)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int dlm_check_buffer_size(unsigned int x)
+{
+ if (x < DEFAULT_BUFFER_SIZE)
+ return -EINVAL;
+
+ return 0;
+}
+
+CLUSTER_ATTR(tcp_port, dlm_check_zero);
+CLUSTER_ATTR(buffer_size, dlm_check_buffer_size);
+CLUSTER_ATTR(rsbtbl_size, dlm_check_zero);
+CLUSTER_ATTR(recover_timer, dlm_check_zero);
+CLUSTER_ATTR(toss_secs, dlm_check_zero);
+CLUSTER_ATTR(scan_secs, dlm_check_zero);
+CLUSTER_ATTR(log_debug, NULL);
+CLUSTER_ATTR(log_info, NULL);
+CLUSTER_ATTR(protocol, NULL);
+CLUSTER_ATTR(mark, NULL);
+CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
+CLUSTER_ATTR(waitwarn_us, NULL);
+CLUSTER_ATTR(new_rsb_count, NULL);
+CLUSTER_ATTR(recover_callbacks, NULL);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port,
@@ -186,6 +208,7 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug,
[CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol,
+ [CLUSTER_ATTR_MARK] = &cluster_attr_mark,
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs,
[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us,
[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count,
@@ -199,6 +222,7 @@ enum {
COMM_ATTR_LOCAL,
COMM_ATTR_ADDR,
COMM_ATTR_ADDR_LIST,
+ COMM_ATTR_MARK,
};
enum {
@@ -232,6 +256,7 @@ struct dlm_comm {
int nodeid;
int local;
int addr_count;
+ unsigned int mark;
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};
@@ -477,6 +502,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
cm->nodeid = -1;
cm->local = 0;
cm->addr_count = 0;
+ cm->mark = 0;
return &cm->item;
}
@@ -672,8 +698,28 @@ static ssize_t comm_addr_list_show(struct config_item *item, char *buf)
return 4096 - allowance;
}
+static ssize_t comm_mark_show(struct config_item *item, char *buf)
+{
+ return sprintf(buf, "%u\n", config_item_to_comm(item)->mark);
+}
+
+static ssize_t comm_mark_store(struct config_item *item, const char *buf,
+ size_t len)
+{
+ unsigned int mark;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &mark);
+ if (rc)
+ return rc;
+
+ config_item_to_comm(item)->mark = mark;
+ return len;
+}
+
CONFIGFS_ATTR(comm_, nodeid);
CONFIGFS_ATTR(comm_, local);
+CONFIGFS_ATTR(comm_, mark);
CONFIGFS_ATTR_WO(comm_, addr);
CONFIGFS_ATTR_RO(comm_, addr_list);
@@ -682,6 +728,7 @@ static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_LOCAL] = &comm_attr_local,
[COMM_ATTR_ADDR] = &comm_attr_addr,
[COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list,
+ [COMM_ATTR_MARK] = &comm_attr_mark,
NULL,
};
@@ -841,6 +888,24 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
return 0;
}
+void dlm_comm_mark(int nodeid, unsigned int *mark)
+{
+ struct dlm_comm *cm;
+
+ cm = get_comm(nodeid);
+ if (!cm) {
+ *mark = dlm_config.ci_mark;
+ return;
+ }
+
+ if (cm->mark)
+ *mark = cm->mark;
+ else
+ *mark = dlm_config.ci_mark;
+
+ put_comm(cm);
+}
+
int dlm_our_nodeid(void)
{
return local_comm ? local_comm->nodeid : 0;
@@ -859,7 +924,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
/* Config file defaults */
#define DEFAULT_TCP_PORT 21064
-#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 1024
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
@@ -867,6 +931,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_DEBUG 0
#define DEFAULT_LOG_INFO 1
#define DEFAULT_PROTOCOL 0
+#define DEFAULT_MARK 0
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
#define DEFAULT_WAITWARN_US 0
#define DEFAULT_NEW_RSB_COUNT 128
@@ -883,6 +948,7 @@ struct dlm_config_info dlm_config = {
.ci_log_debug = DEFAULT_LOG_DEBUG,
.ci_log_info = DEFAULT_LOG_INFO,
.ci_protocol = DEFAULT_PROTOCOL,
+ .ci_mark = DEFAULT_MARK,
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
.ci_waitwarn_us = DEFAULT_WAITWARN_US,
.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 2b471aa..c210250 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -12,6 +12,8 @@
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__
+#define DEFAULT_BUFFER_SIZE 4096
+
struct dlm_config_node {
int nodeid;
int weight;
@@ -31,6 +33,7 @@ struct dlm_config_info {
int ci_log_debug;
int ci_log_info;
int ci_protocol;
+ int ci_mark;
int ci_timewarn_cs;
int ci_waitwarn_us;
int ci_new_rsb_count;
@@ -45,6 +48,7 @@ void dlm_config_exit(void);
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int *count_out);
int dlm_comm_seq(int nodeid, uint32_t *seq);
+void dlm_comm_mark(int nodeid, unsigned int *mark);
int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 4311d01..04fe9f5 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -420,7 +420,7 @@ struct dlm_message {
int m_bastmode;
int m_asts;
int m_result; /* 0 or -EXXX */
- char m_extra[0]; /* name or lvb */
+ char m_extra[]; /* name or lvb */
};
@@ -449,7 +449,7 @@ struct dlm_rcom {
uint64_t rc_id; /* match reply with request */
uint64_t rc_seq; /* sender's ls_recover_seq */
uint64_t rc_seq_reply; /* remote ls_recover_seq */
- char rc_buf[0];
+ char rc_buf[];
};
union dlm_packet {
@@ -505,7 +505,7 @@ struct rcom_lock {
__le16 rl_wait_type;
__le16 rl_namelen;
char rl_name[DLM_RESNAME_MAXLEN];
- char rl_lvb[0];
+ char rl_lvb[];
};
/*
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 18d8159..1e9d899 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3975,6 +3975,14 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
int from = ms->m_header.h_nodeid;
int error = 0;
+ /* currently mixing of user/kernel locks are not supported */
+ if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) {
+ log_error(lkb->lkb_resource->res_ls,
+ "got user dlm message for a kernel lock");
+ error = -EINVAL;
+ goto out;
+ }
+
switch (ms->m_type) {
case DLM_MSG_CONVERT:
case DLM_MSG_UNLOCK:
@@ -4003,6 +4011,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
error = -EINVAL;
}
+out:
if (error)
log_error(lkb->lkb_resource->res_ls,
"ignore invalid message %d from %d %x %x %x %d",
@@ -5817,7 +5826,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
break;
case -EAGAIN:
error = 0;
- /* fall through */
+ fallthrough;
default:
__put_lkb(ls, lkb);
goto out;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index c689359..624617c 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,8 +197,6 @@ static struct kset *dlm_kset;
static int do_uevent(struct dlm_ls *ls, int in)
{
- int error;
-
if (in)
kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
else
@@ -209,20 +207,12 @@ static int do_uevent(struct dlm_ls *ls, int in)
/* dlm_controld will see the uevent, do the necessary group management
and then write to sysfs to wake us */
- error = wait_event_interruptible(ls->ls_uevent_wait,
- test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+ wait_event(ls->ls_uevent_wait,
+ test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
- log_rinfo(ls, "group event done %d %d", error, ls->ls_uevent_result);
+ log_rinfo(ls, "group event done %d", ls->ls_uevent_result);
- if (error)
- goto out;
-
- error = ls->ls_uevent_result;
- out:
- if (error)
- log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
- error, ls->ls_uevent_result);
- return error;
+ return ls->ls_uevent_result;
}
static int dlm_uevent(struct kset *kset, struct kobject *kobj,
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index d9202ba..68b7653 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -63,40 +63,7 @@
/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
-
-struct cbuf {
- unsigned int base;
- unsigned int len;
- unsigned int mask;
-};
-
-static void cbuf_add(struct cbuf *cb, int n)
-{
- cb->len += n;
-}
-
-static int cbuf_data(struct cbuf *cb)
-{
- return ((cb->base + cb->len) & cb->mask);
-}
-
-static void cbuf_init(struct cbuf *cb, int size)
-{
- cb->base = cb->len = 0;
- cb->mask = size-1;
-}
-
-static void cbuf_eat(struct cbuf *cb, int n)
-{
- cb->len -= n;
- cb->base += n;
- cb->base &= cb->mask;
-}
-
-static bool cbuf_empty(struct cbuf *cb)
-{
- return cb->len == 0;
-}
+#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)
struct connection {
struct socket *sock; /* NULL if not connected */
@@ -110,18 +77,23 @@ struct connection {
#define CF_CLOSE 6
#define CF_APP_LIMITED 7
#define CF_CLOSING 8
+#define CF_SHUTDOWN 9
struct list_head writequeue; /* List of outgoing writequeue_entries */
spinlock_t writequeue_lock;
int (*rx_action) (struct connection *); /* What to do when active */
void (*connect_action) (struct connection *); /* What to do to connect */
- struct page *rx_page;
- struct cbuf cb;
+ void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
int retries;
#define MAX_CONNECT_RETRIES 3
struct hlist_node list;
struct connection *othercon;
struct work_struct rwork; /* Receive workqueue */
struct work_struct swork; /* Send workqueue */
+ wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
+ unsigned char *rx_buf;
+ int rx_buflen;
+ int rx_leftover;
+ struct rcu_head rcu;
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@@ -163,8 +135,8 @@ static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;
static struct hlist_head connection_hash[CONN_HASH_SIZE];
-static DEFINE_MUTEX(connections_lock);
-static struct kmem_cache *con_cache;
+static DEFINE_SPINLOCK(connections_lock);
+DEFINE_STATIC_SRCU(connections_srcu);
static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);
@@ -180,15 +152,20 @@ static inline int nodeid_hash(int nodeid)
static struct connection *__find_con(int nodeid)
{
- int r;
+ int r, idx;
struct connection *con;
r = nodeid_hash(nodeid);
- hlist_for_each_entry(con, &connection_hash[r], list) {
- if (con->nodeid == nodeid)
+ idx = srcu_read_lock(&connections_srcu);
+ hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
+ if (con->nodeid == nodeid) {
+ srcu_read_unlock(&connections_srcu, idx);
return con;
+ }
}
+ srcu_read_unlock(&connections_srcu, idx);
+
return NULL;
}
@@ -196,21 +173,25 @@ static struct connection *__find_con(int nodeid)
* If 'allocation' is zero then we don't attempt to create a new
* connection structure for this node.
*/
-static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
+static struct connection *nodeid2con(int nodeid, gfp_t alloc)
{
- struct connection *con = NULL;
+ struct connection *con, *tmp;
int r;
con = __find_con(nodeid);
if (con || !alloc)
return con;
- con = kmem_cache_zalloc(con_cache, alloc);
+ con = kzalloc(sizeof(*con), alloc);
if (!con)
return NULL;
- r = nodeid_hash(nodeid);
- hlist_add_head(&con->list, &connection_hash[r]);
+ con->rx_buflen = dlm_config.ci_buffer_size;
+ con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS);
+ if (!con->rx_buf) {
+ kfree(con);
+ return NULL;
+ }
con->nodeid = nodeid;
mutex_init(&con->sock_mutex);
@@ -218,6 +199,7 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
spin_lock_init(&con->writequeue_lock);
INIT_WORK(&con->swork, process_send_sockets);
INIT_WORK(&con->rwork, process_recv_sockets);
+ init_waitqueue_head(&con->shutdown_wait);
/* Setup action pointers for child sockets */
if (con->nodeid) {
@@ -228,31 +210,41 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
con->rx_action = zerocon->rx_action;
}
+ r = nodeid_hash(nodeid);
+
+ spin_lock(&connections_lock);
+ /* Because multiple workqueues/threads calls this function it can
+ * race on multiple cpu's. Instead of locking hot path __find_con()
+ * we just check in rare cases of recently added nodes again
+ * under protection of connections_lock. If this is the case we
+ * abort our connection creation and return the existing connection.
+ */
+ tmp = __find_con(nodeid);
+ if (tmp) {
+ spin_unlock(&connections_lock);
+ kfree(con->rx_buf);
+ kfree(con);
+ return tmp;
+ }
+
+ hlist_add_head_rcu(&con->list, &connection_hash[r]);
+ spin_unlock(&connections_lock);
+
return con;
}
/* Loop round all connections */
static void foreach_conn(void (*conn_func)(struct connection *c))
{
- int i;
- struct hlist_node *n;
+ int i, idx;
struct connection *con;
+ idx = srcu_read_lock(&connections_srcu);
for (i = 0; i < CONN_HASH_SIZE; i++) {
- hlist_for_each_entry_safe(con, n, &connection_hash[i], list)
+ hlist_for_each_entry_rcu(con, &connection_hash[i], list)
conn_func(con);
}
-}
-
-static struct connection *nodeid2con(int nodeid, gfp_t allocation)
-{
- struct connection *con;
-
- mutex_lock(&connections_lock);
- con = __nodeid2con(nodeid, allocation);
- mutex_unlock(&connections_lock);
-
- return con;
+ srcu_read_unlock(&connections_srcu, idx);
}
static struct dlm_node_addr *find_node_addr(int nodeid)
@@ -479,8 +471,8 @@ int dlm_lowcomms_connect_node(int nodeid)
static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
- struct sockaddr_storage saddr;
void (*orig_report)(struct sock *) = NULL;
+ struct inet_sock *inet;
read_lock_bh(&sk->sk_callback_lock);
con = sock2con(sk);
@@ -488,34 +480,33 @@ static void lowcomms_error_report(struct sock *sk)
goto out;
orig_report = listen_sock.sk_error_report;
- if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
- printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d, port %d, "
- "sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, dlm_config.ci_tcp_port,
- sk->sk_err, sk->sk_err_soft);
- } else if (saddr.ss_family == AF_INET) {
- struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
+ inet = inet_sk(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d at %pI4, port %d, "
+ "sending to node %d at %pI4, dport %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, &sin4->sin_addr.s_addr,
- dlm_config.ci_tcp_port, sk->sk_err,
+ con->nodeid, &inet->inet_daddr,
+ ntohs(inet->inet_dport), sk->sk_err,
sk->sk_err_soft);
- } else {
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
-
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d at %u.%u.%u.%u, "
- "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, sin6->sin6_addr.s6_addr32[0],
- sin6->sin6_addr.s6_addr32[1],
- sin6->sin6_addr.s6_addr32[2],
- sin6->sin6_addr.s6_addr32[3],
- dlm_config.ci_tcp_port, sk->sk_err,
+ "sending to node %d at %pI6c, "
+ "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
+ con->nodeid, &sk->sk_v6_daddr,
+ ntohs(inet->inet_dport), sk->sk_err,
sk->sk_err_soft);
+ break;
+#endif
+ default:
+ printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+ "invalid socket family %d set, "
+ "sk_err=%d/%d\n", dlm_our_nodeid(),
+ sk->sk_family, sk->sk_err, sk->sk_err_soft);
+ goto out;
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -609,26 +600,85 @@ static void close_connection(struct connection *con, bool and_other,
/* Will only re-enter once. */
close_connection(con->othercon, false, tx, rx);
}
- if (con->rx_page) {
- __free_page(con->rx_page);
- con->rx_page = NULL;
- }
+ con->rx_leftover = 0;
con->retries = 0;
mutex_unlock(&con->sock_mutex);
clear_bit(CF_CLOSING, &con->flags);
}
+static void shutdown_connection(struct connection *con)
+{
+ int ret;
+
+ flush_work(&con->swork);
+
+ mutex_lock(&con->sock_mutex);
+ /* nothing to shutdown */
+ if (!con->sock) {
+ mutex_unlock(&con->sock_mutex);
+ return;
+ }
+
+ set_bit(CF_SHUTDOWN, &con->flags);
+ ret = kernel_sock_shutdown(con->sock, SHUT_WR);
+ mutex_unlock(&con->sock_mutex);
+ if (ret) {
+ log_print("Connection %p failed to shutdown: %d will force close",
+ con, ret);
+ goto force_close;
+ } else {
+ ret = wait_event_timeout(con->shutdown_wait,
+ !test_bit(CF_SHUTDOWN, &con->flags),
+ DLM_SHUTDOWN_WAIT_TIMEOUT);
+ if (ret == 0) {
+ log_print("Connection %p shutdown timed out, will force close",
+ con);
+ goto force_close;
+ }
+ }
+
+ return;
+
+force_close:
+ clear_bit(CF_SHUTDOWN, &con->flags);
+ close_connection(con, false, true, true);
+}
+
+static void dlm_tcp_shutdown(struct connection *con)
+{
+ if (con->othercon)
+ shutdown_connection(con->othercon);
+ shutdown_connection(con);
+}
+
+static int con_realloc_receive_buf(struct connection *con, int newlen)
+{
+ unsigned char *newbuf;
+
+ newbuf = kmalloc(newlen, GFP_NOFS);
+ if (!newbuf)
+ return -ENOMEM;
+
+ /* copy any leftover from last receive */
+ if (con->rx_leftover)
+ memmove(newbuf, con->rx_buf, con->rx_leftover);
+
+ /* swap to new buffer space */
+ kfree(con->rx_buf);
+ con->rx_buflen = newlen;
+ con->rx_buf = newbuf;
+
+ return 0;
+}
+
/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
- int ret = 0;
- struct msghdr msg = {};
- struct kvec iov[2];
- unsigned len;
- int r;
int call_again_soon = 0;
- int nvec;
+ struct msghdr msg;
+ struct kvec iov;
+ int ret, buflen;
mutex_lock(&con->sock_mutex);
@@ -636,71 +686,55 @@ static int receive_from_sock(struct connection *con)
ret = -EAGAIN;
goto out_close;
}
+
if (con->nodeid == 0) {
ret = -EINVAL;
goto out_close;
}
- if (con->rx_page == NULL) {
- /*
- * This doesn't need to be atomic, but I think it should
- * improve performance if it is.
- */
- con->rx_page = alloc_page(GFP_ATOMIC);
- if (con->rx_page == NULL)
+ /* realloc if we get new buffer size to read out */
+ buflen = dlm_config.ci_buffer_size;
+ if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
+ ret = con_realloc_receive_buf(con, buflen);
+ if (ret < 0)
goto out_resched;
- cbuf_init(&con->cb, PAGE_SIZE);
}
- /*
- * iov[0] is the bit of the circular buffer between the current end
- * point (cb.base + cb.len) and the end of the buffer.
+ /* calculate new buffer parameter regarding last receive and
+ * possible leftover bytes
*/
- iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
- iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
- iov[1].iov_len = 0;
- nvec = 1;
+ iov.iov_base = con->rx_buf + con->rx_leftover;
+ iov.iov_len = con->rx_buflen - con->rx_leftover;
- /*
- * iov[1] is the bit of the circular buffer between the start of the
- * buffer and the start of the currently used section (cb.base)
- */
- if (cbuf_data(&con->cb) >= con->cb.base) {
- iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb);
- iov[1].iov_len = con->cb.base;
- iov[1].iov_base = page_address(con->rx_page);
- nvec = 2;
- }
- len = iov[0].iov_len + iov[1].iov_len;
- iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len);
-
- r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+ ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
if (ret <= 0)
goto out_close;
- else if (ret == len)
+ else if (ret == iov.iov_len)
call_again_soon = 1;
- cbuf_add(&con->cb, ret);
- ret = dlm_process_incoming_buffer(con->nodeid,
- page_address(con->rx_page),
- con->cb.base, con->cb.len,
- PAGE_SIZE);
- if (ret == -EBADMSG) {
- log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d",
- page_address(con->rx_page), con->cb.base,
- con->cb.len, r);
- }
+ /* new buflen according readed bytes and leftover from last receive */
+ buflen = ret + con->rx_leftover;
+ ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
if (ret < 0)
goto out_close;
- cbuf_eat(&con->cb, ret);
- if (cbuf_empty(&con->cb) && !call_again_soon) {
- __free_page(con->rx_page);
- con->rx_page = NULL;
+ /* calculate leftover bytes from process and put it into begin of
+ * the receive buffer, so next receive we have the full message
+ * at the start address of the receive buffer.
+ */
+ con->rx_leftover = buflen - ret;
+ if (con->rx_leftover) {
+ memmove(con->rx_buf, con->rx_buf + ret,
+ con->rx_leftover);
+ call_again_soon = true;
}
if (call_again_soon)
goto out_resched;
+
mutex_unlock(&con->sock_mutex);
return 0;
@@ -713,18 +747,23 @@ static int receive_from_sock(struct connection *con)
out_close:
mutex_unlock(&con->sock_mutex);
if (ret != -EAGAIN) {
- close_connection(con, true, true, false);
/* Reconnect when there is something to send */
+ close_connection(con, false, true, false);
+ if (ret == 0) {
+ log_print("connection %p got EOF from %d",
+ con, con->nodeid);
+ /* handling for tcp shutdown */
+ clear_bit(CF_SHUTDOWN, &con->flags);
+ wake_up(&con->shutdown_wait);
+ /* signal to breaking receive worker */
+ ret = -1;
+ }
}
- /* Don't return success if we really got EOF */
- if (ret == 0)
- ret = -EAGAIN;
-
return ret;
}
/* Listening socket is busy, accept a connection */
-static int tcp_accept_from_sock(struct connection *con)
+static int accept_from_sock(struct connection *con)
{
int result;
struct sockaddr_storage peeraddr;
@@ -733,13 +772,11 @@ static int tcp_accept_from_sock(struct connection *con)
int nodeid;
struct connection *newcon;
struct connection *addcon;
+ unsigned int mark;
- mutex_lock(&connections_lock);
if (!dlm_allow_conn) {
- mutex_unlock(&connections_lock);
return -1;
}
- mutex_unlock(&connections_lock);
mutex_lock_nested(&con->sock_mutex, 0);
@@ -772,6 +809,9 @@ static int tcp_accept_from_sock(struct connection *con)
return -1;
}
+ dlm_comm_mark(nodeid, &mark);
+ sock_set_mark(newsock->sk, mark);
+
log_print("got connection from %d", nodeid);
/* Check to see if we already have a connection to this node. This
@@ -789,13 +829,24 @@ static int tcp_accept_from_sock(struct connection *con)
struct connection *othercon = newcon->othercon;
if (!othercon) {
- othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
+ othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
if (!othercon) {
log_print("failed to allocate incoming socket");
mutex_unlock(&newcon->sock_mutex);
result = -ENOMEM;
goto accept_err;
}
+
+ othercon->rx_buflen = dlm_config.ci_buffer_size;
+ othercon->rx_buf = kmalloc(othercon->rx_buflen, GFP_NOFS);
+ if (!othercon->rx_buf) {
+ mutex_unlock(&newcon->sock_mutex);
+ kfree(othercon);
+ log_print("failed to allocate incoming socket receive buffer");
+ result = -ENOMEM;
+ goto accept_err;
+ }
+
othercon->nodeid = nodeid;
othercon->rx_action = receive_from_sock;
mutex_init(&othercon->sock_mutex);
@@ -803,22 +854,18 @@ static int tcp_accept_from_sock(struct connection *con)
spin_lock_init(&othercon->writequeue_lock);
INIT_WORK(&othercon->swork, process_send_sockets);
INIT_WORK(&othercon->rwork, process_recv_sockets);
+ init_waitqueue_head(&othercon->shutdown_wait);
set_bit(CF_IS_OTHERCON, &othercon->flags);
+ } else {
+ /* close other sock con if we have something new */
+ close_connection(othercon, false, true, false);
}
+
mutex_lock_nested(&othercon->sock_mutex, 2);
- if (!othercon->sock) {
- newcon->othercon = othercon;
- add_sock(newsock, othercon);
- addcon = othercon;
- mutex_unlock(&othercon->sock_mutex);
- }
- else {
- printk("Extra connection from node %d attempted\n", nodeid);
- result = -EAGAIN;
- mutex_unlock(&othercon->sock_mutex);
- mutex_unlock(&newcon->sock_mutex);
- goto accept_err;
- }
+ newcon->othercon = othercon;
+ add_sock(newsock, othercon);
+ addcon = othercon;
+ mutex_unlock(&othercon->sock_mutex);
}
else {
newcon->rx_action = receive_from_sock;
@@ -852,123 +899,6 @@ static int tcp_accept_from_sock(struct connection *con)
return result;
}
-static int sctp_accept_from_sock(struct connection *con)
-{
- /* Check that the new node is in the lockspace */
- struct sctp_prim prim;
- int nodeid;
- int prim_len, ret;
- int addr_len;
- struct connection *newcon;
- struct connection *addcon;
- struct socket *newsock;
-
- mutex_lock(&connections_lock);
- if (!dlm_allow_conn) {
- mutex_unlock(&connections_lock);
- return -1;
- }
- mutex_unlock(&connections_lock);
-
- mutex_lock_nested(&con->sock_mutex, 0);
-
- ret = kernel_accept(con->sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- goto accept_err;
-
- memset(&prim, 0, sizeof(struct sctp_prim));
- prim_len = sizeof(struct sctp_prim);
-
- ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
- (char *)&prim, &prim_len);
- if (ret < 0) {
- log_print("getsockopt/sctp_primary_addr failed: %d", ret);
- goto accept_err;
- }
-
- make_sockaddr(&prim.ssp_addr, 0, &addr_len);
- ret = addr_to_nodeid(&prim.ssp_addr, &nodeid);
- if (ret) {
- unsigned char *b = (unsigned char *)&prim.ssp_addr;
-
- log_print("reject connect from unknown addr");
- print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
- b, sizeof(struct sockaddr_storage));
- goto accept_err;
- }
-
- newcon = nodeid2con(nodeid, GFP_NOFS);
- if (!newcon) {
- ret = -ENOMEM;
- goto accept_err;
- }
-
- mutex_lock_nested(&newcon->sock_mutex, 1);
-
- if (newcon->sock) {
- struct connection *othercon = newcon->othercon;
-
- if (!othercon) {
- othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
- if (!othercon) {
- log_print("failed to allocate incoming socket");
- mutex_unlock(&newcon->sock_mutex);
- ret = -ENOMEM;
- goto accept_err;
- }
- othercon->nodeid = nodeid;
- othercon->rx_action = receive_from_sock;
- mutex_init(&othercon->sock_mutex);
- INIT_LIST_HEAD(&othercon->writequeue);
- spin_lock_init(&othercon->writequeue_lock);
- INIT_WORK(&othercon->swork, process_send_sockets);
- INIT_WORK(&othercon->rwork, process_recv_sockets);
- set_bit(CF_IS_OTHERCON, &othercon->flags);
- }
- mutex_lock_nested(&othercon->sock_mutex, 2);
- if (!othercon->sock) {
- newcon->othercon = othercon;
- add_sock(newsock, othercon);
- addcon = othercon;
- mutex_unlock(&othercon->sock_mutex);
- } else {
- printk("Extra connection from node %d attempted\n", nodeid);
- ret = -EAGAIN;
- mutex_unlock(&othercon->sock_mutex);
- mutex_unlock(&newcon->sock_mutex);
- goto accept_err;
- }
- } else {
- newcon->rx_action = receive_from_sock;
- add_sock(newsock, newcon);
- addcon = newcon;
- }
-
- log_print("connected to %d", nodeid);
-
- mutex_unlock(&newcon->sock_mutex);
-
- /*
- * Add it to the active queue in case we got data
- * between processing the accept adding the socket
- * to the read_sockets list
- */
- if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
- queue_work(recv_workqueue, &addcon->rwork);
- mutex_unlock(&con->sock_mutex);
-
- return 0;
-
-accept_err:
- mutex_unlock(&con->sock_mutex);
- if (newsock)
- sock_release(newsock);
- if (ret != -EAGAIN)
- log_print("error accepting connection from node: %d", ret);
-
- return ret;
-}
-
static void free_entry(struct writequeue_entry *e)
{
__free_page(e->page);
@@ -999,6 +929,7 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
static int sctp_bind_addrs(struct connection *con, uint16_t port)
{
struct sockaddr_storage localaddr;
+ struct sockaddr *addr = (struct sockaddr *)&localaddr;
int i, addr_len, result = 0;
for (i = 0; i < dlm_local_count; i++) {
@@ -1006,13 +937,9 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port)
make_sockaddr(&localaddr, port, &addr_len);
if (!i)
- result = kernel_bind(con->sock,
- (struct sockaddr *)&localaddr,
- addr_len);
+ result = kernel_bind(con->sock, addr, addr_len);
else
- result = kernel_setsockopt(con->sock, SOL_SCTP,
- SCTP_SOCKOPT_BINDX_ADD,
- (char *)&localaddr, addr_len);
+ result = sock_bind_add(con->sock->sk, addr, addr_len);
if (result < 0) {
log_print("Can't bind to %d addr number %d, %d.\n",
@@ -1031,17 +958,18 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port)
static void sctp_connect_to_sock(struct connection *con)
{
struct sockaddr_storage daddr;
- int one = 1;
int result;
int addr_len;
struct socket *sock;
- struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
+ unsigned int mark;
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
return;
}
+ dlm_comm_mark(con->nodeid, &mark);
+
mutex_lock(&con->sock_mutex);
/* Some odd races can cause double-connects, ignore them */
@@ -1066,6 +994,8 @@ static void sctp_connect_to_sock(struct connection *con)
if (result < 0)
goto socket_err;
+ sock_set_mark(sock->sk, mark);
+
con->rx_action = receive_from_sock;
con->connect_action = sctp_connect_to_sock;
add_sock(sock, con);
@@ -1079,21 +1009,17 @@ static void sctp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
- sizeof(one));
+ sctp_sock_set_nodelay(sock->sk);
/*
* Make sock->ops->connect() function return in specified time,
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_OLD, (char *)&tv,
- sizeof(tv));
+ sock_set_sndtimeo(sock->sk, 5);
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
0);
- memset(&tv, 0, sizeof(tv));
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_OLD, (char *)&tv,
- sizeof(tv));
+ sock_set_sndtimeo(sock->sk, 0);
if (result == -EINPROGRESS)
result = 0;
@@ -1132,7 +1058,7 @@ static void tcp_connect_to_sock(struct connection *con)
struct sockaddr_storage saddr, src_addr;
int addr_len;
struct socket *sock = NULL;
- int one = 1;
+ unsigned int mark;
int result;
if (con->nodeid == 0) {
@@ -1140,6 +1066,8 @@ static void tcp_connect_to_sock(struct connection *con)
return;
}
+ dlm_comm_mark(con->nodeid, &mark);
+
mutex_lock(&con->sock_mutex);
if (con->retries++ > MAX_CONNECT_RETRIES)
goto out;
@@ -1154,6 +1082,8 @@ static void tcp_connect_to_sock(struct connection *con)
if (result < 0)
goto out_err;
+ sock_set_mark(sock->sk, mark);
+
memset(&saddr, 0, sizeof(saddr));
result = nodeid_to_addr(con->nodeid, &saddr, NULL, false);
if (result < 0) {
@@ -1163,6 +1093,7 @@ static void tcp_connect_to_sock(struct connection *con)
con->rx_action = receive_from_sock;
con->connect_action = tcp_connect_to_sock;
+ con->shutdown_action = dlm_tcp_shutdown;
add_sock(sock, con);
/* Bind to our cluster-known address connecting to avoid
@@ -1181,8 +1112,7 @@ static void tcp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
- sizeof(one));
+ tcp_sock_set_nodelay(sock->sk);
result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
O_NONBLOCK);
@@ -1224,7 +1154,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
{
struct socket *sock = NULL;
int result = 0;
- int one = 1;
int addr_len;
if (dlm_local_addr[0]->ss_family == AF_INET)
@@ -1240,20 +1169,17 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
goto create_out;
}
+ sock_set_mark(sock->sk, dlm_config.ci_mark);
+
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
- sizeof(one));
+ tcp_sock_set_nodelay(sock->sk);
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&one, sizeof(one));
+ sock_set_reuseaddr(sock->sk);
- if (result < 0) {
- log_print("Failed to set SO_REUSEADDR on socket: %d", result);
- }
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = con;
save_listen_callbacks(sock);
- con->rx_action = tcp_accept_from_sock;
+ con->rx_action = accept_from_sock;
con->connect_action = tcp_connect_to_sock;
write_unlock_bh(&sock->sk->sk_callback_lock);
@@ -1267,11 +1193,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
con->sock = NULL;
goto create_out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&one, sizeof(one));
- if (result < 0) {
- log_print("Set keepalive failed: %d", result);
- }
+ sock_set_keepalive(sock->sk);
result = sock->ops->listen(sock, 5);
if (result < 0) {
@@ -1303,14 +1225,20 @@ static void init_local(void)
}
}
+static void deinit_local(void)
+{
+ int i;
+
+ for (i = 0; i < dlm_local_count; i++)
+ kfree(dlm_local_addr[i]);
+}
+
/* Initialise SCTP socket and bind to all interfaces */
static int sctp_listen_for_all(void)
{
struct socket *sock = NULL;
int result = -EINVAL;
struct connection *con = nodeid2con(0, GFP_NOFS);
- int bufsize = NEEDED_RMEM;
- int one = 1;
if (!con)
return -ENOMEM;
@@ -1324,15 +1252,9 @@ static int sctp_listen_for_all(void)
goto out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
- (char *)&bufsize, sizeof(bufsize));
- if (result)
- log_print("Error increasing buffer space on socket %d", result);
-
- result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
- sizeof(one));
- if (result < 0)
- log_print("Could not set SCTP NODELAY error %d\n", result);
+ sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
+ sock_set_mark(sock->sk, dlm_config.ci_mark);
+ sctp_sock_set_nodelay(sock->sk);
write_lock_bh(&sock->sk->sk_callback_lock);
/* Init con struct */
@@ -1340,7 +1262,7 @@ static int sctp_listen_for_all(void)
save_listen_callbacks(sock);
con->sock = sock;
con->sock->sk->sk_data_ready = lowcomms_data_ready;
- con->rx_action = sctp_accept_from_sock;
+ con->rx_action = accept_from_sock;
con->connect_action = sctp_connect_to_sock;
write_unlock_bh(&sock->sk->sk_callback_lock);
@@ -1543,7 +1465,7 @@ static void send_to_sock(struct connection *con)
send_error:
mutex_unlock(&con->sock_mutex);
- close_connection(con, true, false, true);
+ close_connection(con, false, false, true);
/* Requeue the send work. When the work daemon runs again, it will try
a new connection, then call this function again. */
queue_work(send_workqueue, &con->swork);
@@ -1619,13 +1541,6 @@ static void process_send_sockets(struct work_struct *work)
send_to_sock(con);
}
-
-/* Discard all entries on the write queues */
-static void clean_writequeues(void)
-{
- foreach_conn(clean_one_writequeue);
-}
-
static void work_stop(void)
{
if (recv_workqueue)
@@ -1675,26 +1590,40 @@ static void stop_conn(struct connection *con)
_stop_conn(con, true);
}
+static void shutdown_conn(struct connection *con)
+{
+ if (con->shutdown_action)
+ con->shutdown_action(con);
+}
+
+static void connection_release(struct rcu_head *rcu)
+{
+ struct connection *con = container_of(rcu, struct connection, rcu);
+
+ kfree(con->rx_buf);
+ kfree(con);
+}
+
static void free_conn(struct connection *con)
{
close_connection(con, true, true, true);
- if (con->othercon)
- kmem_cache_free(con_cache, con->othercon);
- hlist_del(&con->list);
- kmem_cache_free(con_cache, con);
+ spin_lock(&connections_lock);
+ hlist_del_rcu(&con->list);
+ spin_unlock(&connections_lock);
+ if (con->othercon) {
+ clean_one_writequeue(con->othercon);
+ call_rcu(&con->othercon->rcu, connection_release);
+ }
+ clean_one_writequeue(con);
+ call_rcu(&con->rcu, connection_release);
}
static void work_flush(void)
{
- int ok;
+ int ok, idx;
int i;
- struct hlist_node *n;
struct connection *con;
- if (recv_workqueue)
- flush_workqueue(recv_workqueue);
- if (send_workqueue)
- flush_workqueue(send_workqueue);
do {
ok = 1;
foreach_conn(stop_conn);
@@ -1702,9 +1631,10 @@ static void work_flush(void)
flush_workqueue(recv_workqueue);
if (send_workqueue)
flush_workqueue(send_workqueue);
+ idx = srcu_read_lock(&connections_srcu);
for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
- hlist_for_each_entry_safe(con, n,
- &connection_hash[i], list) {
+ hlist_for_each_entry_rcu(con, &connection_hash[i],
+ list) {
ok &= test_bit(CF_READ_PENDING, &con->flags);
ok &= test_bit(CF_WRITE_PENDING, &con->flags);
if (con->othercon) {
@@ -1715,6 +1645,7 @@ static void work_flush(void)
}
}
}
+ srcu_read_unlock(&connections_srcu, idx);
} while (!ok);
}
@@ -1723,15 +1654,18 @@ void dlm_lowcomms_stop(void)
/* Set all the flags to prevent any
socket activity.
*/
- mutex_lock(&connections_lock);
dlm_allow_conn = 0;
- mutex_unlock(&connections_lock);
+
+ if (recv_workqueue)
+ flush_workqueue(recv_workqueue);
+ if (send_workqueue)
+ flush_workqueue(send_workqueue);
+
+ foreach_conn(shutdown_conn);
work_flush();
- clean_writequeues();
foreach_conn(free_conn);
work_stop();
-
- kmem_cache_destroy(con_cache);
+ deinit_local();
}
int dlm_lowcomms_start(void)
@@ -1750,16 +1684,9 @@ int dlm_lowcomms_start(void)
goto fail;
}
- error = -ENOMEM;
- con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
- __alignof__(struct connection), 0,
- NULL);
- if (!con_cache)
- goto fail;
-
error = work_start();
if (error)
- goto fail_destroy;
+ goto fail;
dlm_allow_conn = 1;
@@ -1776,12 +1703,8 @@ int dlm_lowcomms_start(void)
fail_unlisten:
dlm_allow_conn = 0;
con = nodeid2con(0,0);
- if (con) {
- close_connection(con, false, true, true);
- kmem_cache_free(con_cache, con);
- }
-fail_destroy:
- kmem_cache_destroy(con_cache);
+ if (con)
+ free_conn(con);
fail:
return error;
}
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 921322d..0bedfa8 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -22,114 +22,85 @@
* into packets and sends them to the comms layer.
*/
+#include <asm/unaligned.h>
+
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "lock.h"
#include "midcomms.h"
-
-static void copy_from_cb(void *dst, const void *base, unsigned offset,
- unsigned len, unsigned limit)
-{
- unsigned copy = len;
-
- if ((copy + offset) > limit)
- copy = limit - offset;
- memcpy(dst, base + offset, copy);
- len -= copy;
- if (len)
- memcpy(dst + copy, base, len);
-}
-
/*
* Called from the low-level comms layer to process a buffer of
* commands.
- *
- * Only complete messages are processed here, any "spare" bytes from
- * the end of a buffer are saved and tacked onto the front of the next
- * message that comes in. I doubt this will happen very often but we
- * need to be able to cope with it and I don't want the task to be waiting
- * for packets to come in when there is useful work to be done.
*/
-int dlm_process_incoming_buffer(int nodeid, const void *base,
- unsigned offset, unsigned len, unsigned limit)
+int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
{
- union {
- unsigned char __buf[DLM_INBUF_LEN];
- /* this is to force proper alignment on some arches */
- union dlm_packet p;
- } __tmp;
- union dlm_packet *p = &__tmp.p;
- int ret = 0;
- int err = 0;
+ const unsigned char *ptr = buf;
+ const struct dlm_header *hd;
uint16_t msglen;
- uint32_t lockspace;
+ int ret = 0;
- while (len > sizeof(struct dlm_header)) {
+ while (len >= sizeof(struct dlm_header)) {
+ hd = (struct dlm_header *)ptr;
- /* Copy just the header to check the total length. The
- message may wrap around the end of the buffer back to the
- start, so we need to use a temp buffer and copy_from_cb. */
-
- copy_from_cb(p, base, offset, sizeof(struct dlm_header),
- limit);
-
- msglen = le16_to_cpu(p->header.h_length);
- lockspace = p->header.h_lockspace;
-
- err = -EINVAL;
- if (msglen < sizeof(struct dlm_header))
- break;
- if (p->header.h_cmd == DLM_MSG) {
- if (msglen < sizeof(struct dlm_message))
- break;
- } else {
- if (msglen < sizeof(struct dlm_rcom))
- break;
+ /* no message should be more than this otherwise we
+ * cannot deliver this message to upper layers
+ */
+ msglen = get_unaligned_le16(&hd->h_length);
+ if (msglen > DEFAULT_BUFFER_SIZE ||
+ msglen < sizeof(struct dlm_header)) {
+ log_print("received invalid length header: %u from node %d, will abort message parsing",
+ msglen, nodeid);
+ return -EBADMSG;
}
- err = -E2BIG;
- if (msglen > dlm_config.ci_buffer_size) {
- log_print("message size %d from %d too big, buf len %d",
- msglen, nodeid, len);
- break;
- }
- err = 0;
- /* If only part of the full message is contained in this
- buffer, then do nothing and wait for lowcomms to call
- us again later with more data. We return 0 meaning
- we've consumed none of the input buffer. */
-
+ /* caller will take care that leftover
+ * will be parsed next call with more data
+ */
if (msglen > len)
break;
- /* Allocate a larger temp buffer if the full message won't fit
- in the buffer on the stack (which should work for most
- ordinary messages). */
+ switch (hd->h_cmd) {
+ case DLM_MSG:
+ if (msglen < sizeof(struct dlm_message)) {
+ log_print("dlm msg too small: %u, will skip this message",
+ msglen);
+ goto skip;
+ }
- if (msglen > sizeof(__tmp) && p == &__tmp.p) {
- p = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
- if (p == NULL)
- return ret;
+ break;
+ case DLM_RCOM:
+ if (msglen < sizeof(struct dlm_rcom)) {
+ log_print("dlm rcom msg too small: %u, will skip this message",
+ msglen);
+ goto skip;
+ }
+
+ break;
+ default:
+ log_print("unsupported h_cmd received: %u, will skip this message",
+ hd->h_cmd);
+ goto skip;
}
- copy_from_cb(p, base, offset, msglen, limit);
+ /* for aligned memory access, we just copy current message
+ * to begin of the buffer which contains already parsed buffer
+ * data and should provide align access for upper layers
+ * because the start address of the buffer has a aligned
+ * address. This memmove can be removed when the upperlayer
+ * is capable of unaligned memory access.
+ */
+ memmove(buf, ptr, msglen);
+ dlm_receive_buffer((union dlm_packet *)buf, nodeid);
- BUG_ON(lockspace != p->header.h_lockspace);
-
+skip:
ret += msglen;
- offset += msglen;
- offset &= (limit - 1);
len -= msglen;
-
- dlm_receive_buffer(p, nodeid);
+ ptr += msglen;
}
- if (p != &__tmp.p)
- kfree(p);
-
- return err ? err : ret;
+ return ret;
}
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 2e122e8..61e90a9 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -12,8 +12,7 @@
#ifndef __MIDCOMMS_DOT_H__
#define __MIDCOMMS_DOT_H__
-int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset,
- unsigned len, unsigned limit);
+int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int buflen);
#endif /* __MIDCOMMS_DOT_H__ */
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index e7f5503..67f68d4 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -62,7 +62,7 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static const struct genl_ops dlm_nl_ops[] = {
+static const struct genl_small_ops dlm_nl_ops[] = {
{
.cmd = DLM_CMD_HELLO,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -73,8 +73,8 @@ static const struct genl_ops dlm_nl_ops[] = {
static struct genl_family family __ro_after_init = {
.name = DLM_GENL_NAME,
.version = DLM_GENL_VERSION,
- .ops = dlm_nl_ops,
- .n_ops = ARRAY_SIZE(dlm_nl_ops),
+ .small_ops = dlm_nl_ops,
+ .n_small_ops = ARRAY_SIZE(dlm_nl_ops),
.module = THIS_MODULE,
};
@@ -113,7 +113,7 @@ static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
void dlm_timeout_warn(struct dlm_lkb *lkb)
{
- struct sk_buff *uninitialized_var(send_skb);
+ struct sk_buff *send_skb;
struct dlm_lock_data *data;
size_t size;
int rv;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index e3d9f72..4daf5dc 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -563,7 +563,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
lock = 1;
reply = 1;
break;
- };
+ }
spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 5264bac..e5cefa9 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -46,7 +46,7 @@ struct dlm_lock_params32 {
__u32 bastaddr;
__u32 lksb;
char lvb[DLM_USER_LVB_LEN];
- char name[0];
+ char name[];
};
struct dlm_write_request32 {