Commit a71e3604 authored by Linus Torvalds
Browse files

Merge tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Trond made a change to the server's tcp logic that allows a fast
     client to better take advantage of high bandwidth networks, but may
     increase the risk that a single client could starve other clients;
     a new sunrpc.svc_rpc_per_connection_limit parameter should help
     mitigate this in the (hopefully unlikely) event this becomes a
     problem in practice.

   - Tom Haynes added a minimal flex-layout pnfs server, which is of no
     use in production for now--don't build it unless you're doing
     client testing or further server development"

* tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux: (32 commits)
  nfsd: remove some dead code in nfsd_create_locked()
  nfsd: drop unnecessary MAY_EXEC check from create
  nfsd: clean up bad-type check in nfsd_create_locked
  nfsd: remove unnecessary positive-dentry check
  nfsd: reorganize nfsd_create
  nfsd: check d_can_lookup in fh_verify of directories
  nfsd: remove redundant zero-length check from create
  nfsd: Make creates return EEXIST instead of EACCES
  SUNRPC: Detect immediate closure of accepted sockets
  SUNRPC: accept() may return sockets that are still in SYN_RECV
  nfsd: allow nfsd to advertise multiple layout types
  nfsd: Close race between nfsd4_release_lockowner and nfsd4_lock
  nfsd/blocklayout: Make sure calculate signature/designator length aligned
  xfs: abstract block export operations from nfsd layouts
  SUNRPC: Remove unused callback xpo_adjust_wspace()
  SUNRPC: Change TCP socket space reservation
  SUNRPC: Add a server side per-connection limit
  SUNRPC: Micro optimisation for svc_data_ready
  SUNRPC: Call the default socket callbacks instead of open coding
  SUNRPC: lock the socket while detaching it
  ...
parents d58b0d98 2b118859
......@@ -345,6 +345,7 @@ struct nfs4_client {
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
atomic_t cl_refcount;
struct nfs4_op_map cl_spo_must_allow;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
......
......@@ -1135,96 +1135,37 @@ nfsd_check_ignore_resizing(struct iattr *iap)
iap->ia_valid &= ~ATTR_SIZE;
}
/*
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
* If the response fh has been verified, the parent directory should
* already be locked. Note that the parent directory is left locked.
*
* N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
*/
/* The parent directory should already be locked: */
__be32
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen, struct iattr *iap,
int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild = NULL;
struct dentry *dentry, *dchild;
struct inode *dirp;
__be32 err;
__be32 err2;
int host_err;
err = nfserr_perm;
if (!flen)
goto out;
err = nfserr_exist;
if (isdotent(fname, flen))
goto out;
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
err = nfserr_notdir;
if (!dirp->i_op->lookup)
goto out;
/*
* Check whether the response file handle has been verified yet.
* If it has, the parent directory should already be locked.
*/
if (!resfhp->fh_dentry) {
host_err = fh_want_write(fhp);
if (host_err)
goto out_nfserr;
/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
fh_lock_nested(fhp, I_MUTEX_PARENT);
dchild = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild))
goto out_nfserr;
err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
if (err)
goto out;
} else {
/* called from nfsd_proc_create */
dchild = dget(resfhp->fh_dentry);
if (!fhp->fh_locked) {
/* not actually possible */
printk(KERN_ERR
"nfsd_create: parent %pd2 not locked!\n",
dchild = dget(resfhp->fh_dentry);
if (!fhp->fh_locked) {
WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
dentry);
err = nfserr_io;
goto out;
}
}
/*
* Make sure the child dentry is still negative ...
*/
err = nfserr_exist;
if (d_really_is_positive(dchild)) {
dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
dentry, dchild);
goto out;
err = nfserr_io;
goto out;
}
err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
if (err)
goto out;
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
err = nfserr_inval;
if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
type);
goto out;
}
/*
* Get the dir op function pointer.
*/
err = 0;
host_err = 0;
switch (type) {
......@@ -1242,6 +1183,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFSOCK:
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break;
default:
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
type);
host_err = -EINVAL;
}
if (host_err < 0)
goto out_nfserr;
......@@ -1251,7 +1196,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
/*
* nfsd_create_setattr already committed the child. Transactional
* filesystems had a chance to commit changes for both parent and
* child * simultaneously making the following commit_metadata a
* child simultaneously making the following commit_metadata a
* noop.
*/
err2 = nfserrno(commit_metadata(fhp));
......@@ -1263,8 +1208,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!err)
err = fh_update(resfhp);
out:
if (dchild && !IS_ERR(dchild))
dput(dchild);
dput(dchild);
return err;
out_nfserr:
......@@ -1272,6 +1216,50 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
/*
 * Create a filesystem object (regular, directory, special).
 * Note that the parent directory is left locked.
 *
 * This is the unlocked entry point: it verifies @fhp as a directory, takes
 * write access on it, locks it, looks up @fname beneath it, composes @resfhp
 * for the resulting dentry and then hands the actual creation over to
 * nfsd_create_locked().
 *
 * Returns 0 on success or an nfs error status.  The error returns taken
 * after fh_want_write()/fh_lock_nested() do not undo those steps here;
 * presumably the caller's fh_put() on @fhp does (see the N.B. below).
 *
 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
 */
__be32
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		char *fname, int flen, struct iattr *iap,
		int type, dev_t rdev, struct svc_fh *resfhp)
{
	struct dentry	*dentry, *dchild;
	__be32		err;
	int		host_err;

	if (isdotent(fname, flen))
		return nfserr_exist;

	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
	if (err)
		return err;

	dentry = fhp->fh_dentry;

	host_err = fh_want_write(fhp);
	if (host_err)
		return nfserrno(host_err);

	fh_lock_nested(fhp, I_MUTEX_PARENT);
	dchild = lookup_one_len(fname, dentry, flen);
	if (IS_ERR(dchild))
		return nfserrno(PTR_ERR(dchild));

	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
	/*
	 * Drop the reference obtained by the lookup unconditionally.  On
	 * success resfhp holds its own reference to the dentry, and
	 * nfsd_create_locked() takes and releases a further one via
	 * dget(resfhp->fh_dentry)/dput(), so keeping ours would leak a
	 * dentry reference on every successful create.
	 */
	dput(dchild);
	if (err)
		return err;

	return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
					rdev, resfhp);
}
#ifdef CONFIG_NFSD_V3
/*
......@@ -1304,12 +1292,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
/* Get all the sanity checks out of the way before
* we lock the parent. */
err = nfserr_notdir;
if (!dirp->i_op->lookup)
goto out;
host_err = fh_want_write(fhp);
if (host_err)
goto out_nfserr;
......
......@@ -59,6 +59,9 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
u64, u64);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
......
......@@ -59,6 +59,7 @@ struct nfsd4_compound_state {
struct nfsd4_session *session;
struct nfsd4_slot *slot;
int data_offset;
bool spo_must_allowed;
size_t iovlen;
u32 minorversion;
__be32 status;
......@@ -403,6 +404,8 @@ struct nfsd4_exchange_id {
clientid_t clientid;
u32 seqid;
int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
};
struct nfsd4_sequence {
......@@ -654,6 +657,8 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
}
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
struct nfsd4_compoundargs *);
......
......@@ -121,5 +121,4 @@ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o
xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o
xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
......@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = {
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
.commit_metadata = xfs_fs_nfs_commit_metadata,
#ifdef CONFIG_NFSD_BLOCKLAYOUT
#ifdef CONFIG_EXPORTFS_BLOCK_OPS
.get_uuid = xfs_fs_get_uuid,
.map_blocks = xfs_fs_map_blocks,
.commit_blocks = xfs_fs_commit_blocks,
......
#ifndef _XFS_PNFS_H
#define _XFS_PNFS_H 1
#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
#ifdef CONFIG_EXPORTFS_BLOCK_OPS
int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
struct iomap *iomap, bool write, u32 *device_generation);
......@@ -15,5 +15,5 @@ xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
{
return 0;
}
#endif /* CONFIG_NFSD_PNFS */
#endif /* CONFIG_EXPORTFS_BLOCK_OPS */
#endif /* _XFS_PNFS_H */
......@@ -643,4 +643,15 @@ enum pnfs_update_layout_reason {
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
};
/*
 * Number of unsigned longs needed to provide LAST_NFS4_OP bits, rounded up
 * (8 * sizeof(unsigned long) is the bit width of one long).
 */
#define NFS4_OP_MAP_NUM_LONGS \
DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
/* The same amount of storage expressed as a count of u32 words. */
#define NFS4_OP_MAP_NUM_WORDS \
(NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
/*
 * Map sized from LAST_NFS4_OP.  The union exposes one piece of storage
 * either as an array of unsigned longs or as an array of u32 words.
 * NOTE(review): presumably one bit per NFSv4 operation number, with the
 * longs view for bitops and the words view for wire encoding - confirm
 * against the users of this struct.
 */
struct nfs4_op_map {
union {
unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
u32 words[NFS4_OP_MAP_NUM_WORDS];
} u;
};
#endif
......@@ -1185,17 +1185,6 @@ struct pnfs_ds_commit_info {
struct pnfs_commit_bucket *buckets;
};
#define NFS4_OP_MAP_NUM_LONGS \
DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
#define NFS4_OP_MAP_NUM_WORDS \
(NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
struct nfs4_op_map {
union {
unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
u32 words[NFS4_OP_MAP_NUM_WORDS];
} u;
};
struct nfs41_state_protection {
u32 how;
struct nfs4_op_map enforce;
......
......@@ -78,8 +78,6 @@ struct cache_detail {
struct hlist_head * hash_table;
rwlock_t hash_lock;
atomic_t inuse; /* active user-space update or lookup */
char *name;
void (*cache_put)(struct kref *);
......
......@@ -268,6 +268,7 @@ struct svc_rqst {
* cache pages */
#define RQ_VICTIM (5) /* about to be shut down */
#define RQ_BUSY (6) /* request is busy */
#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */
void * rq_argp; /* decoded arguments */
......
......@@ -25,7 +25,6 @@ struct svc_xprt_ops {
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *);
void (*xpo_adjust_wspace)(struct svc_xprt *);
};
struct svc_xprt_class {
......@@ -69,6 +68,7 @@ struct svc_xprt {
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
atomic_t xpt_nr_rqsts; /* Number of requests */
struct mutex xpt_mutex; /* to serialize sending data */
spinlock_t xpt_lock; /* protects sk_deferred
* and xpt_auth_cache */
......
......@@ -473,6 +473,39 @@ TRACE_EVENT(svc_recv,
show_rqstp_flags(__entry->flags))
);
/*
 * Event class for tracepoints that take a single struct svc_rqst.
 *
 * Each record stores the request's rq_xid (kept as __be32 and converted
 * with be32_to_cpu() only when printed), a snapshot of rq_flags, and a
 * copy of rq_addr.  The address is held in a dynamic array of exactly
 * rq_addrlen bytes and printed through %pIScp.
 */
DECLARE_EVENT_CLASS(svc_rqst_event,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(__be32, xid)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),
TP_fast_assign(
__entry->xid = rqst->rq_xid;
__entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr),
&rqst->rq_addr, rqst->rq_addrlen);
),
TP_printk("addr=%pIScp rq_xid=0x%x flags=%s",
(struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid),
show_rqstp_flags(__entry->flags))
);
/* svc_defer: instance of svc_rqst_event, same record layout and format. */
DEFINE_EVENT(svc_rqst_event, svc_defer,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst));
/* svc_drop: instance of svc_rqst_event, same record layout and format. */
DEFINE_EVENT(svc_rqst_event, svc_drop,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst));
DECLARE_EVENT_CLASS(svc_rqst_status,
TP_PROTO(struct svc_rqst *rqst, int status),
......@@ -529,45 +562,67 @@ TRACE_EVENT(svc_xprt_do_enqueue,
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field_struct(struct sockaddr_storage, ss)
__field(int, pid)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ?
xprt->xpt_remotelen : 0)
),
TP_fast_assign(
__entry->xprt = xprt;
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->pid = rqst? rqst->rq_task->pid : 0;
__entry->flags = xprt ? xprt->xpt_flags : 0;
if (xprt) {
memcpy(__get_dynamic_array(addr),
&xprt->xpt_remote,
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss,
__get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->pid, show_svc_xprt_flags(__entry->flags))
);
TRACE_EVENT(svc_xprt_dequeue,
DECLARE_EVENT_CLASS(svc_xprt_event,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ?
xprt->xpt_remotelen : 0)
),
TP_fast_assign(
__entry->xprt = xprt,
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->flags = xprt ? xprt->xpt_flags : 0;
__entry->xprt = xprt;
if (xprt) {
memcpy(__get_dynamic_array(addr),
&xprt->xpt_remote,
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss,
__get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
show_svc_xprt_flags(__entry->flags))
);
DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt));
DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt));
TRACE_EVENT(svc_wake_up,
TP_PROTO(int pid),
......@@ -592,21 +647,56 @@ TRACE_EVENT(svc_handle_xprt,
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(int, len)
__field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ?
xprt->xpt_remotelen : 0)
),
TP_fast_assign(
__entry->xprt = xprt;
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->len = len;
__entry->flags = xprt ? xprt->xpt_flags : 0;
if (xprt) {
memcpy(__get_dynamic_array(addr),
&xprt->xpt_remote,
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss,
__get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->len, show_svc_xprt_flags(__entry->flags))
);
/*
 * Event class for tracepoints that take a struct svc_deferred_req.
 *
 * Each record stores an xid read out of the saved request data and a copy
 * of dr->addr held in a dynamic array of dr->addrlen bytes.
 */
DECLARE_EVENT_CLASS(svc_deferred_event,
TP_PROTO(struct svc_deferred_req *dr),
TP_ARGS(dr),
TP_STRUCT__entry(
__field(__be32, xid)
__dynamic_array(unsigned char, addr, dr->addrlen)
),
TP_fast_assign(
/*
 * The xid is read at element offset (xprt_hlen >> 2) within dr->args.
 * NOTE(review): this presumably skips a transport header whose length
 * is kept in bytes, with args being an array of 32-bit words - confirm
 * against struct svc_deferred_req.
 */
__entry->xid = *(__be32 *)(dr->args + (dr->xprt_hlen>>2));
memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen);
),
TP_printk("addr=%pIScp xid=0x%x",
(struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid))
);
/* svc_drop_deferred: instance of svc_deferred_event. */
DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
TP_PROTO(struct svc_deferred_req *dr),
TP_ARGS(dr));
/* svc_revisit_deferred: instance of svc_deferred_event. */
DEFINE_EVENT(svc_deferred_event, svc_revisit_deferred,
TP_PROTO(struct svc_deferred_req *dr),
TP_ARGS(dr));
#endif /* _TRACE_SUNRPC_H */
#include <trace/define_trace.h>
......@@ -1230,8 +1230,9 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
if (status)
goto out;
dprintk("RPC: svcauth_gss: gss major status = %d\n",
ud.major_status);
dprintk("RPC: svcauth_gss: gss major status = %d "
"minor status = %d\n",
ud.major_status, ud.minor_status);
switch (ud.major_status) {
case GSS_S_CONTINUE_NEEDED:
......
......@@ -362,7 +362,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
cache_purge(cd);
spin_lock(&cache_list_lock);
write_lock(&cd->hash_lock);
if (cd->entries || atomic_read(&cd->inuse)) {
if (cd->entries) {
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
goto out;
......
......@@ -21,6 +21,10 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
 * Upper bound on a transport's xpt_nr_rqsts count, as checked by
 * svc_xprt_slots_in_range().  Zero, the initial value of this static,
 * disables the check.  Exposed as a module parameter with mode 0644.
 */
static unsigned int svc_rpc_per_connection_limit __read_mostly;
module_param(svc_rpc_per_connection_limit, uint, 0644);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
......@@ -329,12 +333,45 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
}
EXPORT_SYMBOL_GPL(svc_print_addr);
/*
 * Report whether @xprt may take on another request under the
 * per-connection limit.  A limit of zero always answers true; otherwise
 * the transport's request count must be non-negative and strictly below
 * the limit.
 */
static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
{
	unsigned int max_rqsts = svc_rpc_per_connection_limit;
	int in_flight;

	if (max_rqsts == 0)
		return true;

	in_flight = atomic_read(&xprt->xpt_nr_rqsts);
	if (in_flight < 0)
		return false;

	return in_flight < max_rqsts;
}
/*
 * Account @rqstp against @xprt's request count.
 *
 * A request that already carries RQ_DATA has been counted and succeeds
 * immediately.  Otherwise the slot check must pass before the counter is
 * bumped and RQ_DATA is set.  Returns false only when the slot check
 * fails.
 */
static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	if (test_bit(RQ_DATA, &rqstp->rq_flags))
		return true;

	if (!svc_xprt_slots_in_range(xprt))
		return false;

	atomic_inc(&xprt->xpt_nr_rqsts);
	set_bit(RQ_DATA, &rqstp->rq_flags);
	return true;
}
/*
 * Undo svc_xprt_reserve_slot() for @rqstp.
 *
 * Does nothing unless RQ_DATA was set.  When it was, the flag is cleared,
 * the transport's request count is decremented, and the transport is
 * handed to svc_xprt_enqueue() - presumably so that work held back by the
 * per-connection limit gets reconsidered.
 */
static void svc_xprt_release_slot(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;

	if (!test_and_clear_bit(RQ_DATA, &rqstp->rq_flags))
		return;

	atomic_dec(&xprt->xpt_nr_rqsts);
	svc_xprt_enqueue(xprt);
}
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
{
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
return true;
if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
return xprt->xpt_ops->xpo_has_wspace(xprt);
if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
svc_xprt_slots_in_range(xprt))
return true;
trace_svc_xprt_no_write_space(xprt);
return false;
}
return false;
}
......@@ -480,8 +517,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
if (xprt->xpt_ops->xpo_adjust_wspace)
xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt);
}
}
......@@ -512,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
rqstp->rq_res.head[0].iov_len = 0;
svc_reserve(rqstp, 0);
svc_xprt_release_slot(rqstp);
rqstp->rq_xprt = NULL;
svc_xprt_put(xprt);
}
......@@ -781,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
} else {
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
......@@ -871,6 +906,7 @@ EXPORT_SYMBOL_GPL(svc_recv);
*/
void svc_drop(struct svc_rqst *rqstp)
{
trace_svc_drop(rqstp);
dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
......@@ -1148,6 +1184,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
spin_unlock(&xprt->xpt_lock);
dprintk("revisit canceled\n");
svc_xprt_put(xprt);
trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
......@@ -1205,6 +1242,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
trace_svc_defer(rqstp);
return &dr->handle;
}
......@@ -1245,6 +1283,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);
......
......@@ -60,7 +60,6 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
static void svc_udp_data_ready(struct sock *);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
static void svc_sock_detach(struct svc_xprt *);
......@@ -398,48 +397,21 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
return svc_port_is_privileged(svc_addr(rqstp));
}
static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
{
if (!wq)
return false;
/*
* There should normally be a memory * barrier here--see
* wq_has_sleeper().
*
* It appears that isn't currently necessary, though, basically
* because callers all appear to have sufficient memory barriers
* between the time the relevant change is made and the
* time they call these callbacks.
*
* The nfsd code itself doesn't actually explicitly wait on
* these waitqueues, but it may wait on them for example in
* sendpage() or sendmsg() calls. (And those may be the only
* places, since it it uses nonblocking reads.)
*
* Maybe we should add the memory barriers anyway, but these are
* hot paths so we'd need to be convinced there's no sigificant
* penalty.
*/
return waitqueue_active(wq);
}
/*
* INET callback when data has been received on the socket.
*/
static void svc_udp_data_ready(struct sock *sk)
static void svc_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
svsk->sk_odata(sk);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
/*
......@@ -448,56 +420,22 @@ static void svc_udp_data_ready(struct sock *sk)
static void svc_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
if (sunrpc_waitqueue_active(wq)) {
dprintk("RPC svc_write_space: someone sleeping on %p\n",
svsk);
wake_up_interruptible(wq);
}
}
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int required;
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
return 1;
required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
if (sk_stream_wspace(svsk->sk_sk) >= required ||
(sk_stream_min_wspace(svsk->sk_sk) == 0 &&
atomic_read(&xprt->xpt_reserved) == 0))
return 1;
set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
return 0;
}
static void svc_tcp_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
struct socket *sock = sk->sk_socket;
if (!sk_stream_is_writeable(sk) || !sock)
return;
if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
clear_bit(SOCK_NOSPACE, &sock->flags);
svc_write_space(sk);
}
static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
if (svc_tcp_has_wspace(xprt))
clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}
/*
......@@ -746,7 +684,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_data_ready = svc_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
/* initialise setting must have enough space to
......@@ -786,11 +724,12 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq;
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
if (svsk)
svsk->sk_odata(sk);
/*
* This callback may be called twice when a new connection
* is established as a child socket inherits everything
......@@ -808,10 +747,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
} else
printk("svc: socket %p: no user data\n", sk);
}
wq = sk_sleep(sk);
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
/*
......@@ -820,7 +755,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
static void svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
sk, sk->sk_state, sk->sk_user_data);
......@@ -828,26 +762,12 @@ static void svc_tcp_state_change(struct sock *sk)
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
static void svc_tcp_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
dprintk("svc: socket %p TCP data ready (svsk %p)\n",
sk, sk->sk_user_data);
if (svsk) {
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
svsk->sk_ostate(sk);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
}
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
/*
......@@ -901,6 +821,11 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
dprintk("%s: connect from %s\n", serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
/* Reset the inherited callbacks before calling svc_setup_socket */
newsock->sk->sk_state_change = svsk->sk_ostate;
newsock->sk->sk_data_ready = svsk->sk_odata;
newsock->sk->sk_write_space = svsk->sk_owspace;
/* make sure that a write doesn't block forever when
* low on memory
*/
......@@ -1317,7 +1242,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
.xpo_adjust_wspace = svc_tcp_adjust_wspace,
};
static struct svc_xprt_class svc_tcp_class = {
......@@ -1357,8 +1281,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
} else {
dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change;
sk->sk_data_ready = svc_tcp_data_ready;
sk->sk_write_space = svc_tcp_write_space;
sk->sk_data_ready = svc_data_ready;
sk->sk_write_space = svc_write_space;
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
......@@ -1368,8 +1292,13 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state != TCP_ESTABLISHED)
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
break;
default:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
}
}
}
......@@ -1428,17 +1357,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv);
else {
/* initialise setting must have enough space to
* receive and respond to one request.
*/
svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
4 * serv->sv_max_mesg);
else
svc_tcp_init(svsk, serv);
}
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
dprintk("svc: svc_setup_socket created %p (inet %p), "
"listen %d close %d\n",
svsk, svsk->sk_sk,
test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
return svsk;
}
......@@ -1606,18 +1532,16 @@ static void svc_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;
wait_queue_head_t *wq;
dprintk("svc: svc_sock_detach(%p)\n", svsk);
/* put back the old socket callbacks */
lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
sk->sk_data_ready = svsk->sk_odata;
sk->sk_write_space = svsk->sk_owspace;
wq = sk_sleep(sk);
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
sk->sk_user_data = NULL;
release_sock(sk);
}
/*
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment