Commit d484467c authored by Linus Torvalds
Browse files

Merge tag 'xfs-4.12-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "Here are the XFS changes for 4.12. The big new feature for this
  release is the new space mapping ioctl that we've been discussing
  since LSF2016, but other than that most of the patches are larger bug
  fixes, memory corruption prevention, and other cleanups.

  Summary:
   - various code cleanups
   - introduce GETFSMAP ioctl
   - various refactoring
   - avoid dio reads past eof
   - fix memory corruption and other errors with fragmented directory blocks
   - fix accidental userspace memory corruptions
   - publish fs uuid in superblock
   - make fstrim terminatable
   - fix race between quotaoff and in-core inode creation
   - avoid use-after-free when finishing up w/ buffer heads
   - reserve enough space to handle bmap tree resizing during cow remap"

* tag 'xfs-4.12-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (53 commits)
  xfs: fix use-after-free in xfs_finish_page_writeback
  xfs: reserve enough blocks to handle btree splits when remapping
  xfs: wait on new inodes during quotaoff dquot release
  xfs: update ag iterator to support wait on new inodes
  xfs: support ability to wait on new inodes
  xfs: publish UUID in struct super_block
  xfs: Allow user to kill fstrim process
  xfs: better log intent item refcount checking
  xfs: fix up quotacheck buffer list error handling
  xfs: remove xfs_trans_ail_delete_bulk
  xfs: don't use bool values in trace buffers
  xfs: fix getfsmap userspace memory corruption while setting OF_LAST
  xfs: fix __user annotations for xfs_ioc_getfsmap
  xfs: corruption needs to respect endianess too!
  xfs: use NULL instead of 0 to initialize a pointer in xfs_ioc_getfsmap
  xfs: use NULL instead of 0 to initialize a pointer in xfs_getfsmap
  xfs: simplify validation of the unwritten extent bit
  xfs: remove unused values from xfs_exntst_t
  xfs: remove the unused XFS_MAXLINK_1 define
  xfs: more do_div cleanups
  ...
parents 044f1daa 161f55ef
......@@ -73,6 +73,10 @@ xfs_uuid_mount(
uuid_t *uuid = &mp->m_sb.sb_uuid;
int hole, i;
/* Publish UUID in struct super_block */
BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
if (mp->m_flags & XFS_MOUNT_NOUUID)
return 0;
......
......@@ -183,6 +183,7 @@ typedef struct xfs_mount {
struct workqueue_struct *m_reclaim_workqueue;
struct workqueue_struct *m_log_workqueue;
struct workqueue_struct *m_eofblocks_workqueue;
struct workqueue_struct *m_sync_workqueue;
/*
* Generation of the filesystem layout. This is incremented by each
......@@ -312,7 +313,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
static inline xfs_agnumber_t
xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
{
xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
do_div(ld, mp->m_sb.sb_agblocks);
return (xfs_agnumber_t) ld;
}
......@@ -320,7 +321,7 @@ xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
static inline xfs_agblock_t
xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
{
xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
......
......@@ -851,8 +851,8 @@ xfs_qm_reset_dqcounts(
* started afresh by xfs_qm_quotacheck.
*/
#ifdef DEBUG
j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
do_div(j, sizeof(xfs_dqblk_t));
j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
sizeof(xfs_dqblk_t);
ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
#endif
dqb = bp->b_addr;
......@@ -1384,12 +1384,7 @@ xfs_qm_quotacheck(
mp->m_qflags |= flags;
error_return:
while (!list_empty(&buffer_list)) {
struct xfs_buf *bp =
list_first_entry(&buffer_list, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
xfs_buf_relse(bp);
}
xfs_buf_delwri_cancel(&buffer_list);
if (error) {
xfs_warn(mp,
......
......@@ -759,5 +759,6 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
XFS_AGITER_INEW_WAIT);
}
......@@ -221,6 +221,7 @@ void
xfs_cui_release(
struct xfs_cui_log_item *cuip)
{
ASSERT(atomic_read(&cuip->cui_refcount) > 0);
if (atomic_dec_and_test(&cuip->cui_refcount)) {
xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_cui_item_free(cuip);
......
......@@ -206,11 +206,7 @@ xfs_reflink_trim_around_shared(
int error = 0;
/* Holes, unwritten, and delalloc extents cannot be shared */
if (!xfs_is_reflink_inode(ip) ||
ISUNWRITTEN(irec) ||
irec->br_startblock == HOLESTARTBLOCK ||
irec->br_startblock == DELAYSTARTBLOCK ||
isnullstartblock(irec->br_startblock)) {
if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
*shared = false;
return 0;
}
......@@ -709,8 +705,22 @@ xfs_reflink_end_cow(
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
/* Start a rolling transaction to switch the mappings */
resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
/*
* Start a rolling transaction to switch the mappings. We're
* unlikely ever to have to remap 16T worth of single-block
* extents, so just cap the worst case extent count to 2^32-1.
* Stick a warning in just in case, and avoid 64-bit division.
*/
BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX);
if (end_fsb - offset_fsb > UINT_MAX) {
error = -EFSCORRUPTED;
xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE);
ASSERT(0);
goto out;
}
resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount,
(unsigned int)(end_fsb - offset_fsb),
XFS_DATA_FORK);
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
resblks, 0, 0, &tp);
if (error)
......@@ -1045,12 +1055,12 @@ xfs_reflink_remap_extent(
xfs_off_t new_isize)
{
struct xfs_mount *mp = ip->i_mount;
bool real_extent = xfs_bmap_is_real_extent(irec);
struct xfs_trans *tp;
xfs_fsblock_t firstfsb;
unsigned int resblks;
struct xfs_defer_ops dfops;
struct xfs_bmbt_irec uirec;
bool real_extent;
xfs_filblks_t rlen;
xfs_filblks_t unmap_len;
xfs_off_t newlen;
......@@ -1059,11 +1069,6 @@ xfs_reflink_remap_extent(
unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
/* Only remap normal extents. */
real_extent = (irec->br_startblock != HOLESTARTBLOCK &&
irec->br_startblock != DELAYSTARTBLOCK &&
!ISUNWRITTEN(irec));
/* No reflinking if we're low on space */
if (real_extent) {
error = xfs_reflink_ag_has_free_space(mp,
......@@ -1359,9 +1364,7 @@ xfs_reflink_dirty_extents(
goto out;
if (nmaps == 0)
break;
if (map[0].br_startblock == HOLESTARTBLOCK ||
map[0].br_startblock == DELAYSTARTBLOCK ||
ISUNWRITTEN(&map[0]))
if (!xfs_bmap_is_real_extent(&map[0]))
goto next;
map[1] = map[0];
......@@ -1435,9 +1438,7 @@ xfs_reflink_clear_inode_flag(
return error;
if (nmaps == 0)
break;
if (map.br_startblock == HOLESTARTBLOCK ||
map.br_startblock == DELAYSTARTBLOCK ||
ISUNWRITTEN(&map))
if (!xfs_bmap_is_real_extent(&map))
goto next;
agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
......
......@@ -243,6 +243,7 @@ void
xfs_rui_release(
struct xfs_rui_log_item *ruip)
{
ASSERT(atomic_read(&ruip->rui_refcount) > 0);
if (atomic_dec_and_test(&ruip->rui_refcount)) {
xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_rui_item_free(ruip);
......
......@@ -23,6 +23,16 @@
struct xfs_mount;
struct xfs_trans;
/*
 * A start/length pair, both typed xfs_rtblock_t.  Instances are handed to
 * xfs_rtalloc_query_range_fn callbacks.
 * NOTE(review): presumably this describes a run of realtime blocks and the
 * units are those of the realtime allocator -- confirm against the
 * implementation, which is not visible here.
 */
struct xfs_rtalloc_rec {
xfs_rtblock_t ar_startblock;
xfs_rtblock_t ar_blockcount;
};
/*
 * Callback type taking a transaction, one xfs_rtalloc_rec and an opaque
 * caller-supplied pointer; returns an int.
 * NOTE(review): the meaning of the return value (error code vs. "stop
 * iterating") is not visible here -- confirm before relying on it.
 */
typedef int (*xfs_rtalloc_query_range_fn)(
struct xfs_trans *tp,
struct xfs_rtalloc_rec *rec,
void *priv);
#ifdef CONFIG_XFS_RT
/*
* Function prototypes for exported functions.
......@@ -118,13 +128,21 @@ int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtblock_t start, xfs_extlen_t len,
struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
int xfs_rtalloc_query_range(struct xfs_trans *tp,
struct xfs_rtalloc_rec *low_rec,
struct xfs_rtalloc_rec *high_rec,
xfs_rtalloc_query_range_fn fn,
void *priv);
int xfs_rtalloc_query_all(struct xfs_trans *tp,
xfs_rtalloc_query_range_fn fn,
void *priv);
#else
# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS)
# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
......
......@@ -877,8 +877,15 @@ xfs_init_mount_workqueues(
if (!mp->m_eofblocks_workqueue)
goto out_destroy_log;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
mp->m_fsname);
if (!mp->m_sync_workqueue)
goto out_destroy_eofb;
return 0;
out_destroy_eofb:
destroy_workqueue(mp->m_eofblocks_workqueue);
out_destroy_log:
destroy_workqueue(mp->m_log_workqueue);
out_destroy_reclaim:
......@@ -899,6 +906,7 @@ STATIC void
xfs_destroy_mount_workqueues(
struct xfs_mount *mp)
{
destroy_workqueue(mp->m_sync_workqueue);
destroy_workqueue(mp->m_eofblocks_workqueue);
destroy_workqueue(mp->m_log_workqueue);
destroy_workqueue(mp->m_reclaim_workqueue);
......
......@@ -47,6 +47,7 @@
#include "xfs_inode_item.h"
#include "xfs_bmap_btree.h"
#include "xfs_filestream.h"
#include "xfs_fsmap.h"
/*
* We include this last to have the helpers above available for the trace
......
......@@ -40,6 +40,8 @@ struct xfs_inode_log_format;
struct xfs_bmbt_irec;
struct xfs_btree_cur;
struct xfs_refcount_irec;
struct xfs_fsmap;
struct xfs_rmap_irec;
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
......@@ -2190,7 +2192,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class,
__entry->agbno = agbno;
__entry->len = len;
),
TP_printk("dev %d:%d agno %u agbno %u len %u\n",
TP_printk("dev %d:%d agno %u agbno %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
......@@ -2253,8 +2255,8 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(void *, dop)
__field(bool, committed)
__field(bool, low)
__field(char, committed)
__field(char, low)
),
TP_fast_assign(
__entry->dev = mp ? mp->m_super->s_dev : 0;
......@@ -2262,7 +2264,7 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
__entry->committed = dop->dop_committed;
__entry->low = dop->dop_low;
),
TP_printk("dev %d:%d ops %p committed %d low %d\n",
TP_printk("dev %d:%d ops %p committed %d low %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->dop,
__entry->committed,
......@@ -2279,8 +2281,8 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(void *, dop)
__field(bool, committed)
__field(bool, low)
__field(char, committed)
__field(char, low)
__field(int, error)
),
TP_fast_assign(
......@@ -2290,7 +2292,7 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
__entry->low = dop->dop_low;
__entry->error = error;
),
TP_printk("dev %d:%d ops %p committed %d low %d err %d\n",
TP_printk("dev %d:%d ops %p committed %d low %d err %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->dop,
__entry->committed,
......@@ -2309,7 +2311,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__field(dev_t, dev)
__field(int, type)
__field(void *, intent)
__field(bool, committed)
__field(char, committed)
__field(int, nr)
),
TP_fast_assign(
......@@ -2319,7 +2321,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__entry->committed = dfp->dfp_done != NULL;
__entry->nr = dfp->dfp_count;
),
TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",
TP_printk("dev %d:%d optype %d intent %p committed %d nr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->intent,
......@@ -2614,7 +2616,8 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class,
__entry->asked = r ? r->ar_asked : 0;
__entry->len = len;
),
TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n",
TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u "
"resv %u ask %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->resv,
......@@ -2667,7 +2670,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
__entry->agbno = agbno;
__entry->dir = dir;
),
TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)\n",
TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
......@@ -2700,7 +2703,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u\n",
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->startblock,
......@@ -2735,7 +2738,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
__entry->refcount = irec->rc_refcount;
__entry->agbno = agbno;
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u\n",
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->startblock,
......@@ -2776,7 +2779,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
__entry->i2_refcount = i2->rc_refcount;
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
"agbno %u len %u refcount %u\n",
"agbno %u len %u refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->i1_startblock,
......@@ -2822,7 +2825,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
__entry->agbno = agbno;
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
"agbno %u len %u refcount %u @ agbno %u\n",
"agbno %u len %u refcount %u @ agbno %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->i1_startblock,
......@@ -2875,7 +2878,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
"agbno %u len %u refcount %u -- "
"agbno %u len %u refcount %u\n",
"agbno %u len %u refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->i1_startblock,
......@@ -3001,31 +3004,6 @@ DEFINE_EVENT(xfs_inode_error_class, name, \
unsigned long caller_ip), \
TP_ARGS(ip, error, caller_ip))
/* reflink allocator */
TRACE_EVENT(xfs_bmap_remap_alloc,
TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
xfs_extlen_t len),
TP_ARGS(ip, fsbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fsblock_t, fsbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->fsbno = fsbno;
__entry->len = len;
),
TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->fsbno,
__entry->len)
);
DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
/* reflink tracepoint classes */
/* two-file io tracepoint class */
......@@ -3227,7 +3205,7 @@ TRACE_EVENT(xfs_ioctl_clone,
),
TP_printk("dev %d:%d "
"ino 0x%lx isize 0x%llx -> "
"ino 0x%lx isize 0x%llx\n",
"ino 0x%lx isize 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->src_ino,
__entry->src_isize,
......@@ -3267,6 +3245,88 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
/* fsmap traces */
/*
 * Event class that records one struct xfs_rmap_irec together with the
 * mount's device, a "key" device and an AG number.
 *
 * keydev arrives as a u32 and is run through new_decode_dev() so that it is
 * stored as a dev_t and can be split with MAJOR()/MINOR() when printed.
 *
 * NOTE(review): owner is stored as __uint64_t but printed with the signed
 * %lld conversion -- presumably so that special owner codes show up as
 * negative numbers; confirm this is intentional.
 */
DECLARE_EVENT_CLASS(xfs_fsmap_class,
TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
struct xfs_rmap_irec *rmap),
TP_ARGS(mp, keydev, agno, rmap),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, keydev)
__field(xfs_agnumber_t, agno)
__field(xfs_fsblock_t, bno)
__field(xfs_filblks_t, len)
__field(__uint64_t, owner)
__field(__uint64_t, offset)
__field(unsigned int, flags)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->keydev = new_decode_dev(keydev);
__entry->agno = agno;
__entry->bno = rmap->rm_startblock;
__entry->len = rmap->rm_blockcount;
__entry->owner = rmap->rm_owner;
__entry->offset = rmap->rm_offset;
__entry->flags = rmap->rm_flags;
),
TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->agno,
__entry->bno,
__entry->len,
__entry->owner,
__entry->offset,
__entry->flags)
)
/* Declare a tracepoint named @name that uses xfs_fsmap_class. */
#define DEFINE_FSMAP_EVENT(name) \
DEFINE_EVENT(xfs_fsmap_class, name, \
TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
struct xfs_rmap_irec *rmap), \
TP_ARGS(mp, keydev, agno, rmap))
/* One event each for the low key, the high key and each mapping record. */
DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
/*
 * Event class that records one struct xfs_fsmap together with the mount's
 * device.
 *
 * The record's fmr_device is run through new_decode_dev() so that it is
 * stored as a dev_t and can be split with MAJOR()/MINOR() when printed.
 * fmr_physical and fmr_length are stored in xfs_daddr_t fields named block
 * and len.
 *
 * NOTE(review): owner is stored as __uint64_t but printed with the signed
 * %lld conversion -- presumably so that special owner codes show up as
 * negative numbers; confirm this is intentional.
 */
DECLARE_EVENT_CLASS(xfs_getfsmap_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
TP_ARGS(mp, fsmap),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, keydev)
__field(xfs_daddr_t, block)
__field(xfs_daddr_t, len)
__field(__uint64_t, owner)
__field(__uint64_t, offset)
__field(__uint64_t, flags)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->keydev = new_decode_dev(fsmap->fmr_device);
__entry->block = fsmap->fmr_physical;
__entry->len = fsmap->fmr_length;
__entry->owner = fsmap->fmr_owner;
__entry->offset = fsmap->fmr_offset;
__entry->flags = fsmap->fmr_flags;
),
TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->block,
__entry->len,
__entry->owner,
__entry->offset,
__entry->flags)
)
/* Declare a tracepoint named @name that uses xfs_getfsmap_class. */
#define DEFINE_GETFSMAP_EVENT(name) \
DEFINE_EVENT(xfs_getfsmap_class, name, \
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \
TP_ARGS(mp, fsmap))
/* One event each for the low key, the high key and each mapping record. */
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
......
......@@ -262,6 +262,28 @@ xfs_trans_alloc(
return 0;
}
/*
* Create an empty transaction with no reservation. This is a defensive
* mechanism for routines that query metadata without actually modifying
* them -- if the metadata being queried is somehow cross-linked (think a
* btree block pointer that points higher in the tree), we risk deadlock.
* However, blocks grabbed as part of a transaction can be re-grabbed.
* The verifiers will notice the corrupt block and the operation will fail
* back to userspace without deadlocking.
*
* Note the zero-length reservation; this transaction MUST be cancelled
* without any dirty data.
*/
int
xfs_trans_alloc_empty(
struct xfs_mount *mp,
struct xfs_trans **tpp)
{
struct xfs_trans_res resv = {0};
return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
}
/*
* Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits.
......@@ -1012,17 +1034,14 @@ xfs_trans_cancel(
* chunk we've been working on and get a new transaction to continue.
*/
int
__xfs_trans_roll(
xfs_trans_roll(
struct xfs_trans **tpp,
struct xfs_inode *dp,
int *committed)
struct xfs_inode *dp)
{
struct xfs_trans *trans;
struct xfs_trans_res tres;
int error;
*committed = 0;
/*
* Ensure that the inode is always logged.
*/
......@@ -1048,7 +1067,6 @@ __xfs_trans_roll(
if (error)
return error;
*committed = 1;
trans = *tpp;
/*
......@@ -1071,12 +1089,3 @@ __xfs_trans_roll(
xfs_trans_ijoin(trans, dp, 0);
return 0;
}
int
xfs_trans_roll(
struct xfs_trans **tpp,
struct xfs_inode *dp)
{
int committed;
return __xfs_trans_roll(tpp, dp, &committed);
}
......@@ -158,6 +158,8 @@ typedef struct xfs_trans {
int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
uint blocks, uint rtextents, uint flags,
struct xfs_trans **tpp);
int xfs_trans_alloc_empty(struct xfs_mount *mp,
struct xfs_trans **tpp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
......@@ -226,7 +228,6 @@ int xfs_trans_free_extent(struct xfs_trans *,
struct xfs_efd_log_item *, xfs_fsblock_t,
xfs_extlen_t, struct xfs_owner_info *);
int xfs_trans_commit(struct xfs_trans *);
int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
void xfs_trans_cancel(xfs_trans_t *);
int xfs_trans_ail_init(struct xfs_mount *);
......
......@@ -684,8 +684,23 @@ xfs_trans_ail_update_bulk(
}
}
/*
* xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
/*
 * Take @lip off the AIL and clear its in-AIL state (the XFS_LI_IN_AIL flag
 * and its LSN).
 *
 * Returns true when @lip was the item xfs_ail_min() reported just before
 * the removal, false otherwise.
 */
bool
xfs_ail_delete_one(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	struct xfs_log_item	*min_item = xfs_ail_min(ailp);
	bool			was_min = (min_item == lip);

	/* Trace while both LSNs are still intact. */
	trace_xfs_ail_delete(lip, min_item->li_lsn, lip->li_lsn);
	xfs_ail_delete(ailp, lip);

	lip->li_lsn = 0;
	lip->li_flags &= ~XFS_LI_IN_AIL;

	return was_min;
}
/**
* Remove a log item from the AIL
*
* @xfs_trans_ail_delete_bulk takes an array of log items that all need to
* removed from the AIL. The caller is already holding the AIL lock, and done
......@@ -706,52 +721,36 @@ xfs_trans_ail_update_bulk(
* before returning.
*/
void
xfs_trans_ail_delete_bulk(
xfs_trans_ail_delete(
struct xfs_ail *ailp,
struct xfs_log_item **log_items,
int nr_items,
struct xfs_log_item *lip,
int shutdown_type) __releases(ailp->xa_lock)
{
xfs_log_item_t *mlip;
int mlip_changed = 0;
int i;
struct xfs_mount *mp = ailp->xa_mount;
bool mlip_changed;
mlip = xfs_ail_min(ailp);
for (i = 0; i < nr_items; i++) {
struct xfs_log_item *lip = log_items[i];
if (!(lip->li_flags & XFS_LI_IN_AIL)) {
struct xfs_mount *mp = ailp->xa_mount;
spin_unlock(&ailp->xa_lock);
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
__func__);
xfs_force_shutdown(mp, shutdown_type);
}
return;
if (!(lip->li_flags & XFS_LI_IN_AIL)) {
spin_unlock(&ailp->xa_lock);
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
__func__);
xfs_force_shutdown(mp, shutdown_type);
}
trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
xfs_ail_delete(ailp, lip);
lip->li_flags &= ~XFS_LI_IN_AIL;
lip->li_lsn = 0;
if (mlip == lip)
mlip_changed = 1;
return;
}
mlip_changed = xfs_ail_delete_one(ailp, lip);
if (mlip_changed) {
if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
xlog_assign_tail_lsn_locked(ailp->xa_mount);
if (!XFS_FORCED_SHUTDOWN(mp))
xlog_assign_tail_lsn_locked(mp);
if (list_empty(&ailp->xa_ail))
wake_up_all(&ailp->xa_empty);
spin_unlock(&ailp->xa_lock);
}
spin_unlock(&ailp->xa_lock);
if (mlip_changed)
xfs_log_space_wake(ailp->xa_mount);
} else {
spin_unlock(&ailp->xa_lock);
}
}
int
......
......@@ -106,18 +106,9 @@ xfs_trans_ail_update(
xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
struct xfs_log_item **log_items, int nr_items,
int shutdown_type)
__releases(ailp->xa_lock);
static inline void
xfs_trans_ail_delete(
struct xfs_ail *ailp,
xfs_log_item_t *lip,
int shutdown_type) __releases(ailp->xa_lock)
{
xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type);
}
bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
int shutdown_type) __releases(ailp->xa_lock);
static inline void
xfs_trans_ail_remove(
......
/*
* FS_IOC_GETFSMAP ioctl infrastructure.
*
* Copyright (C) 2017 Oracle. All Rights Reserved.
*
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
#ifndef _LINUX_FSMAP_H
#define _LINUX_FSMAP_H
#include <linux/types.h>
/*
* Structure for FS_IOC_GETFSMAP.
*
* The memory layout for this call is the scalar values defined in
* struct fsmap_head, followed by two struct fsmap that describe
* the lower and upper bound of mappings to return, followed by an
* array of struct fsmap mappings.
*
* fmh_iflags control the output of the call, whereas fmh_oflags report
* on the overall record output. fmh_count should be set to the
* length of the fmh_recs array, and fmh_entries will be set to the
* number of entries filled out during each call. If fmh_count is
* zero, the number of reverse mappings will be returned in
* fmh_entries, though no mappings will be returned. fmh_reserved
* must be set to zero.
*
* The two elements in the fmh_keys array are used to constrain the
* output. The first element in the array should represent the
* lowest disk mapping ("low key") that the user wants to learn
* about. If this value is all zeroes, the filesystem will return
* the first entry it knows about. For a subsequent call, the
* contents of fsmap_head.fmh_recs[fsmap_head.fmh_entries - 1] (the
* last record that was filled in) should be copied into fmh_keys[0]
* to have the kernel start where it left off.
*
* The second element in the fmh_keys array should represent the
* highest disk mapping ("high key") that the user wants to learn
* about. If this value is all ones, the filesystem will not stop
* until it runs out of mapping to return or runs out of space in
* fmh_recs.
*
* fmr_device can be either a 32-bit cookie representing a device, or
* a 32-bit dev_t if the FMH_OF_DEV_T flag is set. fmr_physical,
* fmr_offset, and fmr_length are expressed in units of bytes.
* fmr_owner is either an inode number, or a special value if
* FMR_OF_SPECIAL_OWNER is set in fmr_flags.
*/
/*
 * One space mapping record.  The same layout is used for the low and high
 * search keys in struct fsmap_head.
 */
struct fsmap {
	__u32		fmr_device;	/* device id */
	__u32		fmr_flags;	/* mapping flags */
	__u64		fmr_physical;	/* device offset of segment */
	__u64		fmr_owner;	/* owner id */
	__u64		fmr_offset;	/* file offset of segment */
	__u64		fmr_length;	/* length of segment */
	__u64		fmr_reserved[3];	/* must be zero */
};

/*
 * Header for FS_IOC_GETFSMAP: scalar control/output fields, the two search
 * keys, then a variable-length array of returned records.
 */
struct fsmap_head {
	__u32		fmh_iflags;	/* control flags */
	__u32		fmh_oflags;	/* output flags */
	__u32		fmh_count;	/* # of entries in array incl. input */
	__u32		fmh_entries;	/* # of entries filled in (output). */
	__u64		fmh_reserved[6];	/* must be zero */

	struct fsmap	fmh_keys[2];	/* low and high keys for the mapping search */
	struct fsmap	fmh_recs[];	/* returned records */
};

/* Size of an fsmap_head with room for nr records. */
static inline size_t
fsmap_sizeof(
	unsigned int	nr)
{
	return sizeof(struct fsmap_head) + nr * sizeof(struct fsmap);
}

/*
 * Start the next fsmap query at the end of the current query results.
 *
 * Copies the last record that was filled in, fmh_recs[fmh_entries - 1],
 * into the low key fmh_keys[0].  If no records were filled in there is
 * nothing to resume from, so the low key is left untouched; fmh_entries is
 * unsigned, and subtracting one from zero would otherwise index fmh_recs
 * far out of bounds.
 */
static inline void
fsmap_advance(
	struct fsmap_head	*head)
{
	if (head->fmh_entries == 0)
		return;

	head->fmh_keys[0] = head->fmh_recs[head->fmh_entries - 1];
}
/* fmh_iflags values - set by FS_IOC_GETFSMAP caller in the header. */
/* no flags defined yet */
#define FMH_IF_VALID 0
/* fmh_oflags values - returned in the header segment only. */
#define FMH_OF_DEV_T 0x1 /* fmr_device values will be dev_t */
/* fmr_flags values - returned for each non-header segment */
#define FMR_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
#define FMR_OF_ATTR_FORK 0x2 /* segment = attribute fork */
#define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */
#define FMR_OF_SHARED 0x8 /* segment = shared with another file */
#define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */
#define FMR_OF_LAST 0x20 /* segment is the last in the FS */
/*
 * Each FS gets to define its own special owner codes.
 *
 * An owner value packs a 32-bit type into the upper half and a 32-bit code
 * into the lower half.  Every macro argument is parenthesized before it is
 * cast, so compound expressions (e.g. "a | b" or "c ? x : y") are converted
 * and shifted as a whole rather than only their first operand.
 */
#define FMR_OWNER(type, code)	(((__u64)(type) << 32) | \
				 ((__u64)(code) & 0xFFFFFFFFULL))
#define FMR_OWNER_TYPE(owner)	((__u32)((__u64)(owner) >> 32))
#define FMR_OWNER_CODE(owner)	((__u32)(((__u64)(owner) & 0xFFFFFFFFULL)))
#define FMR_OWN_FREE		FMR_OWNER(0, 1) /* free space */
#define FMR_OWN_UNKNOWN		FMR_OWNER(0, 2) /* unknown owner */
#define FMR_OWN_METADATA	FMR_OWNER(0, 3) /* metadata */
#define FS_IOC_GETFSMAP _IOWR('X', 59, struct fsmap_head)
#endif /* _LINUX_FSMAP_H */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment