ext4: address scalability issue by removing extent cache statistics
[linux-flexiantxendom0.git] / fs / ext4 / ext4.h
index de937fc..7b1cd5c 100644 (file)
@@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t;
 #define EXT4_MB_DELALLOC_RESERVED      0x0400
 /* We are doing stream allocation */
 #define EXT4_MB_STREAM_ALLOC           0x0800
-
+/* Use reserved root blocks if needed */
+#define EXT4_MB_USE_ROOT_BLOCKS                0x1000
 
 struct ext4_allocation_request {
        /* target inode for block we're allocating */
@@ -143,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
 #define EXT4_MAP_UNINIT                (1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear on buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER  (1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-                                EXT4_MAP_UNINIT)
+                                EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;
@@ -174,6 +183,9 @@ struct mpage_da_data {
  */
 #define        EXT4_IO_END_UNWRITTEN   0x0001
 #define EXT4_IO_END_ERROR      0x0002
+#define EXT4_IO_END_QUEUED     0x0004
+#define EXT4_IO_END_DIRECT     0x0008
+#define EXT4_IO_END_IN_FSYNC   0x0010
 
 struct ext4_io_page {
        struct page     *p_page;
@@ -209,6 +221,8 @@ struct ext4_io_submit {
  */
 #define        EXT4_BAD_INO             1      /* Bad blocks inode */
 #define EXT4_ROOT_INO           2      /* Root inode */
+#define EXT4_USR_QUOTA_INO      3      /* User quota inode */
+#define EXT4_GRP_QUOTA_INO      4      /* Group quota inode */
 #define EXT4_BOOT_LOADER_INO    5      /* Boot loader inode */
 #define EXT4_UNDEL_DIR_INO      6      /* Undelete directory inode */
 #define EXT4_RESIZE_INO                 7      /* Reserved group descriptors inode */
@@ -235,8 +249,11 @@ struct ext4_io_submit {
 # define EXT4_BLOCK_SIZE(s)            (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
 #endif
 #define        EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+#define EXT4_CLUSTER_SIZE(s)           (EXT4_BLOCK_SIZE(s) << \
+                                        EXT4_SB(s)->s_cluster_bits)
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE_BITS(s)       ((s)->s_blocksize_bits)
+# define EXT4_CLUSTER_BITS(s)          (EXT4_SB(s)->s_cluster_bits)
 #else
 # define EXT4_BLOCK_SIZE_BITS(s)       ((s)->s_log_block_size + 10)
 #endif
@@ -254,6 +271,14 @@ struct ext4_io_submit {
 #endif
 #define EXT4_BLOCK_ALIGN(size, blkbits)                ALIGN((size), (1 << (blkbits)))
 
+/* Translate a block number to a cluster number */
+#define EXT4_B2C(sbi, blk)     ((blk) >> (sbi)->s_cluster_bits)
+/* Translate a cluster number to a block number */
+#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
+/* Translate # of blks to # of clusters */
+#define EXT4_NUM_B2C(sbi, blks)        (((blks) + (sbi)->s_cluster_ratio - 1) >> \
+                                (sbi)->s_cluster_bits)
+
 /*
  * Structure of a blocks group descriptor
  */
@@ -285,7 +310,7 @@ struct ext4_group_desc
 
 struct flex_groups {
        atomic_t free_inodes;
-       atomic_t free_blocks;
+       atomic_t free_clusters;
        atomic_t used_dirs;
 };
 
@@ -302,6 +327,7 @@ struct flex_groups {
 #define EXT4_DESC_SIZE(s)              (EXT4_SB(s)->s_desc_size)
 #ifdef __KERNEL__
 # define EXT4_BLOCKS_PER_GROUP(s)      (EXT4_SB(s)->s_blocks_per_group)
+# define EXT4_CLUSTERS_PER_GROUP(s)    (EXT4_SB(s)->s_clusters_per_group)
 # define EXT4_DESC_PER_BLOCK(s)                (EXT4_SB(s)->s_desc_per_block)
 # define EXT4_INODES_PER_GROUP(s)      (EXT4_SB(s)->s_inodes_per_group)
 # define EXT4_DESC_PER_BLOCK_BITS(s)   (EXT4_SB(s)->s_desc_per_block_bits)
@@ -354,8 +380,7 @@ struct flex_groups {
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-                          EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-                          EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+                          EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
                           EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
 
@@ -512,6 +537,12 @@ struct ext4_new_group_data {
        /* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT         (EXT4_GET_BLOCKS_CONVERT|\
                                         EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+       /* Punch out blocks of an extent */
+#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT          0x0020
+       /* Don't normalize allocation size (used for fallocate) */
+#define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
+       /* Request will not result in inode size update (user for fallocate) */
+#define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
 
 /*
  * Flags used by ext4_free_blocks
@@ -519,6 +550,14 @@ struct ext4_new_group_data {
 #define EXT4_FREE_BLOCKS_METADATA      0x0001
 #define EXT4_FREE_BLOCKS_FORGET                0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED     0x0004
+#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE        0x0008
+#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER  0x0010
+#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER   0x0020
+
+/*
+ * Flags used by ext4_discard_partial_page_buffers
+ */
+#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED  0x0001
 
 /*
  * ioctl commands
@@ -529,9 +568,6 @@ struct ext4_new_group_data {
 #define        EXT4_IOC_SETVERSION             _IOW('f', 4, long)
 #define        EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
 #define        EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC_WAIT_FOR_READONLY     _IOR('f', 99, long)
-#endif
 #define EXT4_IOC_GETRSVSZ              _IOR('f', 5, long)
 #define EXT4_IOC_SETRSVSZ              _IOW('f', 6, long)
 #define EXT4_IOC_GROUP_EXTEND          _IOW('f', 7, unsigned long)
@@ -554,14 +590,11 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_SETRSVSZ            _IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND                _IOW('f', 7, unsigned int)
 #define EXT4_IOC32_GROUP_ADD           _IOW('f', 8, struct compat_ext4_new_group_input)
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC32_WAIT_FOR_READONLY   _IOR('f', 99, int)
-#endif
 #define EXT4_IOC32_GETVERSION_OLD      FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
 #endif
 
-/* Max physical block we can addres w/o extents */
+/* Max physical block we can address w/o extents */
 #define EXT4_MAX_BLOCK_FILE_PHYS       0xFFFFFFFF
 
 /*
@@ -763,10 +796,12 @@ struct ext4_inode_info {
         * near to their parent directory's inode.
         */
        ext4_group_t    i_block_group;
+       ext4_lblk_t     i_dir_start_lookup;
+#if (BITS_PER_LONG < 64)
        unsigned long   i_state_flags;          /* Dynamic state flags */
+#endif
        unsigned long   i_flags;
 
-       ext4_lblk_t             i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
        /*
         * Extended attributes can be read independently of the main file
@@ -809,7 +844,7 @@ struct ext4_inode_info {
         */
        struct rw_semaphore i_data_sem;
        struct inode vfs_inode;
-       struct jbd2_inode jinode;
+       struct jbd2_inode *jinode;
 
        struct ext4_ext_cache i_cached_extent;
        /*
@@ -826,6 +861,7 @@ struct ext4_inode_info {
        ext4_group_t    i_last_alloc_group;
 
        /* allocation reservation info for delalloc */
+       /* In case of bigalloc, these refer to clusters rather than blocks */
        unsigned int i_reserved_data_blocks;
        unsigned int i_reserved_meta_blocks;
        unsigned int i_allocated_meta_blocks;
@@ -835,7 +871,6 @@ struct ext4_inode_info {
        /* on-disk additional length */
        __u16 i_extra_isize;
 
-       spinlock_t i_block_reservation_lock;
 #ifdef CONFIG_QUOTA
        /* quota space reservation, managed internally by quota code */
        qsize_t i_reserved_quota;
@@ -844,9 +879,12 @@ struct ext4_inode_info {
        /* completed IOs that might need unwritten extents handling */
        struct list_head i_completed_io_list;
        spinlock_t i_completed_io_lock;
+       atomic_t i_ioend_count; /* Number of outstanding io_end structs */
        /* current io_end structure for async DIO write*/
        ext4_io_end_t *cur_aio_dio;
-       atomic_t i_ioend_count; /* Number of outstanding io_end structs */
+       atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
+
+       spinlock_t i_block_reservation_lock;
 
        /*
         * Transactions that contain inode's metadata needed to complete
@@ -873,7 +911,6 @@ struct ext4_inode_info {
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_OLDALLOC            0x00002  /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID               0x00004 /* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG               0x00008 /* Some debugging messages */
 #define EXT4_MOUNT_ERRORS_CONT         0x00010 /* Continue on errors */
@@ -905,6 +942,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DISCARD             0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE    0x80000000 /* Initialize uninitialized itables */
 
+#define EXT4_MOUNT2_EXPLICIT_DELALLOC  0x00000001 /* User explicitly
+                                                     specified delalloc */
+
 #define clear_opt(sb, opt)             EXT4_SB(sb)->s_mount_opt &= \
                                                ~EXT4_MOUNT_##opt
 #define set_opt(sb, opt)               EXT4_SB(sb)->s_mount_opt |= \
@@ -919,14 +959,16 @@ struct ext4_inode_info {
 #define test_opt2(sb, opt)             (EXT4_SB(sb)->s_mount_opt2 & \
                                         EXT4_MOUNT2_##opt)
 
-#define ext4_set_bit                   ext2_set_bit
+#define ext4_set_bit                   __test_and_set_bit_le
 #define ext4_set_bit_atomic            ext2_set_bit_atomic
-#define ext4_clear_bit                 ext2_clear_bit
+#define ext4_clear_bit                 __test_and_clear_bit_le
 #define ext4_clear_bit_atomic          ext2_clear_bit_atomic
-#define ext4_test_bit                  ext2_test_bit
-#define ext4_find_first_zero_bit       ext2_find_first_zero_bit
-#define ext4_find_next_zero_bit                ext2_find_next_zero_bit
-#define ext4_find_next_bit             ext2_find_next_bit
+#define ext4_test_bit                  test_bit_le
+#define ext4_find_first_zero_bit       find_first_zero_bit_le
+#define ext4_find_next_zero_bit                find_next_zero_bit_le
+#define ext4_find_next_bit             find_next_bit_le
+
+extern void ext4_set_bits(void *bm, int cur, int len);
 
 /*
  * Maximal mount counts between two filesystem checks
@@ -953,9 +995,9 @@ struct ext4_super_block {
 /*10*/ __le32  s_free_inodes_count;    /* Free inodes count */
        __le32  s_first_data_block;     /* First Data Block */
        __le32  s_log_block_size;       /* Block size */
-       __le32  s_obso_log_frag_size;   /* Obsoleted fragment size */
+       __le32  s_log_cluster_size;     /* Allocation cluster size */
 /*20*/ __le32  s_blocks_per_group;     /* # Blocks per group */
-       __le32  s_obso_frags_per_group; /* Obsoleted fragments per group */
+       __le32  s_clusters_per_group;   /* # Clusters per group */
        __le32  s_inodes_per_group;     /* # Inodes per group */
        __le32  s_mtime;                /* Mount time */
 /*30*/ __le32  s_wtime;                /* Write time */
@@ -1024,7 +1066,7 @@ struct ext4_super_block {
        __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
        __le32  s_flags;                /* Miscellaneous flags */
        __le16  s_raid_stride;          /* RAID stride */
-       __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+       __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
        __le64  s_mmp_block;            /* Block for multi-mount protection */
        __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
        __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
@@ -1051,7 +1093,10 @@ struct ext4_super_block {
        __u8    s_last_error_func[32];  /* function where the error happened */
 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
        __u8    s_mount_opts[64];
-       __le32  s_reserved[112];        /* Padding to the end of the block */
+       __le32  s_usr_quota_inum;       /* inode for tracking user quota */
+       __le32  s_grp_quota_inum;       /* inode for tracking group quota */
+       __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
+       __le32  s_reserved[109];        /* Padding to the end of the block */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1071,6 +1116,7 @@ struct ext4_sb_info {
        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
        unsigned long s_inodes_per_block;/* Number of inodes per block */
        unsigned long s_blocks_per_group;/* Number of blocks in a group */
+       unsigned long s_clusters_per_group; /* Number of clusters in a group */
        unsigned long s_inodes_per_group;/* Number of inodes in a group */
        unsigned long s_itb_per_group;  /* Number of inode table blocks per group */
        unsigned long s_gdb_count;      /* Number of group descriptor blocks */
@@ -1079,6 +1125,8 @@ struct ext4_sb_info {
        ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
        unsigned long s_overhead_last;  /* Last calculated overhead */
        unsigned long s_blocks_last;    /* Last seen block count */
+       unsigned int s_cluster_ratio;   /* Number of blocks per cluster */
+       unsigned int s_cluster_bits;    /* log2 of s_cluster_ratio */
        loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
        struct buffer_head * s_sbh;     /* Buffer containing the super block */
        struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
@@ -1102,10 +1150,10 @@ struct ext4_sb_info {
        u32 s_hash_seed[4];
        int s_def_hash_version;
        int s_hash_unsigned;    /* 3 if hash should be signed, 0 if not */
-       struct percpu_counter s_freeblocks_counter;
+       struct percpu_counter s_freeclusters_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
-       struct percpu_counter s_dirtyblocks_counter;
+       struct percpu_counter s_dirtyclusters_counter;
        struct blockgroup_lock *s_blockgroup_lock;
        struct proc_dir_entry *s_proc;
        struct kobject s_kobj;
@@ -1115,15 +1163,12 @@ struct ext4_sb_info {
        struct journal_s *s_journal;
        struct list_head s_orphan;
        struct mutex s_orphan_lock;
-       struct mutex s_resize_lock;
+       unsigned long s_resize_flags;           /* Flags indicating if there
+                                                  is a resizer */
        unsigned long s_commit_interval;
        u32 s_max_batch_time;
        u32 s_min_batch_time;
        struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
@@ -1197,6 +1242,12 @@ struct ext4_sb_info {
        struct ext4_li_request *s_li_request;
        /* Wait multiplier for lazy initialization thread */
        unsigned int s_li_wait_mult;
+
+       /* Kernel thread for multiple mount protection */
+       struct task_struct *s_mmp_tsk;
+
+       /* record the last minlen when FITRIM is called. */
+       atomic_t s_last_trim_minblks;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1223,6 +1274,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
                 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                             struct ext4_io_end *io_end)
+{
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+       }
+}
+
 /*
  * Inode dynamic state flags
  */
@@ -1238,22 +1298,36 @@ enum {
        EXT4_STATE_DELALLOC_RESERVED,   /* blks already reserved for delalloc */
 };
 
-#define EXT4_INODE_BIT_FNS(name, field)                                        \
+#define EXT4_INODE_BIT_FNS(name, field, offset)                                \
 static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
 {                                                                      \
-       return test_bit(bit, &EXT4_I(inode)->i_##field);                \
+       return test_bit(bit + (offset), &EXT4_I(inode)->i_##field);     \
 }                                                                      \
 static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
 {                                                                      \
-       set_bit(bit, &EXT4_I(inode)->i_##field);                        \
+       set_bit(bit + (offset), &EXT4_I(inode)->i_##field);             \
 }                                                                      \
 static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
 {                                                                      \
-       clear_bit(bit, &EXT4_I(inode)->i_##field);                      \
+       clear_bit(bit + (offset), &EXT4_I(inode)->i_##field);           \
 }
 
-EXT4_INODE_BIT_FNS(flag, flags)
-EXT4_INODE_BIT_FNS(state, state_flags)
+EXT4_INODE_BIT_FNS(flag, flags, 0)
+#if (BITS_PER_LONG < 64)
+EXT4_INODE_BIT_FNS(state, state_flags, 0)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+       (ei)->i_state_flags = 0;
+}
+#else
+EXT4_INODE_BIT_FNS(state, flags, 32)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+       /* We depend on the fact that callers will set i_flags */
+}
+#endif
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
@@ -1320,6 +1394,8 @@ EXT4_INODE_BIT_FNS(state, state_flags)
 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM                0x0010
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK       0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE     0x0040
+#define EXT4_FEATURE_RO_COMPAT_QUOTA           0x0100
+#define EXT4_FEATURE_RO_COMPAT_BIGALLOC                0x0200
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION      0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE         0x0002
@@ -1333,20 +1409,37 @@ EXT4_INODE_BIT_FNS(state, state_flags)
 #define EXT4_FEATURE_INCOMPAT_EA_INODE         0x0400 /* EA in inode */
 #define EXT4_FEATURE_INCOMPAT_DIRDATA          0x1000 /* data in dirent */
 
+#define EXT2_FEATURE_COMPAT_SUPP       EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT2_FEATURE_INCOMPAT_SUPP     (EXT4_FEATURE_INCOMPAT_FILETYPE| \
+                                        EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT2_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
+#define EXT3_FEATURE_COMPAT_SUPP       EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT3_FEATURE_INCOMPAT_SUPP     (EXT4_FEATURE_INCOMPAT_FILETYPE| \
+                                        EXT4_FEATURE_INCOMPAT_RECOVER| \
+                                        EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT3_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
 #define EXT4_FEATURE_COMPAT_SUPP       EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP     (EXT4_FEATURE_INCOMPAT_FILETYPE| \
                                         EXT4_FEATURE_INCOMPAT_RECOVER| \
                                         EXT4_FEATURE_INCOMPAT_META_BG| \
                                         EXT4_FEATURE_INCOMPAT_EXTENTS| \
                                         EXT4_FEATURE_INCOMPAT_64BIT| \
-                                        EXT4_FEATURE_INCOMPAT_FLEX_BG)
+                                        EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+                                        EXT4_FEATURE_INCOMPAT_MMP)
 #define EXT4_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                         EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
                                         EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
                                         EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
                                         EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+                                        EXT4_FEATURE_RO_COMPAT_BIGALLOC)
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -1572,12 +1665,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
  */
 struct ext4_lazy_init {
        unsigned long           li_state;
-
-       wait_queue_head_t       li_wait_daemon;
-       wait_queue_head_t       li_wait_task;
-       struct timer_list       li_timer;
-       struct task_struct      *li_task;
-
        struct list_head        li_request_list;
        struct mutex            li_list_mtx;
 };
@@ -1597,6 +1684,67 @@ struct ext4_features {
 };
 
 /*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the block number saved in the s_mmp_block field in the
+ * superblock. Programs that check MMP should assume that if
+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
+ * to use the filesystem, regardless of how old the timestamp is.
+ */
+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
+
+struct mmp_struct {
+       __le32  mmp_magic;              /* Magic number for MMP */
+       __le32  mmp_seq;                /* Sequence no. updated periodically */
+
+       /*
+        * mmp_time, mmp_nodename & mmp_bdevname are only used for information
+        * purposes and do not affect the correctness of the algorithm
+        */
+       __le64  mmp_time;               /* Time last updated */
+       char    mmp_nodename[64];       /* Node which last updated MMP block */
+       char    mmp_bdevname[32];       /* Bdev which last updated MMP block */
+
+       /*
+        * mmp_check_interval is used to verify if the MMP block has been
+        * updated on the block device. The value is updated based on the
+        * maximum time to write the MMP block during an update cycle.
+        */
+       __le16  mmp_check_interval;
+
+       __le16  mmp_pad1;
+       __le32  mmp_pad2[227];
+};
+
+/* arguments passed to the mmp thread */
+struct mmpd_data {
+       struct buffer_head *bh; /* bh from initial read_mmp_block() */
+       struct super_block *sb;  /* super block of the fs */
+};
+
+/*
+ * Check interval multiplier
+ * The MMP block is written every update interval and initially checked every
+ * update interval x the multiplier (the value is then adapted based on the
+ * write latency). The reason is that writes can be delayed under load and we
+ * don't want readers to incorrectly assume that the filesystem is no longer
+ * in use.
+ */
+#define EXT4_MMP_CHECK_MULT            2UL
+
+/*
+ * Minimum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MIN_CHECK_INTERVAL    5UL
+
+/*
+ * Maximum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MAX_CHECK_INTERVAL    300UL
+
+/*
  * Function prototypes
  */
 
@@ -1620,11 +1768,13 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
                        ext4_group_t group);
 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, unsigned long *count, int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-                               ext4_fsblk_t block, unsigned long count);
-extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+                                        ext4_fsblk_t goal,
+                                        unsigned int flags,
+                                        unsigned long *count,
+                                        int *errp);
+extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+                                   s64 nclusters, unsigned int flags);
+extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
@@ -1632,12 +1782,19 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
                                      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh,
-                                      ext4_group_t group,
-                                      struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)                   \
-               ext4_init_block_bitmap(sb, NULL, group, desc)
+extern void ext4_init_block_bitmap(struct super_block *sb,
+                                  struct buffer_head *bh,
+                                  ext4_group_t group,
+                                  struct ext4_group_desc *desc);
+extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+                                             ext4_group_t block_group,
+                                             struct ext4_group_desc *gdp);
+extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+                                           ext4_group_t block_group);
+extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+                                          ext4_group_t block_group,
+                                          struct ext4_group_desc *gdp);
+ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
@@ -1653,7 +1810,8 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 extern void ext4_htree_free_dir_info(struct dir_private_info *p);
 
 /* fsync.c */
-extern int ext4_sync_file(struct file *, int);
+extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
+extern int ext4_flush_completed_IO(struct inode *);
 
 /* hash.c */
 extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1661,7 +1819,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-                                   const struct qstr *qstr, __u32 goal);
+                                   const struct qstr *qstr, __u32 goal,
+                                   uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1687,6 +1846,8 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
                             unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
                ext4_group_t i, struct ext4_group_desc *desc);
+extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
+                               ext4_fsblk_t block, unsigned long count);
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 
 /* inode.c */
@@ -1705,11 +1866,12 @@ extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 extern void ext4_evict_inode(struct inode *);
 extern void ext4_clear_inode(struct inode *);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
-extern void ext4_dirty_inode(struct inode *);
+extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -1719,10 +1881,29 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from);
+extern int ext4_block_zero_page_range(handle_t *handle,
+               struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags);
+extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
+
+/* indirect.c */
+extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+                               struct ext4_map_blocks *map, int flags);
+extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
+                               const struct iovec *iov, loff_t offset,
+                               unsigned long nr_segs);
+extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
+extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
+extern void ext4_ind_truncate(struct inode *inode);
+
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
@@ -1744,36 +1925,43 @@ extern int ext4_group_extend(struct super_block *sb,
                                ext4_fsblk_t n_blocks_count);
 
 /* super.c */
-extern void __ext4_error(struct super_block *, const char *, unsigned int,
-                        const char *, ...)
-       __attribute__ ((format (printf, 4, 5)));
+extern void *ext4_kvmalloc(size_t size, gfp_t flags);
+extern void *ext4_kvzalloc(size_t size, gfp_t flags);
+extern void ext4_kvfree(void *ptr);
+extern __printf(4, 5)
+void __ext4_error(struct super_block *, const char *, unsigned int,
+                 const char *, ...);
 #define ext4_error(sb, message...)     __ext4_error(sb, __func__,      \
                                                     __LINE__, ## message)
-extern void ext4_error_inode(struct inode *, const char *, unsigned int,
-                            ext4_fsblk_t, const char *, ...)
-       __attribute__ ((format (printf, 5, 6)));
-extern void ext4_error_file(struct file *, const char *, unsigned int,
-                           ext4_fsblk_t, const char *, ...)
-       __attribute__ ((format (printf, 5, 6)));
+extern __printf(5, 6)
+void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
+                     const char *, ...);
+extern __printf(5, 6)
+void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
+                    const char *, ...);
 extern void __ext4_std_error(struct super_block *, const char *,
                             unsigned int, int);
-extern void __ext4_abort(struct super_block *, const char *, unsigned int,
-                      const char *, ...)
-       __attribute__ ((format (printf, 4, 5)));
+extern __printf(4, 5)
+void __ext4_abort(struct super_block *, const char *, unsigned int,
+                 const char *, ...);
 #define ext4_abort(sb, message...)     __ext4_abort(sb, __func__, \
                                                       __LINE__, ## message)
-extern void __ext4_warning(struct super_block *, const char *, unsigned int,
-                         const char *, ...)
-       __attribute__ ((format (printf, 4, 5)));
+extern __printf(4, 5)
+void __ext4_warning(struct super_block *, const char *, unsigned int,
+                   const char *, ...);
 #define ext4_warning(sb, message...)   __ext4_warning(sb, __func__, \
                                                       __LINE__, ## message)
-extern void ext4_msg(struct super_block *, const char *, const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_grp_locked_error(const char *, unsigned int, \
-                                   struct super_block *, ext4_group_t, \
-                                   unsigned long, ext4_fsblk_t, \
-                                   const char *, ...)
-       __attribute__ ((format (printf, 7, 8)));
+extern __printf(3, 4)
+void ext4_msg(struct super_block *, const char *, const char *, ...);
+extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
+                          const char *, unsigned int, const char *);
+#define dump_mmp_msg(sb, mmp, msg)     __dump_mmp_msg(sb, mmp, __func__, \
+                                                      __LINE__, msg)
+extern __printf(7, 8)
+void __ext4_grp_locked_error(const char *, unsigned int,
+                            struct super_block *, ext4_group_t,
+                            unsigned long, ext4_fsblk_t,
+                            const char *, ...);
 #define ext4_grp_locked_error(sb, grp, message...) \
        __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
 extern void ext4_update_dynamic_rev(struct super_block *sb);
@@ -1789,8 +1977,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
                                      struct ext4_group_desc *bg);
 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                                     struct ext4_group_desc *bg);
-extern __u32 ext4_free_blks_count(struct super_block *sb,
-                               struct ext4_group_desc *bg);
+extern __u32 ext4_free_group_clusters(struct super_block *sb,
+                                     struct ext4_group_desc *bg);
 extern __u32 ext4_free_inodes_count(struct super_block *sb,
                                 struct ext4_group_desc *bg);
 extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1803,8 +1991,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
                                 struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_free_blks_set(struct super_block *sb,
-                              struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_group_clusters_set(struct super_block *sb,
+                                        struct ext4_group_desc *bg,
+                                        __u32 count);
 extern void ext4_free_inodes_set(struct super_block *sb,
                                struct ext4_group_desc *bg, __u32 count);
 extern void ext4_used_dirs_set(struct super_block *sb,
@@ -1913,13 +2102,13 @@ do {                                                            \
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate percpu_counter_batch blocks in their local
- * counters. So we need to make sure we have free blocks more
+/* Each CPU can accumulate percpu_counter_batch clusters in their local
+ * counters. So we need to make sure we have free clusters more
  * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
+#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
-#define EXT4_FREEBLOCKS_WATERMARK 0
+#define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -1952,11 +2141,19 @@ struct ext4_group_info {
                                         * 5 free 8-block regions. */
 };
 
-#define EXT4_GROUP_INFO_NEED_INIT_BIT  0
+#define EXT4_GROUP_INFO_NEED_INIT_BIT          0
+#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT                1
 
 #define EXT4_MB_GRP_NEED_INIT(grp)     \
        (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
 
+#define EXT4_MB_GRP_WAS_TRIMMED(grp)   \
+       (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+#define EXT4_MB_GRP_SET_TRIMMED(grp)   \
+       (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
+       (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+
 #define EXT4_MAX_CONTENTION            8
 #define EXT4_CONTENTION_THRESHOLD      2
 
@@ -2008,6 +2205,19 @@ static inline void ext4_mark_super_dirty(struct super_block *sb)
 }
 
 /*
+ * Block validity checking
+ */
+#define ext4_check_indirect_blockref(inode, bh)                                \
+       ext4_check_blockref(__func__, __LINE__, inode,                  \
+                           (__le32 *)(bh)->b_data,                     \
+                           EXT4_ADDR_PER_BLOCK((inode)->i_sb))
+
+#define ext4_ind_check_inode(inode)                                    \
+       ext4_check_blockref(__func__, __LINE__, inode,                  \
+                           EXT4_I(inode)->i_data,                      \
+                           EXT4_NDIR_BLOCKS)
+
+/*
  * Inodes and files operations
  */
 
@@ -2036,6 +2246,8 @@ extern void ext4_exit_system_zone(void);
 extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
                                 ext4_fsblk_t start_blk,
                                 unsigned int count);
+extern int ext4_check_blockref(const char *, unsigned int,
+                              struct inode *, __le32 *, unsigned int);
 
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
@@ -2045,9 +2257,11 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                               struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
+extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
+                               loff_t length);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
-extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
                          loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                          ssize_t len);
@@ -2073,14 +2287,26 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
                               int len,
                               struct writeback_control *wbc);
 
+/* mmp.c */
+extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+
 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */
 enum ext4_state_bits {
        BH_Uninit       /* blocks are allocated but uninitialized on disk */
          = BH_JBDPrivateStart,
+       BH_AllocFromCluster,    /* allocated blocks were part of already
+                                * allocated cluster. Note that this flag will
+                                * never, ever appear in a buffer_head's state
+                                * flag. See EXT4_MAP_FROM_CLUSTER to see where
+                                * this is used. */
+       BH_Da_Mapped,   /* Delayed allocated block that now has a mapping. This
+                        * flag is set when ext4_map_blocks is called on a
+                        * delayed allocated block to get its real mapping. */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
+BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
@@ -2101,6 +2327,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 
 #define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
 
+/* For ioend & aio unwritten conversion wait queues */
+#define EXT4_WQ_HASH_SZ                37
+#define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
+                                           EXT4_WQ_HASH_SZ])
+#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
+                                            EXT4_WQ_HASH_SZ])
+extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
+#define EXT4_RESIZING  0
+extern int ext4_resize_begin(struct super_block *sb);
+extern void ext4_resize_end(struct super_block *sb);
+
 #endif /* __KERNEL__ */
 
+#include "ext4_extents.h"
+
 #endif /* _EXT4_H */