ext4: Fix max file size and logical block counting of extent format file, CVE-2011...

[linux-flexiantxendom0-natty.git] / fs / ext4 / super.c
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 48ce561..64bf573 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -75,8 +75,10 @@ static void ext4_write_super(struct super_block *sb);
  static int ext4_freeze(struct super_block *sb);
  static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                        const char *dev_name, void *data);
+static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  static void ext4_destroy_lazyinit_thread(void);
  static void ext4_unregister_li_request(struct super_block *sb);
+static void ext4_clear_request_list(void);
  
  #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
  static struct file_system_type ext3_fs_type = {
@@ -832,6 +834,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
         ei->i_sync_tid = 0;
         ei->i_datasync_tid = 0;
         atomic_set(&ei->i_ioend_count, 0);
+       atomic_set(&ei->i_aiodio_unwritten, 0);
  
         return &ei->vfs_inode;
  }
@@ -2118,6 +2121,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                 return;
         }
  
+       /* Check if feature set would not allow a r/w mount */
+       if (!ext4_feature_set_ok(sb, 0)) {
+               ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+                        "unknown ROCOMPAT features");
+               return;
+       }
+
         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
                 if (es->s_last_orphan)
                         jbd_debug(1, "Errors on filesystem, "
@@ -2201,6 +2211,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
   * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
   * so that won't be a limiting factor.
   *
+ * However, there is another limiting factor. We store extents in the form
+ * of a starting block and a length, hence the resulting length of the extent
+ * covering the maximum file size must fit into the on-disk format containers
+ * as well. Given that the length is always one unit bigger than the max unit
+ * (because we count 0 as well), we have to lower s_maxbytes by one fs block.
+ *
   * Note, this does *not* consider any metadata overhead for vfs i_blocks.
   */
  static loff_t ext4_max_size(int blkbits, int has_huge_files)
@@ -2222,10 +2238,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
                 upper_limit <<= blkbits;
         }
  
-       /* 32-bit extent-start container, ee_block */
-       res = 1LL << 32;
+       /*
+        * 32-bit extent-start container, ee_block. We lower the maxbytes
+        * by one fs block, so ee_len can cover the extent of maximum file
+        * size
+        */
+       res = (1LL << 32) - 1;
         res <<= blkbits;
-       res -= 1;
  
         /* Sanity check against vm- & vfs- imposed limits */
         if (res > upper_limit)
@@ -2635,12 +2654,6 @@ static void print_daily_error_info(unsigned long arg)
         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
  }
  
-static void ext4_lazyinode_timeout(unsigned long data)
-{
-       struct task_struct *p = (struct task_struct *)data;
-       wake_up_process(p);
-}
-
  /* Find next suitable group and run ext4_init_inode_table */
  static int ext4_run_li_request(struct ext4_li_request *elr)
  {
@@ -2688,7 +2701,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
  
  /*
   * Remove lr_request from the list_request and free the
- * request tructure. Should be called with li_list_mtx held
+ * request structure. Should be called with li_list_mtx held
   */
  static void ext4_remove_li_request(struct ext4_li_request *elr)
  {
@@ -2706,16 +2719,20 @@ static void ext4_remove_li_request(struct ext4_li_request *elr)
  
  static void ext4_unregister_li_request(struct super_block *sb)
  {
-       struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
-
-       if (!ext4_li_info)
+       mutex_lock(&ext4_li_mtx);
+       if (!ext4_li_info) {
+               mutex_unlock(&ext4_li_mtx);
                 return;
+       }
  
         mutex_lock(&ext4_li_info->li_list_mtx);
-       ext4_remove_li_request(elr);
+       ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
         mutex_unlock(&ext4_li_info->li_list_mtx);
+       mutex_unlock(&ext4_li_mtx);
  }
  
+static struct task_struct *ext4_lazyinit_task;
+
  /*
   * This is the function where ext4lazyinit thread lives. It walks
   * through the request list searching for next scheduled filesystem.
@@ -2730,14 +2747,10 @@ static int ext4_lazyinit_thread(void *arg)
         struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
         struct list_head *pos, *n;
         struct ext4_li_request *elr;
-       unsigned long next_wakeup;
-       DEFINE_WAIT(wait);
+       unsigned long next_wakeup, cur;
  
         BUG_ON(NULL == eli);
  
-       eli->li_timer.data = (unsigned long)current;
-       eli->li_timer.function = ext4_lazyinode_timeout;
-
         eli->li_task = current;
         wake_up(&eli->li_wait_task);
  
@@ -2771,19 +2784,19 @@ cont_thread:
                 if (freezing(current))
                         refrigerator();
  
-               if ((time_after_eq(jiffies, next_wakeup)) ||
+               cur = jiffies;
+               if ((time_after_eq(cur, next_wakeup)) ||
                     (MAX_JIFFY_OFFSET == next_wakeup)) {
                         cond_resched();
                         continue;
                 }
  
-               eli->li_timer.expires = next_wakeup;
-               add_timer(&eli->li_timer);
-               prepare_to_wait(&eli->li_wait_daemon, &wait,
-                               TASK_INTERRUPTIBLE);
-               if (time_before(jiffies, next_wakeup))
-                       schedule();
-               finish_wait(&eli->li_wait_daemon, &wait);
+               schedule_timeout_interruptible(next_wakeup - cur);
+
+               if (kthread_should_stop()) {
+                       ext4_clear_request_list();
+                       goto exit_thread;
+               }
         }
  
  exit_thread:
@@ -2803,7 +2816,6 @@ exit_thread:
                 goto cont_thread;
         }
         mutex_unlock(&eli->li_list_mtx);
-       del_timer_sync(&ext4_li_info->li_timer);
         eli->li_task = NULL;
         wake_up(&eli->li_wait_task);
  
@@ -2830,13 +2842,11 @@ static void ext4_clear_request_list(void)
  
  static int ext4_run_lazyinit_thread(void)
  {
-       struct task_struct *t;
-
-       t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
-       if (IS_ERR(t)) {
-               int err = PTR_ERR(t);
+       ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
+                                        ext4_li_info, "ext4lazyinit");
+       if (IS_ERR(ext4_lazyinit_task)) {
+               int err = PTR_ERR(ext4_lazyinit_task);
                 ext4_clear_request_list();
-               del_timer_sync(&ext4_li_info->li_timer);
                 kfree(ext4_li_info);
                 ext4_li_info = NULL;
                 printk(KERN_CRIT "EXT4: error %d creating inode table "
@@ -2885,9 +2895,7 @@ static int ext4_li_info_new(void)
         INIT_LIST_HEAD(&eli->li_request_list);
         mutex_init(&eli->li_list_mtx);
  
-       init_waitqueue_head(&eli->li_wait_daemon);
         init_waitqueue_head(&eli->li_wait_task);
-       init_timer(&eli->li_timer);
         eli->li_state |= EXT4_LAZYINIT_QUIT;
  
         ext4_li_info = eli;
@@ -2962,6 +2970,12 @@ static int ext4_register_li_request(struct super_block *sb,
         mutex_unlock(&ext4_li_info->li_list_mtx);
  
         sbi->s_li_request = elr;
+       /*
+        * Set elr to NULL here since it has been inserted into
+        * the request_list; its removal and freeing are handled
+        * by ext4_clear_request_list from now on.
+        */
+       elr = NULL;
  
         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
                 ret = ext4_run_lazyinit_thread();
@@ -2985,16 +2999,10 @@ static void ext4_destroy_lazyinit_thread(void)
          * If thread exited earlier
          * there's nothing to be done.
          */
-       if (!ext4_li_info)
+       if (!ext4_li_info || !ext4_lazyinit_task)
                 return;
  
-       ext4_clear_request_list();
-
-       while (ext4_li_info->li_task) {
-               wake_up(&ext4_li_info->li_wait_daemon);
-               wait_event(ext4_li_info->li_wait_task,
-                          ext4_li_info->li_task == NULL);
-       }
+       kthread_stop(ext4_lazyinit_task);
  }
  
  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -3378,6 +3386,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
         spin_lock_init(&sbi->s_next_gen_lock);
  
+       init_timer(&sbi->s_err_report);
+       sbi->s_err_report.function = print_daily_error_info;
+       sbi->s_err_report.data = (unsigned long) sb;
+
         err = percpu_counter_init(&sbi->s_freeblocks_counter,
                         ext4_count_free_blocks(sb));
         if (!err) {
@@ -3633,9 +3645,6 @@ no_journal:
                  "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
                  *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
  
-       init_timer(&sbi->s_err_report);
-       sbi->s_err_report.function = print_daily_error_info;
-       sbi->s_err_report.data = (unsigned long) sb;
         if (es->s_error_count)
                 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
  
@@ -3657,6 +3666,7 @@ failed_mount_wq:
                 sbi->s_journal = NULL;
         }
  failed_mount3:
+       del_timer(&sbi->s_err_report);
         if (sbi->s_flex_groups) {
                 if (is_vmalloc_addr(sbi->s_flex_groups))
                         vfree(sbi->s_flex_groups);
@@ -4768,7 +4778,7 @@ static struct file_system_type ext4_fs_type = {
         .fs_flags       = FS_REQUIRES_DEV,
  };
  
-int __init ext4_init_feat_adverts(void)
+static int __init ext4_init_feat_adverts(void)
  {
         struct ext4_features *ef;
         int ret = -ENOMEM;
@@ -4792,23 +4802,44 @@ out:
         return ret;
  }
  
+static void ext4_exit_feat_adverts(void)
+{
+       kobject_put(&ext4_feat->f_kobj);
+       wait_for_completion(&ext4_feat->f_kobj_unregister);
+       kfree(ext4_feat);
+}
+
+/* Shared across all ext4 file systems */
+wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
  static int __init ext4_init_fs(void)
  {
-       int err;
+       int i, err;
  
         ext4_check_flag_values();
+
+       for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
+               mutex_init(&ext4__aio_mutex[i]);
+               init_waitqueue_head(&ext4__ioend_wq[i]);
+       }
+
         err = ext4_init_pageio();
         if (err)
                 return err;
         err = ext4_init_system_zone();
         if (err)
-               goto out5;
+               goto out7;
         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
         if (!ext4_kset)
-               goto out4;
+               goto out6;
         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
+       if (!ext4_proc_root)
+               goto out5;
  
         err = ext4_init_feat_adverts();
+       if (err)
+               goto out4;
  
         err = ext4_init_mballoc();
         if (err)
@@ -4838,12 +4869,14 @@ out1:
  out2:
         ext4_exit_mballoc();
  out3:
-       kfree(ext4_feat);
+       ext4_exit_feat_adverts();
+out4:
         remove_proc_entry("fs/ext4", NULL);
+out5:
         kset_unregister(ext4_kset);
-out4:
+out6:
         ext4_exit_system_zone();
-out5:
+out7:
         ext4_exit_pageio();
         return err;
  }
@@ -4857,6 +4890,7 @@ static void __exit ext4_exit_fs(void)
         destroy_inodecache();
         ext4_exit_xattr();
         ext4_exit_mballoc();
+       ext4_exit_feat_adverts();
         remove_proc_entry("fs/ext4", NULL);
         kset_unregister(ext4_kset);
         ext4_exit_system_zone();