2 * JFFS2 -- Journalling Flash File System, Version 2.
4 * Copyright (C) 2001, 2002 Red Hat, Inc.
6 * Created by David Woodhouse <dwmw2@cambridge.redhat.com>
8 * For licensing information, see the file 'LICENCE' in this directory.
10 * $Id: gc.c,v 1.103 2003/05/22 18:01:02 dwmw2 Exp $
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
24 struct jffs2_inode_cache *ic,
25 struct jffs2_raw_node_ref *raw);
26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
27 struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
34 uint32_t start, uint32_t end);
35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
36 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
37 uint32_t start, uint32_t end);
38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
39 struct jffs2_raw_node_ref *raw, struct jffs2_inode_cache *ic);
41 /* Called with erase_completion_lock held */
42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 struct jffs2_eraseblock *ret;
45 struct list_head *nextlist = NULL;
46 int n = jiffies % 128;
48 /* Pick an eraseblock to garbage collect next. This is where we'll
49 put the clever wear-levelling algorithms. Eventually. */
50 /* We possibly want to favour the dirtier blocks more when the
51 number of free blocks is low. */
52 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > JFFS2_RESERVED_BLOCKS_GCBAD) {
53 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54 nextlist = &c->bad_used_list;
55 } else if (n < 50 && !list_empty(&c->erasable_list)) {
56 /* Note that most of them will have gone directly to be erased.
57 So don't favour the erasable_list _too_ much. */
58 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59 nextlist = &c->erasable_list;
60 } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61 /* Most of the time, pick one off the very_dirty list */
62 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63 nextlist = &c->very_dirty_list;
64 } else if (n < 126 && !list_empty(&c->dirty_list)) {
65 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66 nextlist = &c->dirty_list;
67 } else if (!list_empty(&c->clean_list)) {
68 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69 nextlist = &c->clean_list;
70 } else if (!list_empty(&c->dirty_list)) {
71 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
73 nextlist = &c->dirty_list;
74 } else if (!list_empty(&c->very_dirty_list)) {
75 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76 nextlist = &c->very_dirty_list;
77 } else if (!list_empty(&c->erasable_list)) {
78 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
80 nextlist = &c->erasable_list;
82 /* Eep. All were empty */
83 printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
87 ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
90 ret->gc_node = ret->first_node;
92 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
96 /* Have we accidentally picked a clean block with wasted space ? */
97 if (ret->wasted_size) {
98 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
99 ret->dirty_size += ret->wasted_size;
100 c->wasted_size -= ret->wasted_size;
101 c->dirty_size += ret->wasted_size;
102 ret->wasted_size = 0;
105 D1(jffs2_dump_block_lists(c));
109 /* jffs2_garbage_collect_pass
110 * Make a single attempt to progress GC. Move one node, and possibly
111 * start erasing one eraseblock.
113 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
115 struct jffs2_inode_cache *ic;
116 struct jffs2_eraseblock *jeb;
117 struct jffs2_raw_node_ref *raw;
121 if (down_interruptible(&c->alloc_sem))
125 spin_lock(&c->erase_completion_lock);
126 if (!c->unchecked_size)
129 /* We can't start doing GC yet. We haven't finished checking
130 the node CRCs etc. Do it now. */
132 /* checked_ino is protected by the alloc_sem */
133 if (c->checked_ino > c->highest_ino) {
134 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
136 D1(jffs2_dump_block_lists(c));
137 spin_unlock(&c->erase_completion_lock);
141 spin_unlock(&c->erase_completion_lock);
143 spin_lock(&c->inocache_lock);
145 ic = jffs2_get_ino_cache(c, c->checked_ino++);
148 spin_unlock(&c->inocache_lock);
153 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
155 spin_unlock(&c->inocache_lock);
159 case INO_STATE_CHECKEDABSENT:
160 case INO_STATE_PRESENT:
161 D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
162 spin_unlock(&c->inocache_lock);
166 case INO_STATE_CHECKING:
167 printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
168 spin_unlock(&c->inocache_lock);
171 case INO_STATE_READING:
172 /* We need to wait for it to finish, lest we move on
173 and trigger the BUG() above while we haven't yet
174 finished checking all its nodes */
175 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
177 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
183 case INO_STATE_UNCHECKED:
186 ic->state = INO_STATE_CHECKING;
187 spin_unlock(&c->inocache_lock);
189 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%d\n", ic->ino));
191 ret = jffs2_do_crccheck_inode(c, ic);
193 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
195 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
200 /* First, work out which block we're garbage-collecting */
204 jeb = jffs2_find_gc_block(c);
207 printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n");
208 spin_unlock(&c->erase_completion_lock);
213 D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
215 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
217 if (!jeb->used_size) {
224 while(ref_obsolete(raw)) {
225 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
226 jeb->gc_node = raw = raw->next_phys;
228 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
229 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
230 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
231 spin_unlock(&c->erase_completion_lock);
236 D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
237 if (!raw->next_in_ino) {
238 /* Inode-less node. Clean marker, snapshot or something like that */
239 /* FIXME: If it's something that needs to be copied, including something
240 we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
241 spin_unlock(&c->erase_completion_lock);
242 jffs2_mark_node_obsolete(c, raw);
247 inum = jffs2_raw_ref_to_inum(raw);
248 D1(printk(KERN_DEBUG "Inode number is #%u\n", inum));
250 spin_unlock(&c->erase_completion_lock);
252 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), inum));
254 /* Three possibilities:
255 1. Inode is already in-core. We must iget it and do proper
256 updating to its fragtree, etc.
257 2. Inode is not in-core, node is REF_PRISTINE. We lock the
258 inocache to prevent a read_inode(), copy the node intact.
259 3. Inode is not in-core, node is not pristine. We must iget()
260 and take the slow path.
262 spin_lock(&c->inocache_lock);
263 ic = jffs2_get_ino_cache(c, inum);
265 /* This should never fail unless I'm particularly stupid.
266 So we don't check before dereferencing it */
269 case INO_STATE_CHECKEDABSENT:
270 /* It's been checked, but it's not currently in-core.
271 We can just copy any pristine nodes, but have
272 to prevent anyone else from doing read_inode() while
273 we're at it, so we set the state accordingly */
274 if (ref_flags(raw) == REF_PRISTINE)
275 ic->state = INO_STATE_GC;
277 D1(printk("Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
282 case INO_STATE_PRESENT:
283 case INO_STATE_UNCHECKED:
284 /* It's in-core or hasn't been checked. GC must iget() it. */
287 case INO_STATE_CHECKING:
288 /* Should never happen. We should have finished checking
289 by the time we actually start doing any GC. */
294 /* Should never happen. We are holding the alloc_sem,
295 no other garbage collection can happen. Note that we
296 do depend on this later when deciding to do a simple
300 case INO_STATE_READING:
301 /* Someone's currently trying to read it. We must wait for
302 them to finish and then go through the full iget() route
303 to do the GC. However, sometimes read_inode() needs to get
304 the alloc_sem() (for marking nodes invalid) so we must
305 drop the alloc_sem before sleeping. */
308 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
310 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
311 /* And because we dropped the alloc_sem we must start again from the
312 beginning. Ponder chance of livelock here -- we're returning success
313 without actually making any progress.
315 Q: What are the chances that the inode is back in INO_STATE_READING
316 again by the time we next enter this function? And that this happens
317 enough times to cause a real delay?
319 A: Small enough that I don't care :)
325 spin_unlock(&c->inocache_lock);
327 /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
328 node intact, and we don't have to muck about with the fragtree etc.
329 because we know it's not in-core. If it _was_ in-core, we go through
330 all the iget() crap anyway */
332 if (ic->state == INO_STATE_GC) {
333 ret = jffs2_garbage_collect_pristine(c, ic, raw);
334 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
339 /* Fall through if it wanted us to */
342 ret = jffs2_garbage_collect_live(c, jeb, raw, ic);
348 /* If we've finished this block, start it erasing */
349 spin_lock(&c->erase_completion_lock);
352 if (c->gcblock && !c->gcblock->used_size) {
353 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
354 /* We're GC'ing an empty block? */
355 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
357 c->nr_erasing_blocks++;
358 jffs2_erase_pending_trigger(c);
360 spin_unlock(&c->erase_completion_lock);
366 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
367 struct jffs2_raw_node_ref *raw, struct jffs2_inode_cache *ic)
369 struct jffs2_inode_info *f;
370 struct jffs2_node_frag *frag;
371 struct jffs2_full_dnode *fn = NULL;
372 struct jffs2_full_dirent *fd;
373 uint32_t start = 0, end = 0, nrfrags = 0;
377 inode = iget(OFNI_BS_2SFFJ(c), ic->ino);
378 if (is_bad_inode(inode)) {
379 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u\n", ic->ino);
380 /* NB. This will happen again. We need to do something appropriate here. */
386 f = JFFS2_INODE_INFO(inode);
389 /* Now we have the lock for this inode. Check that it's still the one at the head
392 if (ref_obsolete(raw)) {
393 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
394 /* They'll call again */
397 /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
398 if (f->metadata && f->metadata->raw == raw) {
400 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
404 /* FIXME. Read node and do lookup? */
405 for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
406 if (frag->node && frag->node->raw == raw) {
408 end = frag->ofs + frag->size;
409 #if 1 /* Temporary debugging sanity checks, till we're ready to _trust_ the REF_PRISTINE flag stuff */
410 if (!nrfrags && ref_flags(fn->raw) == REF_PRISTINE) {
412 printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had %d frags. Tell dwmw2\n", ref_offset(raw), fn->frags);
413 mark_ref_normal(raw);
415 /* A hole node which isn't multi-page should be garbage-collected
416 and merged anyway, so we just check for the frag size here,
417 rather than mucking around with actually reading the node
418 and checking the compression type, which is the real way
419 to tell a hole node. */
420 if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag) && frag_prev(frag)->size < PAGE_CACHE_SIZE) {
421 printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2\n",
423 mark_ref_normal(raw);
426 if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag) && frag_next(frag)->size < PAGE_CACHE_SIZE) {
427 printk(KERN_WARNING "REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2\n",
428 ref_offset(raw), frag->ofs, frag->ofs+frag->size);
429 mark_ref_normal(raw);
435 if (nrfrags == frag->node->frags)
436 break; /* We've found them all */
440 if (ref_flags(raw) == REF_PRISTINE) {
441 ret = jffs2_garbage_collect_pristine(c, ic, raw);
443 /* Urgh. Return it sensibly. */
444 frag->node->raw = ic->nodes;
449 /* We found a datanode. Do the GC */
450 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
451 /* It crosses a page boundary. Therefore, it must be a hole. */
452 ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
454 /* It could still be a hole. But we GC the page this way anyway */
455 ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
460 /* Wasn't a dnode. Try dirent */
461 for (fd = f->dents; fd; fd=fd->next) {
467 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
469 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
471 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
472 ref_offset(raw), f->inocache->ino);
473 if (ref_obsolete(raw)) {
474 printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
486 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
487 struct jffs2_inode_cache *ic,
488 struct jffs2_raw_node_ref *raw)
490 union jffs2_node_union *node;
491 struct jffs2_raw_node_ref *nraw;
494 uint32_t phys_ofs, alloclen;
497 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
499 /* Ask for a small amount of space (or the totlen if smaller) because we
500 don't want to force wastage of the end of a block if splitting would
502 ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN, raw->totlen),
503 &phys_ofs, &alloclen);
507 if (alloclen < raw->totlen) {
508 /* Doesn't fit untouched. We'll go the old route and split it */
512 node = kmalloc(raw->totlen, GFP_KERNEL);
516 ret = jffs2_flash_read(c, ref_offset(raw), raw->totlen, &retlen, (char *)node);
517 if (!ret && retlen != raw->totlen)
522 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
523 if (je32_to_cpu(node->u.hdr_crc) != crc) {
524 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
525 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
529 switch(je16_to_cpu(node->u.nodetype)) {
530 case JFFS2_NODETYPE_INODE:
531 crc = crc32(0, node, sizeof(node->i)-8);
532 if (je32_to_cpu(node->i.node_crc) != crc) {
533 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
534 ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
538 if (je32_to_cpu(node->i.dsize)) {
539 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
540 if (je32_to_cpu(node->i.data_crc) != crc) {
541 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
542 ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
548 case JFFS2_NODETYPE_DIRENT:
549 crc = crc32(0, node, sizeof(node->d)-8);
550 if (je32_to_cpu(node->d.node_crc) != crc) {
551 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
552 ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
557 crc = crc32(0, node->d.name, node->d.nsize);
558 if (je32_to_cpu(node->d.name_crc) != crc) {
559 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
560 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
566 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
567 ref_offset(raw), je16_to_cpu(node->u.nodetype));
571 nraw = jffs2_alloc_raw_node_ref();
576 nraw->flash_offset = phys_ofs;
577 nraw->totlen = raw->totlen;
578 nraw->next_phys = NULL;
580 /* OK, all the CRCs are good; this node can just be copied as-is. */
582 ret = jffs2_flash_write(c, phys_ofs, raw->totlen, &retlen, (char *)node);
583 if (ret || (retlen != raw->totlen)) {
584 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
585 raw->totlen, phys_ofs, ret, retlen);
587 /* Doesn't belong to any inode */
588 nraw->next_in_ino = NULL;
590 nraw->flash_offset |= REF_OBSOLETE;
591 jffs2_add_physical_node_ref(c, nraw);
592 jffs2_mark_node_obsolete(c, nraw);
594 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
595 jffs2_free_raw_node_ref(raw);
601 nraw->flash_offset |= REF_PRISTINE;
602 jffs2_add_physical_node_ref(c, nraw);
604 /* Link into per-inode list. This is safe because of the ic
605 state being INO_STATE_GC. Note that if we're doing this
606 for an inode which is in-code, the 'nraw' pointer is then
607 going to be fetched from ic->nodes by our caller. */
608 nraw->next_in_ino = ic->nodes;
611 jffs2_mark_node_obsolete(c, raw);
612 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
622 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
623 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
625 struct jffs2_full_dnode *new_fn;
626 struct jffs2_raw_inode ri;
628 char *mdata = NULL, mdatalen = 0;
629 uint32_t alloclen, phys_ofs;
632 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
633 S_ISCHR(JFFS2_F_I_MODE(f)) ) {
634 /* For these, we don't actually need to read the old node */
635 /* FIXME: for minor or major > 255. */
636 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
637 JFFS2_F_I_RDEV_MIN(f)));
638 mdata = (char *)&dev;
639 mdatalen = sizeof(dev);
640 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
641 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
643 mdata = kmalloc(fn->size, GFP_KERNEL);
645 printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
648 ret = jffs2_read_dnode(c, fn, mdata, 0, mdatalen);
650 printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
654 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
658 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen);
660 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
661 sizeof(ri)+ mdatalen, ret);
665 memset(&ri, 0, sizeof(ri));
666 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
667 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
668 ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
669 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
671 ri.ino = cpu_to_je32(f->inocache->ino);
672 ri.version = cpu_to_je32(++f->highest_version);
673 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
674 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
675 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
676 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
677 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
678 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
679 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
680 ri.offset = cpu_to_je32(0);
681 ri.csize = cpu_to_je32(mdatalen);
682 ri.dsize = cpu_to_je32(mdatalen);
683 ri.compr = JFFS2_COMPR_NONE;
684 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
685 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
687 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, NULL);
689 if (IS_ERR(new_fn)) {
690 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
691 ret = PTR_ERR(new_fn);
694 jffs2_mark_node_obsolete(c, fn->raw);
695 jffs2_free_full_dnode(fn);
696 f->metadata = new_fn;
698 if (S_ISLNK(JFFS2_F_I_MODE(f)))
703 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
704 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
706 struct jffs2_full_dirent *new_fd;
707 struct jffs2_raw_dirent rd;
708 uint32_t alloclen, phys_ofs;
711 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
712 rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
713 rd.nsize = strlen(fd->name);
714 rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
715 rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
717 rd.pino = cpu_to_je32(f->inocache->ino);
718 rd.version = cpu_to_je32(++f->highest_version);
719 rd.ino = cpu_to_je32(fd->ino);
720 rd.mctime = cpu_to_je32(max(JFFS2_F_I_MTIME(f), JFFS2_F_I_CTIME(f)));
722 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
723 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
725 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen);
727 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
728 sizeof(rd)+rd.nsize, ret);
731 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, NULL);
733 if (IS_ERR(new_fd)) {
734 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
735 return PTR_ERR(new_fd);
737 jffs2_add_fd_to_list(c, new_fd, &f->dents);
741 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
742 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
744 struct jffs2_full_dirent **fdp = &f->dents;
747 /* On a medium where we can't actually mark nodes obsolete
748 pernamently, such as NAND flash, we need to work out
749 whether this deletion dirent is still needed to actively
750 delete a 'real' dirent with the same name that's still
751 somewhere else on the flash. */
752 if (!jffs2_can_mark_obsolete(c)) {
753 struct jffs2_raw_dirent rd;
754 struct jffs2_raw_node_ref *raw;
757 int name_len = strlen(fd->name);
758 uint32_t name_crc = crc32(0, fd->name, name_len);
759 char *namebuf = NULL;
761 /* Prevent the erase code from nicking the obsolete node refs while
762 we're looking at them. I really don't like this extra lock but
763 can't see any alternative. Suggestions on a postcard to... */
764 down(&c->erase_free_sem);
766 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
767 /* We only care about obsolete ones */
768 if (!(ref_obsolete(raw)))
771 /* Doesn't matter if there's one in the same erase block. We're going to
772 delete it too at the same time. */
773 if ((raw->flash_offset & ~(c->sector_size-1)) ==
774 (fd->raw->flash_offset & ~(c->sector_size-1)))
777 /* This is an obsolete node belonging to the same directory */
778 ret = jffs2_flash_read(c, ref_offset(raw), sizeof(struct jffs2_unknown_node), &retlen, (char *)&rd);
780 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading header from obsolete node at %08x\n", ret, ref_offset(raw));
781 /* If we can't read it, we don't need to continue to obsolete it. Continue */
784 if (retlen != sizeof(struct jffs2_unknown_node)) {
785 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %zd) reading header from obsolete node at %08x\n",
786 retlen, sizeof(struct jffs2_unknown_node), ref_offset(raw));
789 if (je16_to_cpu(rd.nodetype) != JFFS2_NODETYPE_DIRENT ||
790 PAD(je32_to_cpu(rd.totlen)) != PAD(sizeof(rd) + name_len))
793 /* OK, it's a dirent node, it's the right length. We have to take a
794 closer look at it... */
795 ret = jffs2_flash_read(c, ref_offset(raw), sizeof(rd), &retlen, (char *)&rd);
797 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading from obsolete node at %08x\n", ret, ref_offset(raw));
798 /* If we can't read it, we don't need to continune to obsolete it. Continue */
801 if (retlen != sizeof(rd)) {
802 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %zd) reading from obsolete node at %08x\n",
803 retlen, sizeof(rd), ref_offset(raw));
807 /* If the name CRC doesn't match, skip */
808 if (je32_to_cpu(rd.name_crc) != name_crc)
810 /* If the name length doesn't match, or it's another deletion dirent, skip */
811 if (rd.nsize != name_len || !je32_to_cpu(rd.ino))
814 /* OK, check the actual name now */
816 namebuf = kmalloc(name_len + 1, GFP_KERNEL);
818 up(&c->erase_free_sem);
822 /* We read the extra byte before it so it's a word-aligned read */
823 ret = jffs2_flash_read(c, (ref_offset(raw))+sizeof(rd)-1, name_len+1, &retlen, namebuf);
825 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading name from obsolete node at %08x\n", ret, ref_offset(raw));
826 /* If we can't read it, we don't need to continune to obsolete it. Continue */
829 if (retlen != name_len+1) {
830 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %d) reading name from obsolete node at %08x\n",
831 retlen, name_len+1, ref_offset(raw));
834 if (memcmp(namebuf+1, fd->name, name_len))
837 /* OK. The name really does match. There really is still an older node on
838 the flash which our deletion dirent obsoletes. So we have to write out
839 a new deletion dirent to replace it */
844 up(&c->erase_free_sem);
845 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
848 up(&c->erase_free_sem);
854 /* No need for it any more. Just mark it obsolete and remove it from the list */
864 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
866 jffs2_mark_node_obsolete(c, fd->raw);
867 jffs2_free_full_dirent(fd);
871 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
872 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
873 uint32_t start, uint32_t end)
875 struct jffs2_raw_inode ri;
876 struct jffs2_node_frag *frag;
877 struct jffs2_full_dnode *new_fn;
878 uint32_t alloclen, phys_ofs;
881 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
882 f->inocache->ino, start, end));
884 memset(&ri, 0, sizeof(ri));
889 /* It's partially obsoleted by a later write. So we have to
890 write it out again with the _same_ version as before */
891 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
892 if (readlen != sizeof(ri) || ret) {
893 printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
896 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
897 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
899 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
902 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
903 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
905 je32_to_cpu(ri.totlen), sizeof(ri));
908 crc = crc32(0, &ri, sizeof(ri)-8);
909 if (crc != je32_to_cpu(ri.node_crc)) {
910 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
912 je32_to_cpu(ri.node_crc), crc);
913 /* FIXME: We could possibly deal with this by writing new holes for each frag */
914 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
915 start, end, f->inocache->ino);
918 if (ri.compr != JFFS2_COMPR_ZERO) {
919 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
920 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
921 start, end, f->inocache->ino);
926 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
927 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
928 ri.totlen = cpu_to_je32(sizeof(ri));
929 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
931 ri.ino = cpu_to_je32(f->inocache->ino);
932 ri.version = cpu_to_je32(++f->highest_version);
933 ri.offset = cpu_to_je32(start);
934 ri.dsize = cpu_to_je32(end - start);
935 ri.csize = cpu_to_je32(0);
936 ri.compr = JFFS2_COMPR_ZERO;
938 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
939 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
940 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
941 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
942 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
943 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
944 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
945 ri.data_crc = cpu_to_je32(0);
946 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
948 ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen);
950 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
954 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, NULL);
956 if (IS_ERR(new_fn)) {
957 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
958 return PTR_ERR(new_fn);
960 if (je32_to_cpu(ri.version) == f->highest_version) {
961 jffs2_add_full_dnode_to_inode(c, f, new_fn);
963 jffs2_mark_node_obsolete(c, f->metadata->raw);
964 jffs2_free_full_dnode(f->metadata);
971 * We should only get here in the case where the node we are
972 * replacing had more than one frag, so we kept the same version
973 * number as before. (Except in case of error -- see 'goto fill;'
976 D1(if(unlikely(fn->frags <= 1)) {
977 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
978 fn->frags, je32_to_cpu(ri.version), f->highest_version,
979 je32_to_cpu(ri.ino));
982 for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
983 frag; frag = frag_next(frag)) {
984 if (frag->ofs > fn->size + fn->ofs)
986 if (frag->node == fn) {
993 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
996 if (!new_fn->frags) {
997 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1001 jffs2_mark_node_obsolete(c, fn->raw);
1002 jffs2_free_full_dnode(fn);
1007 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1008 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1009 uint32_t start, uint32_t end)
1011 struct jffs2_full_dnode *new_fn;
1012 struct jffs2_raw_inode ri;
1013 uint32_t alloclen, phys_ofs, offset, orig_end;
1015 unsigned char *comprbuf = NULL, *writebuf;
1017 unsigned char *pg_ptr;
1018 /* FIXME: */ struct inode *inode = OFNI_EDONI_2SFFJ(f);
1020 memset(&ri, 0, sizeof(ri));
1022 D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1023 f->inocache->ino, start, end));
1027 /* If we're looking at the last node in the block we're
1028 garbage-collecting, we allow ourselves to merge as if the
1029 block was already erasing. We're likely to be GC'ing a
1030 partial page, and the next block we GC is likely to have
1031 the other half of this page right at the beginning, which
1032 means we'd expand it _then_, as nr_erasing_blocks would have
1033 increased since we checked, and in doing so would obsolete
1034 the partial node which we'd have written here. Meaning that
1035 the GC would churn and churn, and just leave dirty blocks in
1038 if(c->nr_free_blocks + c->nr_erasing_blocks > JFFS2_RESERVED_BLOCKS_GCMERGE - (fn->raw->next_phys?0:1)) {
1039 /* Shitloads of space */
1040 /* FIXME: Integrate this properly with GC calculations */
1041 start &= ~(PAGE_CACHE_SIZE-1);
1042 end = min_t(uint32_t, start + PAGE_CACHE_SIZE, JFFS2_F_I_SIZE(f));
1043 D1(printk(KERN_DEBUG "Plenty of free space, so expanding to write from offset 0x%x to 0x%x\n",
1045 if (end < orig_end) {
1046 printk(KERN_WARNING "Eep. jffs2_garbage_collect_dnode extended node to write, but it got smaller: start 0x%x, orig_end 0x%x, end 0x%x\n", start, orig_end, end);
1051 /* First, use readpage() to read the appropriate page into the page cache */
1052 /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1053 * triggered garbage collection in the first place?
1054 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1055 * page OK. We'll actually write it out again in commit_write, which is a little
1056 * suboptimal, but at least we're correct.
1059 pg = read_cache_page(start >> PAGE_CACHE_SHIFT, (void *)jffs2_do_readpage_unlock, inode);
1061 pg = read_cache_page(inode->i_mapping, start >> PAGE_CACHE_SHIFT, (void *)jffs2_do_readpage_unlock, inode);
1064 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg));
1067 pg_ptr = (char *)kmap(pg);
1068 comprbuf = kmalloc(end - start, GFP_KERNEL);
1071 while(offset < orig_end) {
1074 char comprtype = JFFS2_COMPR_NONE;
1076 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, &alloclen);
1079 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1080 sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1083 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1084 datalen = end - offset;
1086 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1089 comprtype = jffs2_compress(writebuf, comprbuf, &datalen, &cdatalen);
1092 writebuf = comprbuf;
1096 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1097 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1098 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1099 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1101 ri.ino = cpu_to_je32(f->inocache->ino);
1102 ri.version = cpu_to_je32(++f->highest_version);
1103 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1104 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1105 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1106 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1107 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1108 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1109 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1110 ri.offset = cpu_to_je32(offset);
1111 ri.csize = cpu_to_je32(cdatalen);
1112 ri.dsize = cpu_to_je32(datalen);
1113 ri.compr = comprtype;
1114 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1115 ri.data_crc = cpu_to_je32(crc32(0, writebuf, cdatalen));
1117 new_fn = jffs2_write_dnode(c, f, &ri, writebuf, cdatalen, phys_ofs, NULL);
1119 if (IS_ERR(new_fn)) {
1120 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1121 ret = PTR_ERR(new_fn);
1124 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1127 jffs2_mark_node_obsolete(c, f->metadata->raw);
1128 jffs2_free_full_dnode(f->metadata);
1132 if (comprbuf) kfree(comprbuf);
1135 /* XXX: Does the page get freed automatically? */
1136 /* AAA: Judging by the unmount getting stuck in __wait_on_page, nope. */
1137 page_cache_release(pg);