2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
28 * For further information regarding this notice, see:
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
34 * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI
37 #ifndef __PAGE_BUF_H__
38 #define __PAGE_BUF_H__
40 #include <linux/version.h>
41 #include <linux/config.h>
42 #include <linux/list.h>
43 #include <linux/types.h>
44 #include <linux/spinlock.h>
45 #include <asm/system.h>
48 #include <linux/buffer_head.h>
49 #include <linux/uio.h>
52 * Turn this on to get pagebuf lock ownership
53 #define PAGEBUF_LOCK_TRACKING
60 /* daddr must be signed since -1 is used for bmaps that are not yet allocated */
/* Disk address type used by pagebuf; declared as an loff_t. */
61 typedef loff_t page_buf_daddr_t;
/* Sentinel disk address: the value -1 converted to page_buf_daddr_t. */
63 #define PAGE_BUF_DADDR_NULL ((page_buf_daddr_t) (-1LL))
65 typedef size_t page_buf_dsize_t; /* size of buffer in blocks */
/*
 * Page/byte conversion helpers, all based on the page cache page size:
 *   page_buf_ctob  - page count to bytes (multiplies by PAGE_CACHE_SIZE)
 *   page_buf_btoc  - bytes to page count, rounding up to a whole page
 *   page_buf_btoct - bytes to page count, truncating any partial page
 *   page_buf_poff  - keeps only the bits that are clear in PAGE_CACHE_MASK
 *                    (presumably the byte offset within a page -- confirm
 *                    against the definition of PAGE_CACHE_MASK)
 * Each argument is evaluated exactly once.
 */
67 #define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
68 #define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
69 #define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
70 #define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
72 typedef enum page_buf_rw_e {
73 PBRW_READ = 1, /* transfer into target memory */
74 PBRW_WRITE = 2, /* transfer from target memory */
75 PBRW_ZERO = 3 /* Zero target memory */
78 typedef enum { /* pbm_flags values */
79 PBMF_EOF = 0x01, /* mapping contains EOF */
80 PBMF_HOLE = 0x02, /* mapping covers a hole */
81 PBMF_DELAY = 0x04, /* mapping covers delalloc region */
82 PBMF_UNWRITTEN = 0x20 /* mapping covers allocated */
83 /* but uninitialized file data */
87 /* base extent manipulation calls */
88 BMAP_READ = (1 << 0), /* read extents */
89 BMAP_WRITE = (1 << 1), /* create extents */
90 BMAP_ALLOCATE = (1 << 2), /* delayed allocate to real extents */
91 BMAP_UNWRITTEN = (1 << 3), /* unwritten extents to real extents */
93 BMAP_IGNSTATE = (1 << 4), /* ignore unwritten state on read */
94 BMAP_DIRECT = (1 << 5), /* direct instead of buffered write */
95 BMAP_MMAP = (1 << 6), /* allocate for mmap write */
96 BMAP_SYNC = (1 << 7), /* sync write */
97 BMAP_TRYLOCK = (1 << 8), /* non-blocking request */
100 typedef enum page_buf_flags_e { /* pb_flags values */
101 PBF_READ = (1 << 0), /* buffer intended for reading from device */
102 PBF_WRITE = (1 << 1), /* buffer intended for writing to device */
103 PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */
104 PBF_PARTIAL = (1 << 3), /* buffer partially read */
105 PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
106 PBF_NONE = (1 << 5), /* buffer not read at all */
107 PBF_DELWRI = (1 << 6), /* buffer has dirty pages */
108 PBF_FREED = (1 << 7), /* buffer has been freed and is invalid */
109 PBF_SYNC = (1 << 8), /* force updates to disk */
110 PBF_MAPPABLE = (1 << 9),/* use directly-addressable pages */
111 PBF_STALE = (1 << 10), /* buffer has been staled, do not find it */
112 PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory */
113 PBF_FS_DATAIOD = (1 << 12), /* schedule IO completion on fs datad */
115 /* flags used only as arguments to access routines */
116 PBF_LOCK = (1 << 13), /* lock requested */
117 PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait */
118 PBF_DONT_BLOCK = (1 << 15), /* do not block in current thread */
120 /* flags used only internally */
121 _PBF_LOCKABLE = (1 << 16), /* page_buf_t may be locked */
122 _PBF_PRIVATE_BH = (1 << 17), /* do not use public buffer heads */
123 _PBF_ALL_PAGES_MAPPED = (1 << 18), /* all pages in range mapped */
124 _PBF_ADDR_ALLOCATED = (1 << 19), /* pb_addr space was allocated */
125 _PBF_MEM_ALLOCATED = (1 << 20), /* pb_mem+underlying pages alloc'd */
127 PBF_FORCEIO = (1 << 21),
128 PBF_FLUSH = (1 << 22), /* flush disk write cache */
129 PBF_READ_AHEAD = (1 << 23),
/* Both transfer directions at once: PBF_READ combined with PBF_WRITE. */
133 #define PBF_UPDATE (PBF_READ | PBF_WRITE)
/*
 * PBF_NOT_DONE() is true when either PBF_PARTIAL or PBF_NONE is set in
 * pb->pb_flags; PBF_DONE() is its exact complement (neither flag set).
 * Both take a pointer to a structure that has a pb_flags member.
 */
134 #define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
135 #define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
/* NOTE(review): the PBR_ prefix suggests these are pb_target flags -- confirm. */
137 #define PBR_SECTOR_ONLY 1 /* only use sector size buffer heads */
138 #define PBR_ALIGNED_ONLY 2 /* only use aligned I/O */
140 typedef struct pb_target {
142 struct block_device *pbr_bdev;
143 struct address_space *pbr_mapping;
144 unsigned int pbr_bsize;
145 unsigned int pbr_sshift;
150 * page_buf_bmap_t: File system I/O map
152 * The pbm_bn and pbm_length fields are expressed in disk blocks, while
153 * pbm_offset is a byte offset into the file. The pbm_length field specifies
154 * the size of the underlying backing store for the particular mapping.
156 * The pbm_bsize, pbm_size and pbm_delta fields are in bytes and indicate
157 * the size of the mapping, the number of bytes that are valid to access
158 * (read or write), and the offset into the mapping, given the offset
159 * supplied to the file I/O map routine. pbm_delta is the offset of the
160 * desired data from the beginning of the mapping.
162 * When a request is made to read beyond the logical end of the object,
163 * pbm_size may be set to 0, but pbm_offset and pbm_length should be set to
164 * the actual amount of underlying storage that has been allocated, if any.
167 typedef struct page_buf_bmap_s {
168 page_buf_daddr_t pbm_bn; /* block number in file system */
169 pb_target_t *pbm_target; /* device to do I/O to */
170 loff_t pbm_offset; /* byte offset of mapping in file */
171 size_t pbm_delta; /* offset of request into bmap */
172 size_t pbm_bsize; /* size of this mapping in bytes */
173 bmap_flags_t pbm_flags; /* options flags for mapping */
/* Shorter alias for page_buf_bmap_t. */
176 typedef page_buf_bmap_t pb_bmap_t;
180 * page_buf_t: Buffer structure for page cache-based buffers
182 * This buffer structure is used by the page cache buffer management routines
183 * to refer to an assembly of pages forming a logical buffer. The actual
184 * I/O is performed with buffer_head or bio structures, as required by drivers,
185 * for drivers which do not understand this structure. The buffer structure is
186 * used on a temporary basis only, and is discarded when released.
188 * The real data storage is recorded in the page cache. Metadata is
189 * hashed to the inode for the block device on which the file system resides.
190 * File data is hashed to the inode for the file. Pages which are only
191 * partially filled with data have bits set in their block_map entry
192 * to indicate which disk blocks in the page are not valid.
/*
 * Call-back types stored in a page_buf_t.  Each one receives a pointer to
 * the buffer it is invoked for.
 */
196 typedef void (*page_buf_iodone_t)(struct page_buf_s *);
197 /* call-back function on I/O completion */
198 typedef void (*page_buf_relse_t)(struct page_buf_s *);
199 /* call-back function on buffer release */
200 typedef int (*page_buf_bdstrat_t)(struct page_buf_s *);
/* type of the pb_strat member ("pre-write function"); returns an int */
204 typedef struct page_buf_s {
205 struct semaphore pb_sema; /* semaphore for lockables */
206 unsigned long pb_flushtime; /* time to flush pagebuf */
207 atomic_t pb_pin_count; /* pin count */
208 wait_queue_head_t pb_waiters; /* unpin waiters */
209 struct list_head pb_list;
210 page_buf_flags_t pb_flags; /* status flags */
211 struct list_head pb_hash_list;
212 struct pb_target *pb_target; /* logical object */
213 atomic_t pb_hold; /* reference count */
214 page_buf_daddr_t pb_bn; /* block number for I/O */
215 loff_t pb_file_offset; /* offset in file */
216 size_t pb_buffer_length; /* size of buffer in bytes */
217 size_t pb_count_desired; /* desired transfer size */
218 void *pb_addr; /* virtual address of buffer */
219 struct work_struct pb_iodone_work;
220 atomic_t pb_io_remaining;/* #outstanding I/O requests */
221 page_buf_iodone_t pb_iodone; /* I/O completion function */
222 page_buf_relse_t pb_relse; /* releasing function */
223 page_buf_bdstrat_t pb_strat; /* pre-write function */
224 struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */
228 unsigned short pb_error; /* error code on I/O */
229 unsigned short pb_page_count; /* size of page array */
230 unsigned short pb_offset; /* page offset in first page */
231 unsigned char pb_locked; /* page array is locked */
232 unsigned char pb_hash_index; /* hash table index */
233 struct page **pb_pages; /* array of page pointers */
234 struct page *pb_page_array[PB_PAGES]; /* inline pages */
235 #ifdef PAGEBUF_LOCK_TRACKING
242 * page_buf module entry points
245 /* Finding and Reading Buffers */
/*
 * Both routines take a target, a starting offset, a length and a set of
 * page_buf_flags_t flags, and return a page_buf_t pointer.
 * NOTE(review): the units of the offset and length arguments are not stated
 * here -- confirm against the implementation.
 */
247 extern page_buf_t *pagebuf_find( /* find buffer for block if */
248 /* the block is in memory */
249 struct pb_target *, /* target for block */
250 loff_t, /* starting offset of range */
251 size_t, /* length of range */
252 page_buf_flags_t); /* PBF_LOCK */
254 extern page_buf_t *pagebuf_get( /* allocate a buffer */
255 struct pb_target *, /* target for buffer */
256 loff_t, /* starting offset of range */
257 size_t, /* length of range */
258 page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
261 extern page_buf_t *pagebuf_lookup(
263 loff_t, /* starting offset of range */
264 size_t, /* length of range */
265 page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
266 /* PBF_FORCEIO, _PBF_LOCKABLE */
/*
 * NOTE(review): the parameter comment speaks of an inode, but the declared
 * type is struct pb_target -- confirm which wording is current.
 */
268 extern page_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
269 /* no memory or disk address */
270 struct pb_target *); /* mount point "fake" inode */
272 extern page_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
273 /* without disk address */
275 struct pb_target *); /* mount point "fake" inode */
277 extern int pagebuf_associate_memory(
/* Takes an additional reference on an existing buffer; returns nothing. */
283 extern void pagebuf_hold( /* increment reference count */
284 page_buf_t *); /* buffer to hold */
/*
 * Read-ahead returns no value, so the caller receives no completion or
 * error indication from this call.  The target argument may be NULL.
 */
286 extern void pagebuf_readahead( /* read ahead into cache */
287 struct pb_target *, /* target for buffer (or NULL) */
288 loff_t, /* starting offset of range */
289 size_t, /* length of range */
290 page_buf_flags_t); /* additional read flags */
292 /* Writing and Releasing Buffers */
/*
 * pagebuf_free() deallocates a buffer, whereas pagebuf_rele() only drops a
 * hold on it.
 * NOTE(review): pagebuf_rele() is presumably the counterpart of
 * pagebuf_hold() -- confirm against the implementation.
 */
294 extern void pagebuf_free( /* deallocate a buffer */
295 page_buf_t *); /* buffer to deallocate */
297 extern void pagebuf_rele( /* release hold on a buffer */
298 page_buf_t *); /* buffer to release */
300 /* Locking and Unlocking Buffers */
/*
 * Buffer lock interface.  pagebuf_cond_lock() is the only routine here with
 * a documented failure value (-EBUSY when the buffer is already locked).
 * NOTE(review): the meaning of the int returned by pagebuf_lock() is not
 * documented here -- confirm against the implementation.
 */
302 extern int pagebuf_cond_lock( /* lock buffer, if not locked */
303 /* (returns -EBUSY if locked) */
304 page_buf_t *); /* buffer to lock */
306 extern int pagebuf_lock_value( /* return count on lock */
307 page_buf_t *); /* buffer to check */
309 extern int pagebuf_lock( /* lock buffer */
310 page_buf_t *); /* buffer to lock */
312 extern void pagebuf_unlock( /* unlock buffer */
313 page_buf_t *); /* buffer to unlock */
315 /* Buffer Utility Routines */
/*
 * Return the error code recorded in the buffer (pb->pb_error), or ENOMEM
 * when pb is NULL.  ENOMEM is returned as a positive value.
 */
316 static inline int pagebuf_geterror(page_buf_t *pb)
318 return (pb ? pb->pb_error : ENOMEM);
/*
 * I/O state and submission routines.
 * pagebuf_ioerror() takes the error as an unsigned int, while the pb_error
 * member of page_buf_t is declared unsigned short.
 * NOTE(review): presumably pagebuf_ioerror() stores its argument in
 * pb_error -- confirm, and check that no error value is truncated.
 */
321 extern void pagebuf_iodone( /* mark buffer I/O complete */
322 page_buf_t *, /* buffer to mark */
323 int, /* use data/log helper thread. */
324 int); /* run completion locally, or in
325 * a helper thread. */
327 extern void pagebuf_ioerror( /* mark buffer in error (or not) */
328 page_buf_t *, /* buffer to mark */
329 unsigned int); /* error to store (0 if none) */
331 extern int pagebuf_iostart( /* start I/O on a buffer */
332 page_buf_t *, /* buffer to start */
333 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
334 /* PBF_READ, PBF_WRITE, */
335 /* PBF_DELWRI, PBF_SYNC */
337 extern int pagebuf_iorequest( /* start real I/O */
338 page_buf_t *); /* buffer to convey to device */
341 * pagebuf_iorequest is the core I/O request routine.
342 * It assumes that the buffer is well-formed and
343 * mapped and ready for physical I/O, unlike
344 * pagebuf_iostart() and pagebuf_iophysio(). Those
345 * routines call the inode pagebuf_ioinitiate routine to start I/O,
346 * if it is present, or else call pagebuf_iorequest()
347 * directly if the inode pagebuf_ioinitiate routine is not present.
350 extern int pagebuf_iowait( /* wait for buffer I/O done */
351 page_buf_t *); /* buffer to wait on */
/*
 * NOTE(review): presumably returns the address corresponding to the given
 * offset within the buffer -- confirm against the implementation.
 */
353 extern caddr_t pagebuf_offset(page_buf_t *, size_t);
/*
 * The direction argument is a page_buf_rw_t, i.e. one of PBRW_READ,
 * PBRW_WRITE or PBRW_ZERO.
 */
355 extern void pagebuf_iomove( /* move data in/out of pagebuf */
356 page_buf_t *, /* buffer to manipulate */
357 size_t, /* starting buffer offset */
358 size_t, /* length in buffer */
359 caddr_t, /* data pointer */
360 page_buf_rw_t); /* direction */
362 /* Pinning Buffer Storage in Memory */
/*
 * NOTE(review): these routines presumably operate on the pb_pin_count
 * member of page_buf_t -- confirm against the implementation.
 */
364 extern void pagebuf_pin( /* pin buffer in memory */
365 page_buf_t *); /* buffer to pin */
367 extern void pagebuf_unpin( /* unpin buffered data */
368 page_buf_t *); /* buffer to unpin */
370 extern int pagebuf_ispin( page_buf_t *); /* check if pagebuf is pinned */
372 /* Reading and writing pages */
/* NOTE(review): presumably removes the buffer from the delayed-write queue -- confirm. */
374 extern void pagebuf_delwri_dequeue(page_buf_t *);
/*
 * Two distinct bit values, so they can be OR'ed together.
 * NOTE(review): presumably flags for pagebuf_delwri_flush() -- confirm.
 */
376 #define PBDF_WAIT 0x01
377 #define PBDF_TRYLOCK 0x02
378 extern void pagebuf_delwri_flush(
/*
 * Module set-up and tear-down entry points.  pagebuf_init() returns an int;
 * NOTE(review): presumably 0 on success -- confirm against the implementation.
 */
383 extern int pagebuf_init(void);
384 extern void pagebuf_terminate(void);
386 static __inline__ int __pagebuf_iorequest(page_buf_t *pb)
389 return pb->pb_strat(pb);
390 return pagebuf_iorequest(pb);
393 static __inline__ void pagebuf_run_queues(page_buf_t *pb)
395 if (!pb || atomic_read(&pb->pb_io_remaining))
399 #endif /* __PAGE_BUF_H__ */