/*
* Resizable virtual memory filesystem for Linux.
*
* Copyright (C) 2000 Linus Torvalds.
* 2000 Transmeta Corp.
* 2000-2001 Christoph Rohland
* 2000-2001 SAP AG
* 2002 Red Hat Inc.
* Copyright (C) 2002-2005 Hugh Dickins.
* Copyright (C) 2002-2005 VERITAS Software Corporation.
* Copyright (C) 2004 Andi Kleen, SuSE Labs
*
* Extended attribute support for tmpfs:
* Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
* Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
*
* This file is released under the GPL.
*/
/*
* This virtual memory filesystem is heavily based on the ramfs. It
* extends ramfs by the ability to use swap and honor resource limits
* which makes it a completely usable filesystem.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/generic_acl.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/vfs.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <asm/pgtable.h>
/* This magic number is used in glibc for posix shared memory */
#define TMPFS_MAGIC 0x01021994
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
#define SHMEM_PAGEIN VM_READ
#define SHMEM_TRUNCATE VM_WRITE
/* Definition to limit shmem_truncate's steps between cond_rescheds */
#define LATENCY_LIMIT 64
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
/*
 * Allocation policy handed to shmem_getpage and shmem_swp_alloc.
 */
enum sgp_type {
	/* stay within i_size; never allocate a page */
	SGP_READ,
	/* stay within i_size; a page may be allocated */
	SGP_CACHE,
	/* same as SGP_CACHE, but a new page is set dirty */
	SGP_DIRTY,
	/* may go beyond i_size; a page may be allocated */
	SGP_WRITE,
};
/* Default block limit: half of totalram_pages. */
static unsigned long shmem_default_max_blocks(void)
{
	unsigned long half_of_ram = totalram_pages / 2;

	return half_of_ram;
}
/*
 * Default inode limit: the smaller of the non-highmem page count
 * (totalram_pages - totalhigh_pages) and half of totalram_pages.
 */
static unsigned long shmem_default_max_inodes(void)
{
	unsigned long non_high = totalram_pages - totalhigh_pages;
	unsigned long half_of_ram = totalram_pages / 2;

	return min(non_high, half_of_ram);
}
static int shmem_getpage(struct inode *inode, unsigned long idx,
struct page **pagep, enum sgp_type sgp, int *type);
/*
 * Allocate one zero-filled page (of order PAGE_CACHE_SHIFT-PAGE_SHIFT) to
 * serve as a swap vector directory page.  Returns whatever alloc_pages()
 * returns for the adjusted mask.
 */
static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
{
	/*
	 * The above definition of ENTRIES_PER_PAGE, and the use of
	 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
	 * might be reconsidered if it ever diverges from PAGE_SIZE.
	 *
	 * Mobility flags are masked out as swap vectors cannot move
	 */
	return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
				PAGE_CACHE_SHIFT-PAGE_SHIFT);
}
/* Free a directory page, using the same order shmem_dir_alloc() allocates. */
static inline void shmem_dir_free(struct page *page)
{
	const unsigned int order = PAGE_CACHE_SHIFT - PAGE_SHIFT;

	__free_pages(page, order);
}
/*
 * Atomically map a directory page in the KM_USER0 slot and view it as an
 * array of page pointers.  Undo with shmem_dir_unmap().
 */
static struct page **shmem_dir_map(struct page *page)
{
	void *vaddr = kmap_atomic(page, KM_USER0);

	return (struct page **)vaddr;
}
/* Drop a KM_USER0 atomic mapping set up by shmem_dir_map(). */
static inline void shmem_dir_unmap(struct page **dir)
{
	kunmap_atomic(dir, KM_USER0);
}
static swp_entry_t *shmem_swp_map(struct page *page)
{
return (swp_entry_t *)kmap_atomic(page, KM_USER1);
}
/*
 * Keep kmap_atomic accounting balanced for i_direct entries.
 *
 * A pointer into i_direct may be handed to code that treats it like an
 * indirect entry and therefore calls shmem_swp_unmap() on it.  So that
 * the preempt count stays even, perform a matching KM_USER1 kmap_atomic
 * here.  Since the effect of kmap_atomic on a lowmem page varies with
 * config and architecture, a real lowmem page (ZERO_PAGE) is mapped and
 * the returned address is deliberately ignored.
 */
static inline void shmem_swp_balance_unmap(void)
{
	(void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
}
/* Drop a KM_USER1 atomic mapping set up by shmem_swp_map(). */
static inline void shmem_swp_unmap(swp_entry_t *entry)
{
	kunmap_atomic(entry, KM_USER1);
}
/* Fetch the shmem-private superblock info stored in sb->s_fs_info. */
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = sb->s_fs_info;

	return sbinfo;
}
/*
 * Pre-accounting of a whole object.
 *
 * shmem_file_setup charges the full fixed size of a VM object up front,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), in line
 * with how private mappings are pre-accounted.
 *
 * Returns 0 when VM_ACCOUNT is not set in @flags, otherwise the result
 * of security_vm_enough_memory() for the page count covering @size.
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_ACCOUNT))
		return 0;

	return security_vm_enough_memory(VM_ACCT(size));
}
/*
 * Return the pages covering @size to the VM accounting; does nothing
 * unless VM_ACCOUNT is set in @flags.
 */
static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_ACCOUNT))
		return;

	vm_unacct_memory(VM_ACCT(size));
}
/*
 * Incremental accounting of a single page.
 *
 * Unlike pre-accounted objects, tmpfs objects are charged page by page
 * as pages get allocated, which is what permits huge sparse files.
 * shmem_getpage reports a failure here as -ENOSPC rather than -ENOMEM,
 * so that a failure on a sparse tmpfs mapping gives SIGBUS, not OOM.
 *
 * Returns 0 when VM_ACCOUNT is set in @flags (already pre-accounted),
 * otherwise the result of security_vm_enough_memory() for one page
 * cache page.
 */
static inline int shmem_acct_block(unsigned long flags)
{
	if (flags & VM_ACCOUNT)
		return 0;

	return security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
}
/*
 * Give back the accounting for @pages page cache pages; does nothing
 * when VM_ACCOUNT is set in @flags.
 */
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_ACCOUNT)
		return;

	vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
}
static const struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
.unplug_io_fn = default_unplug_io_fn,
};
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
/*
 * Return @pages blocks to the superblock's free count and subtract them
 * from the inode's i_blocks, under sbinfo->stat_lock.
 *
 * The stray closing brace in the damaged text shows an enclosing
 * conditional was lost; it is restored as the max_blocks guard.
 * NOTE(review): guard reconstructed by analogy with the max_inodes
 * checks in shmem_reserve_inode/shmem_free_inode - confirm.
 */
static void shmem_free_blocks(struct inode *inode, long pages)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

	if (sbinfo->max_blocks) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_blocks += pages;
		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
		spin_unlock(&sbinfo->stat_lock);
	}
}
/*
 * Take one inode from the superblock's free pool.
 *
 * When max_inodes is zero nothing is counted and 0 is returned.
 * Otherwise free_inodes is decremented under stat_lock; returns
 * -ENOSPC if it was already zero.
 */
static int shmem_reserve_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	int ret = 0;

	if (!sbinfo->max_inodes)
		return 0;

	spin_lock(&sbinfo->stat_lock);
	if (sbinfo->free_inodes)
		sbinfo->free_inodes--;
	else
		ret = -ENOSPC;
	spin_unlock(&sbinfo->stat_lock);

	return ret;
}
/*
 * Give one inode back to the superblock's free pool; nothing is counted
 * when max_inodes is zero.
 */
static void shmem_free_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (!sbinfo->max_inodes)
		return;

	spin_lock(&sbinfo->stat_lock);
	sbinfo->free_inodes++;
	spin_unlock(&sbinfo->stat_lock);
}
* shmem_recalc_inode - recalculate the size of an inode
* @inode: inode to recalc
*
* We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back.
*
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
*
* It has to be called with the spinlock held.
*/
static void shmem_recalc_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
long freed;
freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
if (freed > 0) {
info->alloced -= freed;
shmem_unacct_blocks(info->flags, freed);
shmem_free_blocks(inode, freed);
}
}
/**
 * shmem_swp_entry - find the swap vector position in the info structure
 * @info:  info structure for the inode
 * @index: index of the page to find
 * @page:  optional page to add to the structure. Has to be preset to
 *         all zeros
 *
 * If there is no space allocated yet it will return NULL when
 * page is NULL, else it will use the page for the needed block,
 * setting it to NULL on return to indicate that it has been used.
 *
 * The swap vector is organized the following way:
 *
 * There are SHMEM_NR_DIRECT entries directly stored in the
 * shmem_inode_info structure. So small files do not need an additional
 * allocation.
 *
 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 * i_indirect which points to a page which holds in the first half
 * doubly indirect blocks, in the second half triple indirect blocks:
 *
 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 * following layout (for SHMEM_NR_DIRECT == 16):
 *
 * i_indirect -> dir --> 16-19
 *            |      +-> 20-23
 *            |
 *            +-->dir2 --> 24-27
 *            |        +-> 28-31
 *            |        +-> 32-35
 *            |        +-> 36-39
 *            |
 *            +-->dir3 --> 40-43
 *                     +-> 44-47
 *                     +-> 48-51
 *                     +-> 52-55
 */
static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
{
	unsigned long slot;		/* entry position inside the leaf page */
	struct page **vec;		/* currently mapped directory page */
	struct page *leaf;

	/* Direct entries live in the inode info itself. */
	if (index < SHMEM_NR_DIRECT) {
		/* fake a KM_USER1 map so the caller's shmem_swp_unmap balances */
		shmem_swp_balance_unmap();
		return info->i_direct+index;
	}

	/* No top-level directory yet: adopt the donated page, if any. */
	if (!info->i_indirect) {
		if (page) {
			info->i_indirect = *page;
			*page = NULL;
		}
		return NULL;			/* need another page */
	}

	index -= SHMEM_NR_DIRECT;
	slot = index % ENTRIES_PER_PAGE;
	index /= ENTRIES_PER_PAGE;
	vec = shmem_dir_map(info->i_indirect);

	/* Second half of the top directory points at middle directories. */
	if (index >= ENTRIES_PER_PAGE/2) {
		struct page *mid;

		index -= ENTRIES_PER_PAGE/2;
		vec += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
		index %= ENTRIES_PER_PAGE;
		mid = *vec;
		if (!mid) {
			if (page) {
				*vec = *page;
				*page = NULL;
			}
			shmem_dir_unmap(vec);
			return NULL;		/* need another page */
		}
		/* swap the KM_USER0 mapping over to the middle directory */
		shmem_dir_unmap(vec);
		vec = shmem_dir_map(mid);
	}

	vec += index;
	leaf = *vec;
	if (!leaf) {
		/* Missing leaf page: use the donated page when there is one. */
		leaf = page ? *page : NULL;
		if (!leaf) {
			shmem_dir_unmap(vec);
			return NULL;		/* need a page */
		}
		*vec = leaf;
		*page = NULL;
	}
	shmem_dir_unmap(vec);

	/* Returned pointer is KM_USER1-mapped; caller must shmem_swp_unmap. */
	return shmem_swp_map(leaf) + slot;
}
/*
 * Store @value in @entry and keep the swap counts in step.
 *
 * info->swapped goes up by one for a non-zero @value and down by one
 * for zero.  When @entry does not lie within info->i_direct, the same
 * adjustment is applied to the page_private count of the page that
 * contains it, located via kmap_atomic_to_page().
 */
static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
{
	long incdec = value? 1: -1;

	entry->val = value;
	info->swapped += incdec;
	if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
		struct page *page = kmap_atomic_to_page(entry);
		set_page_private(page, page_private(page) + incdec);
	}
}
* shmem_swp_alloc - get the position of the swap entry for the page.
* @info: info structure for the inode
* @index: index of the page to find
* @sgp: check and recheck i_size? skip allocation?
*
* If the entry does not exist, allocate it.
*/
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{
struct inode *inode = &info->vfs_inode;
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct page *page = NULL;
swp_entry_t *entry;
if (sgp != SGP_WRITE &&
((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
return ERR_PTR(-EINVAL);
while (!(entry = shmem_swp_entry(info, index, &page))) {
if (sgp == SGP_READ)
return shmem_swp_map(ZERO_PAGE(0));
/*
* Test free_blocks against 1 not 0, since we have 1 data
* page (and perhaps indirect index pages) yet to allocate:
* a waste to allocate index if we cannot allocate data.
*/
spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks <= 1) {
spin_unlock(&sbinfo->stat_lock);
return ERR_PTR(-ENOSPC);
}
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
}
spin_unlock(&info->lock);
Mel Gorman
committed
page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
if (page)
set_page_private(page, 0);
spin_lock(&info->lock);
if (!page) {
shmem_free_blocks(inode, 1);
return ERR_PTR(-ENOMEM);
}
if (sgp != SGP_WRITE &&
((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
entry = ERR_PTR(-EINVAL);
break;
}
if (info->next_index <= index)
info->next_index = index + 1;
}
if (page) {
/* another task gave its page, or truncated the file */
shmem_free_blocks(inode, 1);
shmem_dir_free(page);
}
if (info->next_index <= index && !IS_ERR(entry))
info->next_index = index + 1;
return entry;
}
* shmem_free_swp - free some swap entries in a directory
* @dir: pointer to the directory
* @edir: pointer after last entry of the directory
* @punch_lock: pointer to spinlock when needed for the holepunch case
static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
spinlock_t *punch_lock)
spinlock_t *punch_unlock = NULL;
swp_entry_t *ptr;
int freed = 0;
for (ptr = dir; ptr < edir; ptr++) {
if (ptr->val) {
if (unlikely(punch_lock)) {
punch_unlock = punch_lock;
punch_lock = NULL;
spin_lock(punch_unlock);
if (!ptr->val)
continue;
}
free_swap_and_cache(*ptr);
*ptr = (swp_entry_t){0};
freed++;
}
}
if (punch_unlock)
spin_unlock(punch_unlock);
/*
 * Map @subdir and free its swap entries in [@offset, @limit), working in
 * chunks of at most LATENCY_LIMIT entries.
 *
 * When a reschedule is wanted between chunks, the atomic mappings are
 * dropped first: the swap page mapping always, and the caller's directory
 * mapping *@dir if it is set, in which case *@dir is left NULL so the
 * caller knows to map it again.
 *
 * Returns the total number of entries freed.
 */
static int shmem_map_and_free_swp(struct page *subdir, int offset,
		int limit, struct page ***dir, spinlock_t *punch_lock)
{
	swp_entry_t *ptr;
	int freed = 0;

	ptr = shmem_swp_map(subdir);
	for (; offset < limit; offset += LATENCY_LIMIT) {
		int size = limit - offset;
		if (size > LATENCY_LIMIT)
			size = LATENCY_LIMIT;
		freed += shmem_free_swp(ptr+offset, ptr+offset+size,
							punch_lock);
		if (need_resched()) {
			shmem_swp_unmap(ptr);
			if (*dir) {
				shmem_dir_unmap(*dir);
				*dir = NULL;
			}
			cond_resched();
			ptr = shmem_swp_map(subdir);
		}
	}
	shmem_swp_unmap(ptr);
	return freed;
}
/*
 * Free every directory page on a NULL-terminated chain linked through
 * page->lru, offering to reschedule after each LATENCY_LIMIT pages.
 * @next must point at the first page's lru member.
 */
static void shmem_free_pages(struct list_head *next)
{
	int batch = 0;

	do {
		struct page *victim = container_of(next, struct page, lru);

		/* step forward first: the link sits inside the page being freed */
		next = next->next;
		shmem_dir_free(victim);
		if (++batch >= LATENCY_LIMIT) {
			cond_resched();
			batch = 0;
		}
	} while (next);
}
/*
 * Free the swap entries, and any swap vector directory pages that become
 * wholly unused, for the byte range @start..@end of @inode.
 *
 * @end == (loff_t)-1 means plain truncation from @start: next_index is
 * lowered and no locking of the directory updates is needed.  Any other
 * @end is treated as punching a hole, where directory updates are made
 * under info->lock.
 *
 * NOTE(review): this body was repaired from a damaged copy.  Restored
 * lines: the topdir and subdir assignments, the "goto done2" early exit,
 * the per-iteration "offset = 0", two closing braces and one comment
 * terminator.  Confirm against the authoritative tree.
 */
static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	unsigned long diroff;
	struct page **dir;
	struct page *topdir;
	struct page *middir;
	struct page *subdir;
	swp_entry_t *ptr;
	LIST_HEAD(pages_to_free);
	long nr_pages_to_free = 0;
	long nr_swaps_freed = 0;
	int offset;
	int freed;
	int punch_hole;
	spinlock_t *needs_lock;
	spinlock_t *punch_lock;
	unsigned long upper_limit;

	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (idx >= info->next_index)
		return;

	spin_lock(&info->lock);
	info->flags |= SHMEM_TRUNCATE;
	if (likely(end == (loff_t) -1)) {
		limit = info->next_index;
		upper_limit = SHMEM_MAX_INDEX;
		info->next_index = idx;
		needs_lock = NULL;
		punch_hole = 0;
	} else {
		if (end + 1 >= inode->i_size) {	/* we may free a little more */
			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
							PAGE_CACHE_SHIFT;
			upper_limit = SHMEM_MAX_INDEX;
		} else {
			limit = (end + 1) >> PAGE_CACHE_SHIFT;
			upper_limit = limit;
		}
		needs_lock = &info->lock;
		punch_hole = 1;
	}

	topdir = info->i_indirect;
	if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
		/* whole indirect tree goes: detach it while still locked */
		info->i_indirect = NULL;
		nr_pages_to_free++;
		list_add(&topdir->lru, &pages_to_free);
	}
	spin_unlock(&info->lock);

	if (info->swapped && idx < SHMEM_NR_DIRECT) {
		ptr = info->i_direct;
		size = limit;
		if (size > SHMEM_NR_DIRECT)
			size = SHMEM_NR_DIRECT;
		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
	}

	/*
	 * If there are no indirect blocks or we are punching a hole
	 * below indirect blocks, nothing to be done.
	 */
	if (!topdir || limit <= SHMEM_NR_DIRECT)
		goto done2;

	/*
	 * The truncation case has already dropped info->lock, and we're safe
	 * because i_size and next_index have already been lowered, preventing
	 * access beyond.  But in the punch_hole case, we still need to take
	 * the lock when updating the swap directory, because there might be
	 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
	 * shmem_writepage.  However, whenever we find we can remove a whole
	 * directory page (not at the misaligned start or end of the range),
	 * we first NULLify its pointer in the level above, and then have no
	 * need to take the lock when updating its contents: needs_lock and
	 * punch_lock (either pointing to info->lock or NULL) manage this.
	 */

	upper_limit -= SHMEM_NR_DIRECT;
	limit -= SHMEM_NR_DIRECT;
	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
	offset = idx % ENTRIES_PER_PAGE;
	idx -= offset;

	dir = shmem_dir_map(topdir);
	stage = ENTRIES_PER_PAGEPAGE/2;
	if (idx < ENTRIES_PER_PAGEPAGE/2) {
		middir = topdir;
		diroff = idx/ENTRIES_PER_PAGE;
	} else {
		dir += ENTRIES_PER_PAGE/2;
		dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
		while (stage <= idx)
			stage += ENTRIES_PER_PAGEPAGE;
		middir = *dir;
		if (*dir) {
			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
			if (!diroff && !offset && upper_limit >= stage) {
				if (needs_lock) {
					spin_lock(needs_lock);
					*dir = NULL;
					spin_unlock(needs_lock);
					needs_lock = NULL;
				} else
					*dir = NULL;
				nr_pages_to_free++;
				list_add(&middir->lru, &pages_to_free);
			}
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(middir);
		} else {
			diroff = 0;
			offset = 0;
			idx = stage;
		}
	}

	for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
		if (unlikely(idx == stage)) {
			/* crossed into the next middle directory */
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(topdir) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto done1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			middir = *dir;
			if (punch_hole)
				needs_lock = &info->lock;
			if (upper_limit >= stage) {
				if (needs_lock) {
					spin_lock(needs_lock);
					*dir = NULL;
					spin_unlock(needs_lock);
					needs_lock = NULL;
				} else
					*dir = NULL;
				nr_pages_to_free++;
				list_add(&middir->lru, &pages_to_free);
			}
			shmem_dir_unmap(dir);
			cond_resched();
			dir = shmem_dir_map(middir);
			diroff = 0;
		}
		punch_lock = needs_lock;
		subdir = dir[diroff];
		if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
			if (needs_lock) {
				spin_lock(needs_lock);
				dir[diroff] = NULL;
				spin_unlock(needs_lock);
				punch_lock = NULL;
			} else
				dir[diroff] = NULL;
			nr_pages_to_free++;
			list_add(&subdir->lru, &pages_to_free);
		}
		if (subdir && page_private(subdir) /* has swap entries */) {
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			freed = shmem_map_and_free_swp(subdir,
					offset, size, &dir, punch_lock);
			/* the helper unmaps dir if it had to reschedule */
			if (!dir)
				dir = shmem_dir_map(middir);
			nr_swaps_freed += freed;
			if (offset || punch_lock) {
				set_page_private(subdir,
					page_private(subdir) - freed);
			} else
				BUG_ON(page_private(subdir) != freed);
		}
		/* only the first leaf page can start part-way through */
		offset = 0;
	}
done1:
	shmem_dir_unmap(dir);
done2:
	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
		/*
		 * Call truncate_inode_pages again: racing shmem_unuse_inode
		 * may have swizzled a page in from swap since vmtruncate or
		 * generic_delete_inode did it, before we lowered next_index.
		 * Also, though shmem_getpage checks i_size before adding to
		 * cache, no recheck after: so fix the narrow window there too.
		 *
		 * Recalling truncate_inode_pages_range and unmap_mapping_range
		 * every time for punch_hole (which never got a chance to clear
		 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
		 * yet hardly ever necessary: try to optimize them out later.
		 */
		truncate_inode_pages_range(inode->i_mapping, start, end);
		if (punch_hole)
			unmap_mapping_range(inode->i_mapping, start,
							end - start, 1);
	}

	spin_lock(&info->lock);
	info->flags &= ~SHMEM_TRUNCATE;
	info->swapped -= nr_swaps_freed;
	if (nr_pages_to_free)
		shmem_free_blocks(inode, nr_pages_to_free);
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);

	/*
	 * Empty swap vector directory pages to be freed?
	 */
	if (!list_empty(&pages_to_free)) {
		/* cut the ring open so shmem_free_pages sees a NULL end */
		pages_to_free.prev->next = NULL;
		shmem_free_pages(pages_to_free.next);
	}
}
static void shmem_truncate(struct inode *inode)
{
shmem_truncate_range(inode, inode->i_size, (loff_t)-1);
}
/*
 * Attribute change for a shmem inode.
 *
 * For a regular file being shrunk, some preparation is done first (see
 * the comments below); then the change is validated with
 * inode_change_ok() and applied with inode_setattr().  With
 * CONFIG_TMPFS_POSIX_ACL, a successful mode change is followed by
 * generic_acl_chmod().  Returns 0 or the first error met.
 */
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct page *held = NULL;
	int shrinking;
	int error;

	shrinking = S_ISREG(inode->i_mode) &&
		    (attr->ia_valid & ATTR_SIZE) &&
		    attr->ia_size < inode->i_size;

	if (shrinking) {
		/*
		 * New size ends inside a page: if that page already
		 * exists, keep a reference to it for the duration of the
		 * truncation, so that truncate_partial_page cannot miss
		 * it in case it would otherwise have gone out to swap.
		 */
		if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
			(void) shmem_getpage(inode,
				attr->ia_size>>PAGE_CACHE_SHIFT,
					&held, SGP_READ, NULL);
		}
		/*
		 * Clear SHMEM_PAGEIN so that shmem_truncate can tell
		 * whether pages may have been added to the cache after
		 * truncate_inode_pages.  Not worth doing for a truncation
		 * to zero length: the nrpages check is cheap enough there.
		 */
		if (attr->ia_size) {
			struct shmem_inode_info *info = SHMEM_I(inode);

			spin_lock(&info->lock);
			info->flags &= ~SHMEM_PAGEIN;
			spin_unlock(&info->lock);
		}
	}

	error = inode_change_ok(inode, attr);
	if (!error)
		error = inode_setattr(inode, attr);
#ifdef CONFIG_TMPFS_POSIX_ACL
	if (!error && (attr->ia_valid & ATTR_MODE))
		error = generic_acl_chmod(inode, &shmem_acl_ops);
#endif
	if (held)
		page_cache_release(held);
	return error;
}
/*
 * Final teardown of a shmem inode.
 *
 * For inodes whose truncate operation is shmem_truncate, the page cache
 * is emptied, the pre-accounted size is given back, the file is truncated
 * to zero and the inode is taken off shmem_swaplist.  In every case the
 * inode count is released and the inode cleared.
 *
 * NOTE(review): repaired from a damaged copy - the list_del_init() call
 * between the mutex lock/unlock and two closing braces were missing and
 * have been restored; confirm against the authoritative tree.
 */
static void shmem_delete_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (inode->i_op->truncate == shmem_truncate) {
		truncate_inode_pages(inode->i_mapping, 0);
		shmem_unacct_size(info->flags, inode->i_size);
		inode->i_size = 0;
		shmem_truncate(inode);
		/* unlocked peek; the mutex is taken only when needed */
		if (!list_empty(&info->swaplist)) {
			mutex_lock(&shmem_swaplist_mutex);
			list_del_init(&info->swaplist);
			mutex_unlock(&shmem_swaplist_mutex);
		}
	}
	shmem_free_inode(inode->i_sb);
	clear_inode(inode);
}
/*
 * Linear search of the entries [@dir, @edir) for one whose val equals
 * @entry.val.  Returns its offset from @dir, or -1 when not found.
 */
static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
{
	swp_entry_t *cur = dir;

	while (cur < edir) {
		if (cur->val == entry.val)
			return cur - dir;
		cur++;
	}
	return -1;
}
/*
 * Look for swap @entry among the swap vectors of @info and, if found,
 * move @page from the swap cache into the inode's page cache.
 *
 * shmem_swaplist_mutex is released on the "found" path but not when the
 * entry is absent.  @page is unlocked and released on the "found" path.
 *
 * Returns 0 when the entry is not in this inode, 1 when it was found, or
 * a negative errno from the precharge or radix tree preload.
 *
 * NOTE(review): repaired from a badly damaged copy.  Restored from the
 * surrounding logic and recollection of the upstream file: the "error"
 * declaration, the "offset >= 0" test before the first "goto found", the
 * subdir/page_private guard and swap-page unmap in the scan loop, the
 * check of the precharge result, the "idx, GFP_NOWAIT" arguments of
 * add_to_page_cache(), the "error = 1" resets, setting SHMEM_PAGEIN, and
 * the unlock / radix_tree_preload_end() before "uncharge".  Confirm every
 * one of these against the authoritative tree before relying on it.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
	struct inode *inode;
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	struct page **dir;
	struct page *subdir;
	swp_entry_t *ptr;
	int offset;
	int error;

	idx = 0;
	ptr = info->i_direct;
	spin_lock(&info->lock);
	if (!info->swapped) {
		/* nothing on swap any more: drop inode from the swaplist */
		list_del_init(&info->swaplist);
		goto lost2;
	}
	limit = info->next_index;
	size = limit;
	if (size > SHMEM_NR_DIRECT)
		size = SHMEM_NR_DIRECT;
	offset = shmem_find_swp(entry, ptr, ptr+size);
	if (offset >= 0)
		goto found;
	if (!info->i_indirect)
		goto lost2;

	dir = shmem_dir_map(info->i_indirect);
	stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;

	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
		if (unlikely(idx == stage)) {
			shmem_dir_unmap(dir-1);
			if (cond_resched_lock(&info->lock)) {
				/* check it has not been truncated */
				if (limit > info->next_index) {
					limit = info->next_index;
					if (idx >= limit)
						goto lost2;
				}
			}
			dir = shmem_dir_map(info->i_indirect) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto lost1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			subdir = *dir;
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(subdir);
		}
		subdir = *dir;
		if (subdir && page_private(subdir)) {
			ptr = shmem_swp_map(subdir);
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			offset = shmem_find_swp(entry, ptr, ptr+size);
			shmem_swp_unmap(ptr);
			if (offset >= 0) {
				shmem_dir_unmap(dir);
				goto found;
			}
		}
	}
lost1:
	shmem_dir_unmap(dir-1);
lost2:
	spin_unlock(&info->lock);
	return 0;
found:
	idx += offset;
	inode = igrab(&info->vfs_inode);
	spin_unlock(&info->lock);

	/*
	 * Move _head_ to start search for next from here.
	 * But be careful: shmem_delete_inode checks list_empty without taking
	 * mutex, and there's an instant in list_move_tail when info->swaplist
	 * would appear empty, if it were the only one on shmem_swaplist.  We
	 * could avoid doing it if inode NULL; or use this minor optimization.
	 */
	if (shmem_swaplist.next != &info->swaplist)
		list_move_tail(&shmem_swaplist, &info->swaplist);
	mutex_unlock(&shmem_swaplist_mutex);

	error = 1;
	if (!inode)
		goto out;
	/* Precharge page while we can wait, compensate afterwards */
	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
	if (error)
		goto out;
	error = radix_tree_preload(GFP_KERNEL);
	if (error)
		goto uncharge;
	error = 1;

	spin_lock(&info->lock);
	/* re-lookup: the entry may have moved or gone while unlocked */
	ptr = shmem_swp_entry(info, idx, NULL);
	if (ptr && ptr->val == entry.val)
		error = add_to_page_cache(page, inode->i_mapping,
						idx, GFP_NOWAIT);
	if (error == -EEXIST) {
		struct page *filepage = find_get_page(inode->i_mapping, idx);
		error = 1;
		if (filepage) {
			/*
			 * There might be a more uptodate page coming down
			 * from a stacked writepage: forget our swappage if so.
			 */
			if (PageUptodate(filepage))
				error = 0;
			page_cache_release(filepage);
		}
	}
	if (!error) {
		delete_from_swap_cache(page);
		set_page_dirty(page);
		info->flags |= SHMEM_PAGEIN;
		shmem_swp_set(info, ptr, 0);
		swap_free(entry);
		error = 1;	/* not an error, but entry was found */
	}
	if (ptr)
		shmem_swp_unmap(ptr);
	spin_unlock(&info->lock);
	radix_tree_preload_end();
uncharge:
	mem_cgroup_uncharge_page(page);
out:
	unlock_page(page);
	page_cache_release(page);
	iput(inode);		/* allows for NULL */
	return error;
}
/*
* shmem_unuse() search for an eventually swapped out shmem page.
*/
int shmem_unuse(swp_entry_t entry, struct page *page)
{
struct list_head *p, *next;
struct shmem_inode_info *info;
int found = 0;
mutex_lock(&shmem_swaplist_mutex);
list_for_each_safe(p, next, &shmem_swaplist) {
info = list_entry(p, struct shmem_inode_info, swaplist);
found = shmem_unuse_inode(info, entry, page);
mutex_unlock(&shmem_swaplist_mutex);
out: return found; /* 0 or 1 or -ENOMEM */