From: Eric Sandeen <sandeen@redhat.com> Date: Fri, 17 Apr 2009 13:37:51 -0500 Subject: [fs] update write_cache_pages Message-id: 49E8CC7F.6010801@redhat.com O-Subject: [RHEL5.4 PATCH 2/6] update write_cache_pages Bugzilla: 485315 RH-Acked-by: Rik van Riel <riel@redhat.com> RH-Acked-by: Josef Bacik <josef@redhat.com> This is for bug Bug 485315 - ext4 kernelspace rebase for RHEL5.4 This is essentially a rebase of write_cache_pages() to what's upstream. ext4 is the only rhel5 caller of this function. diff --git a/fs/mpage.c b/fs/mpage.c index 44215a9..ae9b0d5 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -694,7 +694,7 @@ EXPORT_SYMBOL(__mpage_writepage_mpd); /** * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write - * @range_cont: same as @wbc->range_cont upstream ... + * @no_nrwrite_index_update : same as @wbc->no_nrwrite_index_update upstream ... * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @writepage: function called for each page * @data: data passed to writepage function @@ -707,10 +707,9 @@ EXPORT_SYMBOL(__mpage_writepage_mpd); * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. */ - -/* range_cont is part of wbc upstream, but we cannot easily add to that - KABI */ +/* no_nrwrite_index_update is part of wbc upstream, but we cannot easily add to that - KABI */ int -write_cache_pages(struct address_space *mapping, int range_cont, +write_cache_pages(struct address_space *mapping, int no_nrwrite_index_update, struct writeback_control *wbc, writepage_data_t writepage, void *data) { @@ -719,10 +718,13 @@ write_cache_pages(struct address_space *mapping, int range_cont, int done = 0; struct pagevec pvec; int nr_pages; + pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; /* Inclusive */ - int scanned = 0; + pgoff_t done_index; + int cycled; int range_whole = 0; + long nr_to_write = wbc->nr_to_write; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; @@ -731,85 +733,149 @@ write_cache_pages(struct address_space *mapping, int range_cont, pagevec_init(&pvec, 0); if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ + writeback_index = mapping->writeback_index; /* prev offset */ + index = writeback_index; + if (index == 0) + cycled = 1; + else + cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - scanned = 1; + cycled = 1; /* ignore range_cyclic tests */ } retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; + done_index = index; + while (!done && (index <= end)) { + int i; + + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; - scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping + * At this point, the page may be truncated or + * invalidated (changing page->mapping to NULL), or + * even swizzled back from swapper_space to tmpfs file + * mapping. However, page->index will not change + * because we have a reference on the page. */ + if (page->index > end) { + /* + * can't be range_cyclic (1st pass) because + * end == -1 in that case. + */ + done = 1; + break; + } + + done_index = page->index + 1; + lock_page(page); + /* + * Page truncated or invalidated. We can freely skip it + * then, even for data integrity operations: the page + * has disappeared concurrently, so there could be no + * real expectation of this data interity operation + * even if there is now a new, dirty page at the same + * pagecache address. + */ if (unlikely(page->mapping != mapping)) { +continue_unlock: unlock_page(page); continue; } - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; + if (PageWriteback(page)) { + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + else + goto continue_unlock; } - ret = (*writepage)(page, wbc, data); + BUG_ON(PageWriteback(page)); + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; + ret = (*writepage)(page, wbc, data); + if (unlikely(ret)) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(page); + ret = 0; + } else { + /* + * done_index is set past this page, + * so media errors will not choke + * background writeout for the entire + * file. This has consequences for + * range_cyclic semantics (ie. it may + * not be suitable for data integrity + * writeout). + */ + done = 1; + break; + } + } + + if (nr_to_write > 0) { + nr_to_write--; + if (nr_to_write == 0 && + wbc->sync_mode == WB_SYNC_NONE) { + /* + * We stop writing back only if we are + * not doing integrity sync. In case of + * integrity sync we have to keep going + * because someone may be concurrently + * dirtying pages, and we might have + * synced a lot of newly appeared dirty + * pages, but have not synced all of the + * old dirty pages. + */ + done = 1; + break; + } } - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; done = 1; + break; } } pagevec_release(&pvec); cond_resched(); } - if (!scanned && !done) { + if (!cycled && !done) { /* + * range_cyclic: * We hit the last page and there is more work to be done: wrap * back to the start of the file */ - scanned = 1; + cycled = 1; index = 0; + end = writeback_index - 1; goto retry; } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; + if (!no_nrwrite_index_update) { + if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) + mapping->writeback_index = done_index; + wbc->nr_to_write = nr_to_write; + } - if (range_cont) - wbc->range_start = index << PAGE_CACHE_SHIFT; return ret; } EXPORT_SYMBOL(write_cache_pages);