/*
 * reverse_lib.c as it stands after the change "Mmap in 512MB chunks"
 * (commit 639fe3c8c22cf307547d945a833f6999d4dd819e, Scott Worley,
 * Thu, 11 May 2017).
 *
 * The input is mapped one 512 MiB chunk at a time.  This allows processing
 * large files on 32-bit machines that don't have the address space to map
 * entire files.
 */

/* Headers required by the calls made below. */
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sysexits.h>
#include <unistd.h>

/*
 * Integer division rounding up.
 *
 * Expects dividend >= 0 and divisor > 0.  Returns 0 when dividend is 0, so
 * that a zero-length input yields zero chunks instead of one empty chunk.
 */
static off_t ceil_div(off_t dividend, off_t divisor) {
  return dividend / divisor + (dividend % divisor != 0);
}

/*
 * Write the bytes of the file named input_filename to output_stream in
 * reverse order (last byte first).
 *
 * The file is mapped read-only in chunks of at most 512 MiB, walking from the
 * last chunk to the first, and each chunk is emitted back to front.  An empty
 * file produces no output and is never mapped.
 *
 * Does not return on failure: any error terminates the process through
 * err()/errx() with EX_NOINPUT (open/stat/mmap) or EX_IOERR
 * (write/munmap/close).
 */
void reverse_file(const char* input_filename, FILE* output_stream) {
  /*
   * Chunk size doubles as the stride of the mmap() file offsets, so it must
   * stay a multiple of the system page size.
   */
  const off_t mmap_chunk_size = (off_t)512 << 20;

  int fd = open(input_filename, O_RDONLY);
  if (fd == -1) err(EX_NOINPUT, "Could not open specified file");

  struct stat stats;
  if (fstat(fd, &stats) == -1) err(EX_NOINPUT, "Could not stat input");

  off_t num_chunks = ceil_div(stats.st_size, mmap_chunk_size);
  for (off_t chunk = num_chunks - 1; chunk >= 0; chunk--) {
    off_t start_offset = chunk * mmap_chunk_size;

    /*
     * Only the final chunk can be short.  Clamping the remaining byte count
     * avoids computing (chunk + 1) * mmap_chunk_size.
     */
    off_t remaining = stats.st_size - start_offset;
    size_t map_size =
        (size_t)(remaining < mmap_chunk_size ? remaining : mmap_chunk_size);

    /* mmap() does not require the length to be a multiple of the page size. */
    char *m = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, start_offset);
    if (m == MAP_FAILED) {
      /* off_t has no printf conversion of its own; widen explicitly. */
      err(EX_NOINPUT, "Could not mmap chunk %lld of %lld",
          (long long)chunk, (long long)num_chunks);
    }

    /* Counts down from map_size - 1 to 0 without an unsigned underflow. */
    for (size_t p = map_size; p-- > 0;) {
      if (fputc(m[p], output_stream) == EOF) errx(EX_IOERR, "Could not write");
    }

    if (munmap(m, map_size) == -1) {
      err(EX_IOERR, "Could not unmap chunk %lld of %lld",
          (long long)chunk, (long long)num_chunks);
    }
  }

  if (close(fd) == -1) err(EX_IOERR, "Could not close input");
}