// src/source/file/seek.rs

//! Line-aware seeking for the `--from-end` flag.
//!
//! We want to start reading at the beginning of the Nth-from-last line. The
//! file may be enormous, so instead of reading it from the start we scan
//! backwards from the end in 8 KiB chunks, counting newlines as we go.

use anyhow::Result;
use tokio::fs::File;
use tokio::io::{AsyncReadExt, AsyncSeekExt, SeekFrom};

/// Size in bytes (8 KiB) of the scratch buffer, and therefore the largest
/// block read per step, while `from_end` scans the file backwards.
const CHUNK: u64 = 8 * 1024;

/// Position `file` at the first byte of the `lines`-th line from the end.
///
/// The file is scanned backwards in blocks of at most [`CHUNK`] bytes while
/// counting `\n` bytes. A newline that is the very last byte of the file is
/// not counted, because it terminates the final line rather than starting a
/// new one.
///
/// Special cases:
/// * `lines == 0` leaves the cursor at the end of the file.
/// * An empty file leaves the cursor at offset 0.
/// * If fewer than `lines` countable newlines are found (the file holds
///   `lines` lines or fewer), the cursor is moved to offset 0.
///
/// # Returns
///
/// The absolute byte offset the cursor was left at.
///
/// # Errors
///
/// Propagates any error reported by the underlying seek or read calls.
pub async fn from_end(file: &mut File, lines: u64) -> Result<u64> {
    // A single seek to the end both measures the file and already yields the
    // answer for the two trivial cases (no lines requested / empty file).
    let len = file.seek(SeekFrom::End(0)).await?;
    if lines == 0 || len == 0 {
        return Ok(len);
    }

    let last_byte = len - 1;
    let mut scratch = vec![0u8; CHUNK as usize];
    // Counts down to zero as line boundaries are discovered.
    let mut remaining = lines;
    // Exclusive upper bound of the region that has not been scanned yet.
    let mut chunk_end = len;

    while chunk_end > 0 {
        let chunk_len = chunk_end.min(CHUNK);
        let chunk_start = chunk_end - chunk_len;

        file.seek(SeekFrom::Start(chunk_start)).await?;
        let window = &mut scratch[..chunk_len as usize];
        file.read_exact(window).await?;

        // Absolute offsets of the newlines in this block, nearest the end of
        // the file first, ignoring a newline that is the file's final byte.
        let boundaries = window
            .iter()
            .enumerate()
            .rev()
            .filter(|&(_, &byte)| byte == b'\n')
            .map(|(idx, _)| chunk_start + idx as u64)
            .filter(|&at| at != last_byte);

        for newline_at in boundaries {
            remaining -= 1;
            if remaining == 0 {
                // The requested line begins right after this newline.
                let line_start = newline_at + 1;
                file.seek(SeekFrom::Start(line_start)).await?;
                return Ok(line_start);
            }
        }

        chunk_end = chunk_start;
    }

    // Ran out of file before finding enough boundaries: start from the top.
    file.seek(SeekFrom::Start(0)).await?;
    Ok(0)
}

/// Open `path` and hand back a file already positioned by [`from_end`].
///
/// This is a shorthand for calling `File::open` followed by [`from_end`]
/// with the same `lines` argument. The offset reported by [`from_end`] is
/// discarded; only the positioned handle is returned.
///
/// # Errors
///
/// Fails if the file cannot be opened or if [`from_end`] reports an error.
pub async fn open_from_end(
    path: impl AsRef<std::path::Path>,
    lines: u64,
) -> Result<File> {
    let mut handle = File::open(path).await?;
    // Only the cursor position matters here, so drop the numeric offset and
    // return the handle itself on success.
    from_end(&mut handle, lines).await.map(|_offset| handle)
}