Skip to content

Commit

Permalink
Merge pull request #111 from bodo-run/glob-list
Browse files Browse the repository at this point in the history
feat: allow globs to be passed into `yek`
  • Loading branch information
mohsen1 authored Feb 23, 2025
2 parents 9a2e358 + 3769aeb commit 5037fb4
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 15 deletions.
28 changes: 28 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.21.0] - 2025-02-23
[0.21.0]: https://github.com/bodo-run/yek/compare/v0.20.0...v0.21.0
### Bug Fixes

- Glob pattern handling in e2e tests

### Documentation

- Update README with glob pattern and file selection support

### Features

- Handle glob patterns in input paths

### Testing

- Add comprehensive tests for glob pattern support

### Ci

- Run release and publish jobs on main branch
- Only run release and publish on tag pushes
- Trigger release on tag merge to main

## [0.20.0] - 2025-02-22
[0.20.0]: https://github.com/bodo-run/yek/compare/v0.19.0...v0.20.0
### Bug Fixes
Expand All @@ -24,6 +48,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Add integration test

### Release

- V0.20.0

## [0.19.0] - 2025-02-19
[0.19.0]: https://github.com/bodo-run/yek/compare/v0.18.0...v0.19.0
### Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "yek"
version = "0.20.0"
version = "0.21.0"
edition = "2021"
description = "A tool to serialize a repository into chunks of text files"
license = "MIT"
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ By default:
- Infers additional ignore patterns (binary, large, etc.).
- Automatically detects if output is being piped and streams content instead of writing to files.
- Supports processing multiple directories in a single command.
- Supports glob patterns and individual file selection.
- Configurable via a `yek.yaml` file.

Yek <a href="https://fa.wikipedia.org/wiki/۱">يک</a> means "One" in Farsi/Persian.
Expand Down Expand Up @@ -113,6 +114,25 @@ Process multiple directories:
yek src/ tests/
```

Process multiple files:

```bash
yek file1.txt file2.txt file3.txt
```

Use glob patterns:

```bash
yek "src/**/*.ts"
```

```bash
yek "src/main.rs" "tests/*.rs" "docs/README.md"
```

> [!NOTE]
> When using glob patterns, quote them so that your shell does not expand them before `yek` receives them.

### CLI Reference

```bash
Expand Down
39 changes: 35 additions & 4 deletions src/parallel.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{config::YekConfig, priority::get_file_priority, Result};
use content_inspector::{inspect, ContentType};
use glob::glob;
use ignore::gitignore::GitignoreBuilder;
use path_slash::PathBufExt;
use rayon::prelude::*;
Expand Down Expand Up @@ -81,12 +82,42 @@ pub fn process_files_parallel(
config: &YekConfig,
boost_map: &HashMap<String, i32>,
) -> Result<Vec<ProcessedFile>> {
// If it's a file, process it directly
if base_path.is_file() {
return process_single_file(base_path, config, boost_map);
// Expand globs into a list of paths
let mut expanded_paths = Vec::new();
let path_str = base_path.to_string_lossy();
for entry in glob(&path_str)? {
match entry {
Ok(path) => expanded_paths.push(path),
Err(e) => debug!("Glob entry error: {:?}", e),
}
}

// If it's a single file (no glob expansion or single file result), process it directly
if expanded_paths.len() == 1 && expanded_paths[0].is_file() {
return process_single_file(&expanded_paths[0], config, boost_map);
}

// Iterate over expanded paths, handling files and directories
let mut all_processed_files = Vec::new();
for path in expanded_paths {
if path.is_file() {
all_processed_files.extend(process_single_file(&path, config, boost_map)?);
} else if path.is_dir() {
// For directories, use the original recursive logic
all_processed_files.extend(process_files_parallel_internal(&path, config, boost_map)?);
}
}

// Otherwise, it's a directory, so walk it
Ok(all_processed_files)
}

/// Internal function to handle directory recursion (separated for clarity)
fn process_files_parallel_internal(
base_path: &Path,
config: &YekConfig,
boost_map: &HashMap<String, i32>,
) -> Result<Vec<ProcessedFile>> {
// It's a directory, so walk it
let mut walk_builder = ignore::WalkBuilder::new(base_path);

// Standard filters + no follow symlinks
Expand Down
10 changes: 5 additions & 5 deletions tests/config_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,11 @@ fn test_extend_config_with_defaults() {

assert_eq!(cfg.input_paths, input_paths);
assert_eq!(cfg.output_dir, Some(output_dir));
assert_eq!(cfg.version, false);
assert!(!cfg.version);
assert_eq!(cfg.max_size, "10MB".to_string());
assert_eq!(cfg.tokens, String::new());
assert_eq!(cfg.json, false);
assert_eq!(cfg.debug, false);
assert!(!cfg.json);
assert!(!cfg.debug);
assert_eq!(cfg.output_template, DEFAULT_OUTPUT_TEMPLATE.to_string());
assert_eq!(cfg.ignore_patterns, Vec::<String>::new());
assert_eq!(cfg.unignore_patterns, Vec::<String>::new());
Expand All @@ -355,8 +355,8 @@ fn test_extend_config_with_defaults() {
.collect::<Vec<_>>()
);
assert_eq!(cfg.git_boost_max, Some(100));
assert_eq!(cfg.stream, false);
assert_eq!(cfg.token_mode, false);
assert!(!cfg.stream);
assert!(!cfg.token_mode);
assert_eq!(cfg.output_file_full_path, None);
assert_eq!(cfg.max_git_depth, 100);
}
Expand Down
71 changes: 70 additions & 1 deletion tests/e2e_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ mod e2e_tests {
assert!(output.status.success());

// Ensure output dir is printed in stdout
let stdout = String::from_utf8_lossy(&output.stdout);
let stdout = String::from_utf8(output.stdout)?;
assert!(
stdout.contains(&output_dir.display().to_string()),
"Expected output directory `{}` to be printed in stdout, but it was {}",
Expand Down Expand Up @@ -215,6 +215,75 @@ mod e2e_tests {
Ok(())
}

/// End-to-end check that the `yek` binary accepts a glob argument (`*.txt`)
/// and that the content of a matching file shows up on stdout.
#[test]
fn test_glob_pattern() -> Result<(), Box<dyn std::error::Error>> {
    // Fixture: a single text file inside a fresh temporary directory.
    let workdir = tempdir()?;
    let fixture = workdir.path().join("test.txt");
    fs::write(fixture, "Test content")?;

    // The pattern is handed over as a plain argument; the command runs with
    // the temporary directory as its working directory.
    let mut cmd = Command::cargo_bin("yek")?;
    cmd.current_dir(workdir.path()).arg("*.txt");
    let run = cmd.output()?;

    let captured = String::from_utf8(run.stdout)?;
    assert!(run.status.success());
    assert!(captured.contains("Test content"));
    Ok(())
}

/// A `*.txt` glob must pick up both text files in the working directory while
/// leaving out the extension-less file that lives inside `dir/`.
#[test]
fn test_mix_of_files_and_dirs() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;
    fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
    let dir = temp_dir.path().join("dir");
    fs::create_dir(&dir)?;
    fs::write(dir.join("test3"), "Test content 3")?;

    // Run the binary once and check exit status and stdout of that same run,
    // instead of spawning an identical second process just to read stdout.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .arg("*.txt")
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {}: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    // NOTE: "Test content" is a prefix of every fixture's content, so this
    // first assertion alone does not prove that test.txt itself was included.
    assert!(stdout.contains("Test content"));
    assert!(stdout.contains("Test content 2"));
    assert!(!stdout.contains("Test content 3"));
    Ok(())
}

/// Passing a glob (`*.txt`) together with an explicit file (`code.rs`) must
/// include all three of those files and still leave out the file in `dir/`.
#[test]
fn test_mix_of_files_and_dirs_with_glob_pattern() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;
    fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
    fs::write(temp_dir.path().join("code.rs"), "use std::fs;")?;
    let dir = temp_dir.path().join("dir");
    fs::create_dir(&dir)?;
    fs::write(dir.join("test4"), "Test content 4")?;

    // Run the binary once and check exit status and stdout of that same run,
    // instead of spawning an identical second process just to read stdout.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .args(["*.txt", "code.rs"])
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {}: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    // NOTE: "Test content" is a prefix of every text fixture's content, so
    // this first assertion alone does not prove that test.txt was included.
    assert!(stdout.contains("Test content"));
    assert!(stdout.contains("Test content 2"));
    assert!(!stdout.contains("Test content 4"));
    assert!(stdout.contains("use std::fs;"));
    Ok(())
}

#[test]
fn test_config_file() -> Result<(), Box<dyn std::error::Error>> {
let temp_dir = tempdir()?;
Expand Down
109 changes: 106 additions & 3 deletions tests/parallel_test.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use anyhow::Result;
use normalize_path::NormalizePath;
use std::collections::HashMap;
use std::fs;
use std::fs::{self, File};
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::path::{Path, PathBuf};
use tempfile::tempdir;
use yek::config::YekConfig;
use yek::parallel::process_files_parallel;
Expand Down Expand Up @@ -62,7 +64,6 @@ fn test_process_files_parallel_empty() {

#[test]
fn test_process_files_parallel_with_files() {
use std::fs;
let temp_dir = tempdir().expect("failed to create temp dir");
let file_names = vec!["a.txt", "b.txt", "c.txt"];
for &file in &file_names {
Expand Down Expand Up @@ -135,3 +136,105 @@ fn test_process_files_parallel_walk_error() {
let processed_files = result.unwrap();
assert_eq!(processed_files.len(), 0); // No files processed due to walk error
}

/// Tests for glob-pattern input to `process_files_parallel`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates (or truncates) `path` and writes `content` followed by a newline.
    fn write_line(path: &Path, content: &str) -> Result<()> {
        let mut file = File::create(path)?;
        writeln!(file, "{}", content)?;
        Ok(())
    }

    /// Runs `process_files_parallel` on `pattern` with a default config and an
    /// empty boost map, and returns the `rel_path` of every processed file in
    /// sorted order so that assertions do not depend on processing order.
    fn sorted_rel_paths(pattern: &Path) -> Result<Vec<String>> {
        let config = YekConfig::default();
        let boost_map = HashMap::new();
        let processed = process_files_parallel(pattern, &config, &boost_map)?;
        let mut rel_paths: Vec<String> = processed.iter().map(|f| f.rel_path.clone()).collect();
        rel_paths.sort();
        Ok(rel_paths)
    }

    /// A pattern that matches exactly one file yields exactly that file.
    #[test]
    fn test_glob_pattern_single_file() -> Result<()> {
        let temp_dir = tempdir()?;
        write_line(&temp_dir.path().join("test.txt"), "Test content")?;

        // The pattern is passed as a path directly; no String round-trip needed.
        let pattern: PathBuf = temp_dir.path().join("*.txt");
        let paths = sorted_rel_paths(&pattern)?;
        assert_eq!(paths, ["test.txt"]);

        Ok(())
    }

    /// `*.txt` selects every `.txt` file in the directory and nothing else.
    #[test]
    fn test_glob_pattern_multiple_files() -> Result<()> {
        let temp_dir = tempdir()?;

        // Two files that should match and one (`other.md`) that should not.
        for fname in ["test1.txt", "test2.txt", "other.md"] {
            write_line(
                &temp_dir.path().join(fname),
                &format!("Test content for {}", fname),
            )?;
        }

        let pattern: PathBuf = temp_dir.path().join("*.txt");
        let paths = sorted_rel_paths(&pattern)?;
        assert_eq!(paths, ["test1.txt", "test2.txt"]); // Should only match .txt files

        Ok(())
    }

    /// `**/*.txt` matches `.txt` files both in the root and in a subdirectory.
    #[test]
    fn test_glob_pattern_nested_directories() -> Result<()> {
        let temp_dir = tempdir()?;

        // Create nested directory structure
        let nested_dir = temp_dir.path().join("nested");
        fs::create_dir(&nested_dir)?;

        // Create files in both root and nested directory
        write_line(&temp_dir.path().join("root.txt"), "Root content")?;
        write_line(&nested_dir.join("nested.txt"), "Nested content")?;
        write_line(&temp_dir.path().join("other.md"), "Other content")?;

        let pattern: PathBuf = temp_dir.path().join("**/*.txt");
        let paths = sorted_rel_paths(&pattern)?;
        assert_eq!(paths, ["nested.txt", "root.txt"]); // Should match both .txt files

        Ok(())
    }

    /// A pattern with no matches is not an error; it yields an empty result.
    #[test]
    fn test_glob_pattern_no_matches() -> Result<()> {
        let temp_dir = tempdir()?;

        let pattern: PathBuf = temp_dir.path().join("*.txt");
        let paths = sorted_rel_paths(&pattern)?;
        assert!(paths.is_empty());

        Ok(())
    }
}

0 comments on commit 5037fb4

Please sign in to comment.