From cc12cc7920c3efb7b505affd3e775e1afa8a7b01 Mon Sep 17 00:00:00 2001 From: Todd Lewis Date: Apr 23 2017 22:25:35 +0000 Subject: exclude files via pcre Signed-off-by: Francisco Javier Tsao Santín --- diff --git a/hardlink.1 b/hardlink.1 index 04228f4..b8bfe9d 100644 --- a/hardlink.1 +++ b/hardlink.1 @@ -3,7 +3,7 @@ hardlink \- Consolidate duplicate files via hardlinks .SH "SYNOPSIS" .PP -\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-x pattern\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP This manual page documents \fBhardlink\fP, a @@ -32,8 +32,14 @@ Do not perform the consolidation; only print what would be changed. Print summary after hardlinking. .IP "\fB-vv\fP" 10 Print every hardlinked file and bytes saved. Also print summary after hardlinking. +.IP "\fB-x pattern\fP" 10 +Exclude files and directories matching pattern from hardlinking. .IP "\fB-h\fP" 10 Show help. +.PP +The optional pattern for excluding files and directories must be a PCRE2 +compatible regular expression. Only the basename of the file or directory +is checked, not its path. Excluded directories' contents will not be examined. .SH "AUTHOR" .PP \fBhardlink\fP was written by Jakub Jelinek . @@ -48,3 +54,9 @@ it. If a directory tree does change, this may result in \fBhardlink\fP accessing files and/or directories outside of the intended directory tree. Thus, you must avoid running \fBhardlink\fP on potentially changing directory trees, and especially on directory trees under control of another user. +.PP +Historically \fBhardlink\fP silently excluded any names beginning with +".in.", as well as any names beginning with "." followed by exactly 6 +other characters. 
That prior behavior can be achieved by specifying +.br +-x '^(\.in\.|\.[^.]{6}$)' diff --git a/hardlink.c b/hardlink.c index 16d8163..69f6a46 100644 --- a/hardlink.c +++ b/hardlink.c @@ -21,6 +21,7 @@ /* Changes by Travers Carter to make atomic hardlinking */ #define _GNU_SOURCE +#define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include @@ -31,12 +32,17 @@ #include #include #include +#include #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) #define NAMELEN 4096 #define NBUF 64 +pcre2_code *re; +PCRE2_SPTR exclude_pattern; +pcre2_match_data *match_data; + struct _f; typedef struct _h { struct _h *next; @@ -99,12 +105,13 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnvhf] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Print summary after hardlinking.\n"); fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); fprintf (stderr, " -f Force hardlinking across filesystems.\n"); + fprintf (stderr, " -x pat Exclude files matching pattern.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -328,8 +335,10 @@ int main(int argc, char **argv) { int ch; int i; + int errornumber; + PCRE2_SIZE erroroffset; dynstr nam1 = {NULL, 0}; - while ((ch = getopt (argc, argv, "cnvhf")) != -1) { + while ((ch = getopt (argc, argv, "cnvhfx:")) != -1) { switch (ch) { case 'n': no_link++; @@ -343,6 +352,9 @@ int main(int argc, char **argv) case 'f': force=1; break; + case 'x': + exclude_pattern = (PCRE2_SPTR)optarg; + break; case 'h': default: usage(argv[0]); @@ -350,6 +362,22 @@ int main(int argc, char **argv) } if (optind >= argc) usage(argv[0]); + if (exclude_pattern) { + re = pcre2_compile( + exclude_pattern, /* the pattern */ + 
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + 0, /* default options */ + &errornumber, + &erroroffset, + NULL); /* use default compile context */ + if (!re) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + fprintf(stderr, "pattern error at offset %d: %s\n", (int)erroroffset, buffer); + usage(argv[0]); + } + match_data = pcre2_match_data_create_from_pattern(re, NULL); + } for (i = optind; i < argc; i++) rf(argv[i]); while (dirs) { @@ -371,16 +399,23 @@ int main(int argc, char **argv) if (!di->d_name[0]) continue; if (di->d_name[0] == '.') { - char *q; - if (!di->d_name[1] || !strcmp (di->d_name, "..") || !strncmp (di->d_name, ".in.", 4)) + if (!di->d_name[1] || !strcmp(di->d_name, "..")) continue; - q = strrchr (di->d_name, '.'); - if (q && strlen (q) == 7 && q != di->d_name) { + } + if (re && pcre2_match( + re, /* compiled regex */ + (PCRE2_SPTR)di->d_name, + strlen(di->d_name), + 0, /* start at offset 0 */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL) /* use default match context */ + >= 0) { + if (verbose) { nam1.buf[nam1baselen] = 0; - if (verbose) - fprintf(stderr, "Skipping %s%s\n", nam1.buf, di->d_name); - continue; + fprintf(stderr,"Skipping %s%s\n", nam1.buf, di->d_name); } + continue; } { size_t subdirlen; diff --git a/hardlink.spec b/hardlink.spec index 0794ac5..03d9a90 100644 --- a/hardlink.spec +++ b/hardlink.spec @@ -1,7 +1,7 @@ Summary: Create a tree of hardlinks Name: hardlink Version: 1.2 -Release: 1%{?dist} +Release: 2%{?dist} Epoch: 1 Group: System Environment/Base URL: https://pagure.io/hardlink.git @@ -10,6 +10,7 @@ Source0: hardlink.c Source1: hardlink.1 Source2: https://www.gnu.org/licenses/old-licenses/gpl-2.0.txt Buildroot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +Requires: pcre2-devel %description hardlink is used to create a tree of hard links. @@ -22,7 +23,7 @@ cp %{SOURCE2} . 
install -pm 644 %{SOURCE0} hardlink.c %build -gcc $RPM_OPT_FLAGS hardlink.c -o hardlink +gcc $RPM_OPT_FLAGS hardlink.c -o hardlink -lpcre2-8 %install install -D -m 644 %{SOURCE1} $RPM_BUILD_ROOT%{_mandir}/man1/hardlink.1