diff -urPN 0.1.1/LICENSE 1.0.0/LICENSE --- 0.1.1/LICENSE Fri Jan 28 02:44:40 2022 +++ 1.0.0/LICENSE Fri Mar 18 12:18:24 2022 @@ -1,6 +1,6 @@ ISC License -Copyright (c) 2020, Adrian Emil Grigore +Copyright (c) 2022, Adrian Emil Grigore Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above diff -urPN 0.1.1/Makefile 1.0.0/Makefile --- 0.1.1/Makefile Fri Jan 28 02:44:40 2022 +++ 1.0.0/Makefile Fri Mar 18 12:18:24 2022 @@ -1,3 +1,4 @@ +VERSION=1.0.0 PREFIX = /usr/local MANPREFIX = $(PREFIX)/share/man @@ -13,14 +14,13 @@ clu.1 \ clp.1 \ clvv.1 \ + clvu.1 \ clvp.1 install: mkdir -p $(DESTDIR)$(PREFIX)/bin/ - mkdir -p $(DESTDIR)$(PREFIX)/share/cl mkdir -p $(DESTDIR)$(MANPREFIX)/man1/ cp -f $(BIN) $(DESTDIR)$(PREFIX)/bin/ - cp -r share/*.awk $(DESTDIR)$(PREFIX)/share/cl cp -f $(MAN) $(DESTDIR)$(MANPREFIX)/man1/ for bin in $(BIN); do \ chmod 755 $(DESTIR)$(PREFIX)/bin/$$bin; \ @@ -31,10 +31,17 @@ uninstall: - rm -rf $(DESTDIR$(PREFIX)/share/cl for bin in $(BIN); do \ rm -f $(DESTDIR)$(PREFIX)/bin/$$bin; \ done for man in $(MAN); do \ rm -f $(MANPREFIX)/man1/$$man; \ done + +dist: + mkdir -p cl@$(VERSION)/cl + cp $(BIN) cl@$(VERSION)/cl + cp $(MAN) cl@$(VERSION)/cl + cp Makefile README NEWS LICENSE cl@$(VERSION)/cl + tar -C cl@$(VERSION) -czvf cl@$(VERSION).tgz cl + rm -rf cl@$(VERSION) diff -urPN 0.1.1/NEWS 1.0.0/NEWS --- 0.1.1/NEWS Thu Jan 1 01:00:00 1970 +++ 1.0.0/NEWS Fri Mar 18 12:18:24 2022 @@ -0,0 +1,40 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.0.0] - 2022-03-04 + +### Added + +- Add `NEWS` file +- Add `dist` rule to `Makefile` +- Add missing `clvu.1` `man` file to dist package + +### Changed + +- Hardcode all log fields in scripts +- Write simple description in README rather than `man` contents +- Update `man` pages to reflect new path patterns + +### Removed + +- Delete `share` directory and references + +## [0.1.1] + +### Added + +- Missing newline to `share/c.awk` + +## [0.0.1] + +### Added + +- Common Log analytics scripts + +[1.0.0]: https://adi.onl/cl/0.1.1..1.0.0 +[0.1.1]: https://adi.onl/cl/0.0.1..0.1.1 +[0.0.1]: https://adi.onl/cl/0.0.1 diff -urPN 0.1.1/README 1.0.0/README --- 0.1.1/README Fri Jan 28 02:44:40 2022 +++ 1.0.0/README Fri Mar 18 12:18:24 2022 @@ -1,376 +1,45 @@ # cl -common log +Analytics for `HTTP` logs in Common Log Format -## clv(1) +## Usage -### NAME +The package provides 6 scripts `clp`, `clu`, `clv` and `clvp`, `clvu` +and `clvv`. -**clv** - common log views +The **cl** prefix stands for _Common Log_ and the suffixes stand for +_**p**ages_, _**u**nique_ and _**v**iews_. -### SYNOPSIS +The middle **v** in the second set stands for _virtual host_. The second +set of 3 scripts is for the Common Log with virtual hosts format. It's +identical to the normal Common Log except the name of the virtual host +is prepended to each line. -**clv** -\[*file ...*] +Use the appropriate script for the statistic and for the log format: -### DESCRIPTION +### Pages -The -**clv** -utility reads -each -\[*file ...*] -in -*Common Log Format* -and prints only the -*views* -to standard output. + $ clp /var/www/access.log + /docs.html 641 + / 3227 -*views* -are defined as -*GET* -requests -to paths ending in -*/* -or -*.html* -that returned a -*200* -HTTP response status code. +Use `sort -nrk 2` to sort on second column: -If -\[*file ...*] -is omitted, -**clv** -reads from standard input. + $ clp /var/www/access.log | sort -nrk 2 + / 3227 + /docs.html 641 -### EXIT STATUS +### Uniques and Visits -The **clv** utility exits 0 on success, and >0 if an error occurs. +Uniques and visits scripts output the actual unique and visit requests +by default. Use `wc -l` to count: -### EXAMPLES + $ clu /var/www/access.log | wc -l + 2000 -Print all -*views* -from -*/var/www/logs/access.log:* +The scripts operate on `GET` requests to paths with no extension or +ending in `.html` that returned a 200 HTTP response status code. - $ clv /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) - -## clu(1) - -### NAME - -**clu** - common log uniques - -### SYNOPSIS - -**clu** -\[*file ...*] - -### DESCRIPTION - -The -**clu** -utility reads -each -\[*file ...*] -in -*Common Log Format* -and prints only the -*uniques* -to standard output. - -*uniques* -are defined as unique remote hosts -that made at least one -*GET* -request to paths ending in -*/* -or -*.html* -that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**clu** -reads from standard input. - -### EXIT STATUS - -The **clu** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*uniques* -from -*/var/www/logs/access.log:* - - $ clu /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) - -## clp(1) - -### NAME - -**clp** - common log pages - -### SYNOPSIS - -**clp** -\[*file ...*] - -### DESCRIPTION - -The -**clp** -utility reads -each -\[*file ...*] -in -*Common Log Format* -and prints a list of -*pages* -to standard output including the request count. - -*pages* -are defined as -*paths* -ending in -*/* -or -*.html* -to which -*GET* -requests have been made that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**clp** -reads from standard input. - -### EXIT STATUS - -The **clp** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*pages* -and their respective request count -from -*/var/www/logs/access.log*: - - $ clp /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) - -## clvv(1) - -### NAME - -**clvv** - common log (virtual host) views - -### SYNOPSIS - -**clvv** -\[*file ...*] - -### DESCRIPTION - -The -**clvv** -utility reads -each -\[*file ...*] -in -*Common Log Format with Virtual Host* -and prints only the -*views* -to standard output. - -*views* -are defined as -*GET* -requests to paths ending in -*/* -or -*.html* -that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**clvv** -reads from standard input. - -### EXIT STATUS - -The **clvv** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*views* -from -*/var/www/logs/access.log:* - - $ clvv /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) - -## clvu(1) - -### NAME - -**clvu** - common log (virtual host) uniques - -### SYNOPSIS - -**clvu** -\[*file ...*] - -### DESCRIPTION - -The -**clvu** -utility reads -each -\[*file ...*] -in -*Common Log Format with Virtual Hosts* -and prints only the -*uniques* -to standard output. - -*uniques* -to standard output. - -*uniques* -are defined as unique remote hosts -that made at least one -*GET* -request -for files with an -*.html* -extension, including the -*/* -path that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**clvu** -reads from standard input. - -### EXIT STATUS - -The **clvu** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*uniques* -from -*/var/www/logs/access.log:* - - $ clvu /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) - -## clvp(1) - -### NAME - -**clvp** - common log (virtual host) pages - -### SYNOPSIS - -**clvp** -\[*file ...*] - -### DESCRIPTION - -The -**clvp** -utility reads -each -\[*file ...*] -in -*Common Log Format with Virtual Host* -and prints a list of -*pages* -to standard output including the request count. - -*pages* -are defined as -*paths* -ending in -*/* -or -*.html* -to which -*GET* -requests have been made that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**clvp** -reads from standard input. - -### EXIT STATUS - -The **clvp** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*pages* -and their respective request count -from -*/var/www/logs/access.log*: - - $ clvp /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) + https://example.com/docs + https://example.com/docs/ + https://examples.com/docs.html diff -urPN 0.1.1/clp 1.0.0/clp --- 0.1.1/clp Fri Jan 28 02:44:40 2022 +++ 1.0.0/clp Fri Mar 18 12:18:24 2022 @@ -1,5 +1,13 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/p.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +$7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/ { +ps[$7]++ +} +END { +for (p in ps) +print p" "ps[p] +} diff -urPN 0.1.1/clp.1 1.0.0/clp.1 --- 0.1.1/clp.1 Fri Jan 28 02:44:40 2022 +++ 1.0.0/clp.1 Fri Mar 18 12:18:24 2022 @@ -22,9 +22,7 @@ .Em pages are defined as .Em paths -ending in -.Pa / -or +with no extension or ending in .Pa .html to which .Em GET diff -urPN 0.1.1/clu 1.0.0/clu --- 0.1.1/clu Fri Jan 28 02:44:40 2022 +++ 1.0.0/clu Fri Mar 18 12:18:24 2022 @@ -1,5 +1,8 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/u.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +($7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/) && +!us[$1]++ diff -urPN 0.1.1/clu.1 1.0.0/clu.1 --- 0.1.1/clu.1 Fri Jan 28 02:44:40 2022 +++ 1.0.0/clu.1 Fri Mar 18 12:18:24 2022 @@ -23,9 +23,7 @@ are defined as unique remote hosts that made at least one .Em GET -request to paths ending in -.Pa / -or +request to paths with no extension or ending in .Pa .html that returned a .Em 200 diff -urPN 0.1.1/clv 1.0.0/clv --- 0.1.1/clv Fri Jan 28 02:44:40 2022 +++ 1.0.0/clv Fri Mar 18 12:18:24 2022 @@ -1,5 +1,7 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/v.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +$7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/ diff -urPN 0.1.1/clv.1 1.0.0/clv.1 --- 0.1.1/clv.1 Fri Jan 28 02:44:40 2022 +++ 1.0.0/clv.1 Fri Mar 18 12:18:24 2022 @@ -23,9 +23,7 @@ are defined as .Em GET requests -to paths ending in -.Pa / -or +to paths ending with no extension or ending in .Pa .html that returned a .Em 200 diff -urPN 0.1.1/clvp 1.0.0/clvp --- 0.1.1/clvp Fri Jan 28 02:44:40 2022 +++ 1.0.0/clvp Fri Mar 18 12:18:24 2022 @@ -1,5 +1,13 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/p.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { +next +} +$8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/ { +ps[$8]++ +} +END { +for (p in ps) +print p" "ps[p] +} diff -urPN 0.1.1/clvp.1 1.0.0/clvp.1 --- 0.1.1/clvp.1 Fri Jan 28 02:44:40 2022 +++ 1.0.0/clvp.1 Fri Mar 18 12:18:24 2022 @@ -22,9 +22,7 @@ .Em pages are defined as .Em paths -ending in -.Pa / -or +with no extension or ending in .Pa .html to which .Em GET diff -urPN 0.1.1/clvu 1.0.0/clvu --- 0.1.1/clvu Fri Jan 28 02:44:40 2022 +++ 1.0.0/clvu Fri Mar 18 12:18:24 2022 @@ -1,5 +1,8 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/u.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { +next +} +($8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/) && +!us[$2]++ diff -urPN 0.1.1/clvu.1 1.0.0/clvu.1 --- 0.1.1/clvu.1 Fri Jan 28 02:44:40 2022 +++ 1.0.0/clvu.1 Fri Mar 18 12:18:24 2022 @@ -23,12 +23,9 @@ are defined as unique remote hosts that made at least one .Em GET -request -for files with an +request to paths with no extension or ending in .Pa .html -extension, including the -.Pa / -path that returned a +that returned a .Em 200 HTTP response status code. .Pp diff -urPN 0.1.1/clvv 1.0.0/clvv --- 0.1.1/clvv Fri Jan 28 02:44:40 2022 +++ 1.0.0/clvv Fri Mar 18 12:18:24 2022 @@ -1,5 +1,7 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/v.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { + next +} +$8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/ diff -urPN 0.1.1/clvv.1 1.0.0/clvv.1 --- 0.1.1/clvv.1 Fri Jan 28 02:44:40 2022 +++ 1.0.0/clvv.1 Fri Mar 18 12:18:24 2022 @@ -22,9 +22,7 @@ .Em views are defined as .Em GET -requests to paths ending in -.Pa / -or +requests to paths with no extension or ending in .Pa .html that returned a .Em 200 diff -urPN 0.1.1/share/c.awk 1.0.0/share/c.awk --- 0.1.1/share/c.awk Fri Jan 28 02:45:14 2022 +++ 1.0.0/share/c.awk Thu Jan 1 01:00:00 1970 @@ -1,11 +0,0 @@ -{ -h=$1 -l=$2 -u=$3 -t=substr($4, 2)" "substr($5, 1, length($5)-1) -split($0, uq, "\"") -r=uq[2] -split(r, ra, " ") -s=$9 -b=$10 -} Binary files 0.1.1/share/c.awk.br and 1.0.0/share/c.awk.br differ Binary files 0.1.1/share/c.awk.gz and 1.0.0/share/c.awk.gz differ diff -urPN 0.1.1/share/cv.awk 1.0.0/share/cv.awk --- 0.1.1/share/cv.awk Fri Jan 28 02:44:40 2022 +++ 1.0.0/share/cv.awk Thu Jan 1 01:00:00 1970 @@ -1,12 +0,0 @@ -{ -v=$1 -h=$2 -l=$3 -u=$4 -t=substr($5, 2)" "substr($6, 1, length($6)-1) -split($0, a, "\"") -r=a[2] -split(r, ra, " ") -s=$10 -b=$11 -} Binary files 0.1.1/share/cv.awk.br and 1.0.0/share/cv.awk.br differ Binary files 0.1.1/share/cv.awk.gz and 1.0.0/share/cv.awk.gz differ diff -urPN 0.1.1/share/p.awk 1.0.0/share/p.awk --- 0.1.1/share/p.awk Fri Jan 28 02:44:40 2022 +++ 1.0.0/share/p.awk Thu Jan 1 01:00:00 1970 @@ -1,7 +0,0 @@ -#!/usr/bin/awk -f - -{ ra[1] == "GET" && s == 200 && ra[2] ~ /\/$|\.html$/ && ps[ra[2]]++ } - -END { - for (p in ps) print p" "ps[p] -} Binary files 0.1.1/share/p.awk.br and 1.0.0/share/p.awk.br differ Binary files 0.1.1/share/p.awk.gz and 1.0.0/share/p.awk.gz differ diff -urPN 0.1.1/share/u.awk 1.0.0/share/u.awk --- 0.1.1/share/u.awk Fri Jan 28 02:44:40 2022 +++ 1.0.0/share/u.awk Thu Jan 1 01:00:00 1970 @@ -1 +0,0 @@ -ra[1] == "GET" && s == 200 && ra[2] ~ /\/$|\.html$/ && !us[h]++ Binary files 0.1.1/share/u.awk.br and 1.0.0/share/u.awk.br differ Binary files 0.1.1/share/u.awk.gz and 1.0.0/share/u.awk.gz differ diff -urPN 0.1.1/share/v.awk 1.0.0/share/v.awk --- 0.1.1/share/v.awk Fri Jan 28 02:44:40 2022 +++ 1.0.0/share/v.awk Thu Jan 1 01:00:00 1970 @@ -1 +0,0 @@ -ra[1] == "GET" && s == 200 && ra[2] ~ /\/$|\.html$/ Binary files 0.1.1/share/v.awk.br and 1.0.0/share/v.awk.br differ Binary files 0.1.1/share/v.awk.gz and 1.0.0/share/v.awk.gz differ