diff -urPN 0.1.0/Makefile 1.0.0/Makefile --- 0.1.0/Makefile Mon Aug 10 21:15:30 2020 +++ 1.0.0/Makefile Fri Mar 18 15:46:19 2022 @@ -1,3 +1,4 @@ +VERSION = 1.0.0 PREFIX = /usr/local MANPREFIX = $(PREFIX)/share/man @@ -22,10 +23,8 @@ install: mkdir -p $(DESTDIR)$(PREFIX)/bin/ - mkdir -p $(DESTDIR)$(PREFIX)/share/cbl mkdir -p $(DESTDIR)$(MANPREFIX)/man1/ cp -f $(BIN) $(DESTDIR)$(PREFIX)/bin/ - cp -r share/*.awk $(DESTDIR)$(PREFIX)/share/cbl cp -f $(MAN) $(DESTDIR)$(MANPREFIX)/man1/ for bin in $(BIN); do \ chmod 755 $(DESTIR)$(PREFIX)/bin/$$bin; \ @@ -36,10 +35,16 @@ uninstall: - rm -rf $(DESTDIR$(PREFIX)/share/cbl for bin in $(BIN); do \ rm -f $(DESTDIR)$(PREFIX)/bin/$$bin; \ done for man in $(MAN); do \ rm -f $(MANPREFIX)/man1/$$man; \ done +dist: + mkdir -p cbl@$(VERSION)/cbl + cp $(BIN) cbl@$(VERSION)/cbl + cp $(MAN) cbl@$(VERSION)/cbl + cp Makefile README NEWS LICENSE cbl@$(VERSION)/cbl + tar -C cbl@$(VERSION) -czvf cbl@$(VERSION).tgz cbl + rm -rf cbl@$(VERSION) diff -urPN 0.1.0/NEWS 1.0.0/NEWS --- 0.1.0/NEWS Thu Jan 1 01:00:00 1970 +++ 1.0.0/NEWS Fri Mar 18 15:46:19 2022 @@ -0,0 +1,34 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.0.0] - 2022-03-04 + +### Added + +- Add `NEWS` file +- Add `dist` rule to `Makefile` +- Add missing `cblvu.1` `man` file to dist package + +### Changed + +- Hardcode all log fields in scripts +- Write simple description in README rather than `man` contents +- Update `man` pages to reflect new path patterns + +### Removed + +- Delete `share` directory and references +- Remove dependency on [`cl`](https://adi.onl/cl.html) + +## [0.1.0] + +### Added + +- Combined Log analytics scripts + +[1.0.0]: https://adi.onl/cbl/0.1.0..1.0.0 +[0.1.0]: https://adi.onl/cbl/0.1.0 diff -urPN 0.1.0/README 1.0.0/README --- 0.1.0/README Mon Aug 10 21:15:30 2020 +++ 1.0.0/README Fri Mar 18 15:46:19 2022 @@ -1,508 +1,80 @@ # cbl -combined log +Analytics for `HTTP` logs in Combined Log Format -## cblv(1) +## Install -### NAME +On a Linux or BSD machine, in a terminal, run: -**cblv** - combined log views + mkdir -p ~/src + cd ~/src + wget -qO - https://adi.onl/cbl/cbl@1.0.0.tgz | tar -xzvf - + cd cbl + make install PREFIX=$HOME -### SYNOPSIS +## Uninstall -**cblv** -\[*file ...*] + cd ~/src/cbl + make uninstall PREFIX=$HOME -### DESCRIPTION +## Usage -The -**cblv** -utility reads -each -\[*file ...*] -in -*Combined Log Format* -and prints only the -*views* -to standard output. +The package provides 8 scripts `cblp`, `cblu`, `cblv`, `cblr` and +`cblvp`, `cblvu` , `cblvv` and `cblvr`. -*views* -are defined as -*GET* -requests -to paths ending in -*/* -or -*.html* -that returned a -*200* -HTTP response status code. +The **cbl** prefix stands for _Combined Log_ and the suffixes stand for +_**p**ages_, _**u**nique_, _**v**iews_ and _**r**eferrers_. -If -\[*file ...*] -is omitted, -**cblv** -reads from standard input. +The middle **v** in the second set stands for _virtual host_. The second +set of 4 scripts is for the Combined Log with virtual hosts format. It's +identical to the normal Combined Log except the name of the virtual host +is prepended to each line. -### EXIT STATUS +Use the appropriate script for the statistic and for the log format: -The **cblv** utility exits 0 on success, and >0 if an error occurs. +### Pages + $ cblp /var/www/access.log + /docs.html 641 + / 3227 -### EXAMPLES +Use `sort -nrk 2` to sort on second column: -Print all -*views* -from -*/var/www/logs/access.log:* + $ clp /var/www/access.log | sort -nrk 2 + / 3227 + /docs.html 641 - $ cblv /var/www/logs/access.log +### Uniques and Visits -### SEE ALSO +Uniques and visits scripts output the actual unique and visit requests +by default. Use `wc -l` to count: -awk(1) -httpd(8) -httpd.conf(5) -clv(1) + $ cblu /var/www/access.log | wc -l + 2000 -## cblu(1) +### Referrers -### NAME + $ cblr /var/www/access.log + none 865 + https://example.com 1222 -**cblu** - combined log uniques +Use `sort -nrk 2` to sort on second column: -### SYNOPSIS + $ cblr /var/www/access.log | sort -nrk 2 + https://example.com 1222 + none 865 -**cblu** -\[*file ...*] +We recommend using [cblfb](http://adi.onl/cblfb.html) for +filtering bots. -### DESCRIPTION + $ cblr /var/www/access.log | cblfb + none 821 + https://example.com 1212 -The -**cblu** -utility reads -each -\[*file ...*] -in -*Combined Log Format* -and prints only the -*uniques* -to standard output. +Also, it's a good idea to enable log rotation. -*uniques* -are defined as unique remote hosts -that made at least one -*GET* -request to paths ending in -*/* -or -*.html* -that returned a -*200* -HTTP response status code. +The scripts operate on `GET` requests to paths with no extension or +ending in `.html` that returned a 200 HTTP response status code. -If -\[*file ...*] -is omitted, -**cblu** -reads from standard input. - -### EXIT STATUS - -The **cblu** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*uniques* -from -*/var/www/logs/access.log:* - - $ cblu /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) -clu(1) - -## cblp(1) - -### NAME - -**cblp** - combined log pages - -### SYNOPSIS - -**cblp** -\[*file ...*] - -### DESCRIPTION - -The -**cblp** -utility reads -each -\[*file ...*] -in -*Combined Log Format* -and prints a list of -*pages* -to standard output including the request count. - -*pages* -are defined as -*paths* -ending in -*/* -or -*.html* -to which -*GET* -requests have been made that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**cblp** -reads from standard input. - -### EXIT STATUS - -The **cblp** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*pages* -and their respective request count -from -*/var/www/logs/access.log*: - - $ cblp /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) -clp(1) - -## cblr(1) - -### NAME - -**cblr** - combined log referrer - -### SYNOPSIS - -**cblr** -\[*file ...*] - -### DESCRIPTION - -The -**cblr** -utility reads -each -\[*file ...*] -in -*Combined Log Format* -and prints a list of -*referrers* -to standard output including the request count. - -*referrers* -are defined as the address of the webpage which is linked to a -*path* -ending in -*/* -or -*.html* -to which -*GET* -requests have been made that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**cblr** -reads from standard input. - -### EXIT STATUS - -The **cblr** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*referrers* -and their respective request count -from -*/var/www/logs/access.log*: - - $ cblr /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) - -## cblvv(1) - -### NAME - -**cblvv** - combined log (virtual host) views - -### SYNOPSIS - -**cblvv** -\[*file ...*] - -### DESCRIPTION - -The -**cblvv** -utility reads -each -\[*file ...*] -in -*Combined Log Format with Virtual Host* -and prints only the -*views* -to standard output. - -*views* -are defined as -*GET* -requests to paths ending in -*/* -or -*.html* -that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**cblvv** -reads from standard input. - -### EXIT STATUS - -The **cblvv** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*views* -from -*/var/www/logs/access.log:* - - $ cblvv /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) -clv(1) - -## cblvu(1) - -### NAME - -**cblvu** - combined log (virtual host) uniques - -### SYNOPSIS - -**cblvu** -\[*file ...*] - -### DESCRIPTION - -The -**cblvu** -utility reads -each -\[*file ...*] -in -*Combined Log Format with Virtual Hosts* -and prints only the -*uniques* -to standard output. - -*uniques* -to standard output. - -*uniques* -are defined as unique remote hosts -that made at least one -*GET* -request -for files with an -*.html* -extension, including the -*/* -path that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**cblvu** -reads from standard input. - -### EXIT STATUS - -The **cblvu** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*uniques* -from -*/var/www/logs/access.log:* - - $ cblvu /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) -clu(1) - -## cblvp(1) - -### NAME - -**cblvp** - combined log (virtual host) pages - -### SYNOPSIS - -**cblvp** -\[*file ...*] - -### DESCRIPTION - -The -**cblvp** -utility reads -each -\[*file ...*] -in -*Combined Log Format with Virtual Host* -and prints a list of -*pages* -to standard output including the request count. - -*pages* -are defined as -*paths* -ending in -*/* -or -*.html* -to which -*GET* -requests have been made that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**cblvp** -reads from standard input. - -### EXIT STATUS - -The **cblvp** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*pages* -and their respective request count -from -*/var/www/logs/access.log*: - - $ cblvp /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) -clp(1) - -## cblvr(1) - -### NAME - -**cblvr** - combined log (virtual host) referrer - -### SYNOPSIS - -**cblvr** -\[*file ...*] - -### DESCRIPTION - -The -**cblvr** -utility reads -each -\[*file ...*] -in -*Combined Log Format Virtual Host* -and prints a list of -*referrers* -to standard output including the request count. - -*referrers* -are defined as the address of the webpage which is linked to a -*path* -ending in -*/* -or -*.html* -to which -*GET* -requests have been made that returned a -*200* -HTTP response status code. - -If -\[*file ...*] -is omitted, -**cblvr** -reads from standard input. - -### EXIT STATUS - -The **cblvr** utility exits 0 on success, and >0 if an error occurs. - -### EXAMPLES - -Print all -*referrers* -and their respective request count -from -*/var/www/logs/access.log*: - - $ cblvr /var/www/logs/access.log - -### SEE ALSO - -awk(1) -httpd(8) -httpd.conf(5) + https://example.com/docs + https://example.com/docs/ + https://examples.com/docs.html diff -urPN 0.1.0/cblp 1.0.0/cblp --- 0.1.0/cblp Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblp Fri Mar 18 15:46:19 2022 @@ -1,6 +1,13 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cb.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/p.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +$7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/ { +ps[$7]++ +} +END { +for (p in ps) +print p" "ps[p] +} diff -urPN 0.1.0/cblp.1 1.0.0/cblp.1 --- 0.1.0/cblp.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblp.1 Fri Mar 18 15:46:19 2022 @@ -22,9 +22,7 @@ .Em pages are defined as .Em paths -ending in -.Pa / -or +with no extension or ending in .Pa .html to which .Em GET diff -urPN 0.1.0/cblr 1.0.0/cblr --- 0.1.0/cblr Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblr Fri Mar 18 15:46:19 2022 @@ -1,6 +1,16 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cb.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/r.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +{ +gsub("\"", "", $11) +if ($11 == "") $11="none" +} +$7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/ { +rs[$11]++ +} +END { +for (r in rs) print r" "rs[r] +} diff -urPN 0.1.0/cblr.1 1.0.0/cblr.1 --- 0.1.0/cblr.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblr.1 Fri Mar 18 15:46:19 2022 @@ -22,9 +22,7 @@ .Em referrers are defined as the address of the webpage which is linked to a .Em path -ending in -.Pa / -or +with no extension or ending in .Pa .html to which .Em GET diff -urPN 0.1.0/cblu 1.0.0/cblu --- 0.1.0/cblu Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblu Fri Mar 18 15:46:19 2022 @@ -1,6 +1,8 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cb.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/u.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +($7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/) && +!us[$1]++ diff -urPN 0.1.0/cblu.1 1.0.0/cblu.1 --- 0.1.0/cblu.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblu.1 Fri Mar 18 15:46:19 2022 @@ -23,9 +23,7 @@ are defined as unique remote hosts that made at least one .Em GET -request to paths ending in -.Pa / -or +request to paths with no extension or ending in .Pa .html that returned a .Em 200 diff -urPN 0.1.0/cblv 1.0.0/cblv --- 0.1.0/cblv Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblv Fri Mar 18 15:46:19 2022 @@ -1,7 +1,7 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/c.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cb.awk \ --f "${FLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/fl}"/f.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/v.awk "$@" +#!/usr/bin/awk -f +$6 !~ /GET/ || +$9 != 200 { +next +} +$7 !~ /\/*\.[^.]*$/ || +$7 ~ /\.html$/ diff -urPN 0.1.0/cblv.1 1.0.0/cblv.1 --- 0.1.0/cblv.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblv.1 Fri Mar 18 15:46:19 2022 @@ -23,9 +23,7 @@ are defined as .Em GET requests -to paths ending in -.Pa / -or +to paths with no extension or ending in .Pa .html that returned a .Em 200 diff -urPN 0.1.0/cblvp 1.0.0/cblvp --- 0.1.0/cblvp Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvp Fri Mar 18 15:46:19 2022 @@ -1,6 +1,13 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cbv.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/p.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { +next +} +$8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/ { +ps[$8]++ +} +END { +for (p in ps) +print p" "ps[p] +} diff -urPN 0.1.0/cblvp.1 1.0.0/cblvp.1 --- 0.1.0/cblvp.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvp.1 Fri Mar 18 15:46:19 2022 @@ -22,9 +22,7 @@ .Em pages are defined as .Em paths -ending in -.Pa / -or +with no extension or ending in .Pa .html to which .Em GET diff -urPN 0.1.0/cblvr 1.0.0/cblvr --- 0.1.0/cblvr Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvr Fri Mar 18 15:46:19 2022 @@ -1,6 +1,16 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cbv.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/r.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { +next +} +{ +gsub("\"", "", $12) +if ($12 == "") $12="none" +} +$8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/ { +rs[$12]++ +} +END { +for (r in rs) print r" "rs[r] +} diff -urPN 0.1.0/cblvr.1 1.0.0/cblvr.1 --- 0.1.0/cblvr.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvr.1 Fri Mar 18 15:46:19 2022 @@ -22,9 +22,7 @@ .Em referrers are defined as the address of the webpage which is linked to a .Em path -ending in -.Pa / -or +with no extension or ending in .Pa .html to which .Em GET diff -urPN 0.1.0/cblvu 1.0.0/cblvu --- 0.1.0/cblvu Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvu Fri Mar 18 15:46:19 2022 @@ -1,6 +1,8 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cbv.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/u.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { +next +} +($8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/) && +!us[$2]++ diff -urPN 0.1.0/cblvu.1 1.0.0/cblvu.1 --- 0.1.0/cblvu.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvu.1 Fri Mar 18 15:46:19 2022 @@ -26,12 +26,9 @@ are defined as unique remote hosts that made at least one .Em GET -request -for files with an +request to paths with no extension or ending in .Pa .html -extension, including the -.Pa / -path that returned a +that returned a .Em 200 HTTP response status code. .Pp diff -urPN 0.1.0/cblvv 1.0.0/cblvv --- 0.1.0/cblvv Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvv Fri Mar 18 15:46:19 2022 @@ -1,6 +1,7 @@ -#!/bin/sh - -awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/cv.awk \ --f "${CBLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cbl}"/cbv.awk \ --f "${CLSHAREPATH:-$(dirname "$(realpath "$0")")/../share/cl}"/v.awk "$@" +#!/usr/bin/awk -f +$7 !~ /GET/ || +$10 != 200 { +next +} +$8 !~ /\/*\.[^.]*$/ || +$8 ~ /\.html$/ diff -urPN 0.1.0/cblvv.1 1.0.0/cblvv.1 --- 0.1.0/cblvv.1 Mon Aug 10 21:15:30 2020 +++ 1.0.0/cblvv.1 Fri Mar 18 15:46:19 2022 @@ -22,9 +22,7 @@ .Em views are defined as .Em GET -requests to paths ending in -.Pa / -or +requests to paths with no extension or ending in .Pa .html that returned a .Em 200 diff -urPN 0.1.0/share/cb.awk 1.0.0/share/cb.awk --- 0.1.0/share/cb.awk Mon Aug 10 21:15:30 2020 +++ 1.0.0/share/cb.awk Thu Jan 1 01:00:00 1970 @@ -1,5 +0,0 @@ -{ -R=$11 -u=uq[6] -split(u, ua, " ") -} diff -urPN 0.1.0/share/cbv.awk 1.0.0/share/cbv.awk --- 0.1.0/share/cbv.awk Mon Aug 10 21:15:30 2020 +++ 1.0.0/share/cbv.awk Thu Jan 1 01:00:00 1970 @@ -1,5 +0,0 @@ -{ -R=$12 -u=a[6] -split(u, ua, " ") -} diff -urPN 0.1.0/share/r.awk 1.0.0/share/r.awk --- 0.1.0/share/r.awk Mon Aug 10 21:15:30 2020 +++ 1.0.0/share/r.awk Thu Jan 1 01:00:00 1970 @@ -1,7 +0,0 @@ -#!/usr/bin/awk -f - -{ ra[1] == "GET" && s == 200 && ra[2] ~ /\/$|\.html$/ && !rs[R]++ } - -END { - for (r in rs) print r" "rs[r] -}