initial commit. project created. it works.

This commit is contained in:
Mark Schmale 2018-09-11 17:20:25 +02:00
commit e22d94dd4c
7 changed files with 538 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/target
**/*.rs.bk
sample/

12
.travis.yml Normal file
View File

@ -0,0 +1,12 @@
language: rust
rust:
- stable
- beta
- nightly
cache: cargo
matrix:
allow_failures:
- rust: nightly
fast_finish: true

223
Cargo.lock generated Normal file
View File

@ -0,0 +1,223 @@
[[package]]
name = "aho-corasick"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "backtrace"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "backtrace-sys"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cc"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "failure"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
"failure_derive 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "failure_derive"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)",
"synstructure 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nginx-log-parser"
version = "0.1.0"
dependencies = [
"failure 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "proc-macro2"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rustc-demangle"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "syn"
version = "0.14.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "synstructure"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ucd-util"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "utf8-ranges"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "version_check"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "68f56c7353e5a9547cbd76ed90f7bb5ffc3ba09d4ea9bd1d8c06c8b1142eeb5a"
"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a"
"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
"checksum cc 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)" = "70f2a88c2e69ceee91c209d8ef25b81fc1a65f42c7f14dfd59d1fed189e514d1"
"checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3"
"checksum failure 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7efb22686e4a466b1ec1a15c2898f91fa9cb340452496dca654032de20ff95b9"
"checksum failure_derive 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "946d0e98a50d9831f5d589038d2ca7f8f455b1c21028c0db0e84116a12696426"
"checksum lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca488b89a5657b0a2ecd45b95609b3e848cf1755da332a0da46e2b2b1cb371a7"
"checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d"
"checksum memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a3b4142ab8738a78c51896f704f83c11df047ff1bda9a92a661aa6361552d93d"
"checksum proc-macro2 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)" = "ffe022fb8c8bd254524b0b3305906c1921fa37a84a644e29079a9e62200c3901"
"checksum quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "dd636425967c33af890042c483632d33fa7a18f19ad1d7ea72e8998c6ef8dea5"
"checksum regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2069749032ea3ec200ca51e4a31df41759190a88edca0d2d86ee8bedf7073341"
"checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d"
"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395"
"checksum syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)" = "261ae9ecaa397c42b960649561949d69311f08eeaea86a65696e6e46517cf741"
"checksum synstructure 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb9b7550d063ea184027c9b8c20ac167cd36d3e06b3a40bceb9d746dc1a7b7"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd70f467df6810094968e2fce0ee1bd0e87157aceb026a8c083bcf5e25b9efe4"
"checksum version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7716c242968ee87e5542f8021178248f267f295a5c4803beae8b8b7fd9bc6051"
"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "nginx-log-parser"
version = "0.1.0"
authors = ["Mark Schmale <m.schmale@uniorg.de>"]
[dependencies]
regex = "1.0.5"
failure = "0.1.2"

39
examples/example.rs Normal file
View File

@ -0,0 +1,39 @@
extern crate nginx_log_parser;
use nginx_log_parser::Format;
/// Parses a handful of sample access-log lines with the default "combined"-style format
/// and prints the request and client address of every line that matches.
fn main() {
    // Compile the format description once; it is reused for every line below.
    let log_format = Format::from_str(
        r#"$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent""#,
    ).expect("cannot parse format: ");

    let sample_lines = vec![
        r#"198.51.106.151 - - [11/Sep/2018:08:44:17 +0000] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36""#,
        r#"192.0.2.3 - - [11/Sep/2018:10:40:01 +0000] "GET / HTTP/1.0" 200 612 "-" "-""#,
        r#"198.51.100.54 - - [11/Sep/2018:12:08:52 +0000] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36""#,
        r#"198.51.100.119 - - [11/Sep/2018:12:27:57 +0000] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36""#,
        r#"198.51.100.77 - - [11/Sep/2018:13:28:36 +0000] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0""#,
        r#"192.0.2.139 - - [11/Sep/2018:13:45:21 +0000] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36""#,
        r#"192.0.2.139 - - [11/Sep/2018:13:45:21 +0000] "GET /robots.txt HTTP/1.1" 404 169 "-" "-""#,
        r#"192.0.2.139 - - [11/Sep/2018:13:45:22 +0000] "GET /sitemap.xml HTTP/1.1" 404 169 "-" "-""#,
        r#"192.0.2.139 - - [11/Sep/2018:13:45:22 +0000] "GET /.well-known/security.txt HTTP/1.1" 404 169 "-" "-""#,
        r#"192.0.2.139 - - [11/Sep/2018:13:45:22 +0000] "GET /favicon.ico HTTP/1.1" 404 142 "-" "python-requests/2.13.0""#,
        r#"203.0.113.2 - - [11/Sep/2018:14:06:08 +0000] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://example.com)""#,
        r#"203.0.113.17 - - [11/Sep/2018:14:12:09 +0000] "GET /manager/html HTTP/1.1" 404 169 "-" "Mozilla/3.0 (compatible; Indy Library)""#,
    ];

    for raw in sample_lines {
        if let Some(parsed) = log_format.parse(raw) {
            // Both fields are part of the format above, so they exist on every match.
            println!(
                "{} from {}",
                parsed.get("request").unwrap(),
                parsed.get("remote_addr").unwrap()
            );
        } else {
            eprintln!("error parsing line: {}", raw);
        }
    }
}

241
src/format.rs Normal file
View File

@ -0,0 +1,241 @@
use regex::{escape, Captures, Error as RegexError, Regex};
/// Errors that can occur while turning a format string into a `Format`.
#[derive(Debug, Fail)]
pub enum FormatParserError {
/// The regular expression generated from the format parts was rejected by the regex
/// engine; `inner` holds the error reported by `Regex::new`.
#[fail(display = "compiling the regular expression failed: {}", inner)]
CompilationFailed { inner: RegexError },
}
/// A single entry/line in a log.
///
/// Wraps the regex captures obtained by matching one log line against a `Format`.
/// The lifetime `'a` is the lifetime of the `Captures` and therefore of the matched line.
pub struct Entry<'a> {
/// Capture groups of the matched line; the named groups are what `get` and `has` look up.
captures: Captures<'a>,
}
impl<'a> Entry<'a> {
    /// Accesses the value of a named field in a log line.
    ///
    /// `key` is the variable name without the leading `$`. Returns `None` when no capture
    /// group named `key` took part in the match.
    ///
    /// The returned slice borrows from the parsed log line (lifetime `'a`), not from the
    /// `Entry`, so it stays valid after the entry has been dropped.
    ///
    /// ```rust
    /// # extern crate nginx_log_parser;
    /// # use nginx_log_parser::Format;
    /// #
    /// let format = Format::from_str("$remote_addr [$time_local] $request").unwrap();
    /// let entry = format.parse("1.2.3.4 [11/Sep/2018:08:44:17 +0000] GET / HTTP/1.1").unwrap();
    /// assert_eq!(Some("GET / HTTP/1.1"), entry.get("request"));
    /// ```
    pub fn get(&self, key: &str) -> Option<&'a str> {
        // `Match::as_str` hands out a slice of the searched text, so the result carries
        // the text lifetime `'a` rather than the lifetime of the `&self` borrow.
        self.captures.name(key).map(|mat| mat.as_str())
    }

    /// Checks if the log line contains a field named `key`.
    pub fn has(&self, key: &str) -> bool {
        self.captures.name(key).is_some()
    }
}
#[derive(Debug)]
/// Represents the parsed format of an nginx log line.
/// Can be obtained by parsing an nginx log format string using `Format::from_str`.
pub struct Format {
/// The variable and fixed-text parts of the format string, in the order they were read.
parts: Vec<FormatPart>,
/// Regular expression compiled from the concatenated patterns of `parts`;
/// used by `parse` to match log lines.
re: Regex,
}
impl Format {
    /// Builds a `Format` from a format string written in nginx-like syntax.
    ///
    /// # Errors
    /// Returns `FormatParserError::CompilationFailed` when the regular expression derived
    /// from the format string cannot be compiled.
    ///
    /// # Example
    /// ```rust
    /// # extern crate nginx_log_parser;
    /// # use nginx_log_parser::Format;
    /// #
    /// let pattern = "$remote_addr [$time_local] $request";
    /// let format = Format::from_str(pattern).expect("could not parse format string");
    /// ```
    pub fn from_str(input: &str) -> Result<Format, FormatParserError> {
        let raw = input.as_bytes();
        read_format(raw)
    }

    /// Matches one log line against this format.
    ///
    /// Returns an `Entry` holding the captured fields, or `None` if the line does not
    /// match the format.
    ///
    /// # Example
    /// ```rust
    /// # extern crate nginx_log_parser;
    /// # use nginx_log_parser::Format;
    /// #
    /// let format = Format::from_str("$remote_addr [$time_local] $request").unwrap();
    /// let entry = format.parse("1.2.3.4 [11/Sep/2018:08:44:17 +0000] GET / HTTP/1.1");
    /// assert_eq!(Some("GET / HTTP/1.1"), entry.unwrap().get("request"));
    /// ```
    ///
    /// # Invalid input
    /// ```rust
    /// # extern crate nginx_log_parser;
    /// # use nginx_log_parser::Format;
    /// #
    /// let format = Format::from_str("$remote_addr [$time_local] $request").unwrap();
    /// assert!(format.parse("this does not work").is_none());
    /// ```
    pub fn parse<'a>(&self, line: &'a str) -> Option<Entry<'a>> {
        // TODO: replace the regex with a hand-written parser at some point
        let captures = self.re.captures(line)?;
        Some(Entry { captures })
    }

    /// Assembles a `Format` from already separated parts. Currently internal.
    ///
    /// The pattern of every part is concatenated in order and compiled into one regex.
    fn from_parts(parts: Vec<FormatPart>) -> Result<Format, FormatParserError> {
        let mut pattern = String::new();
        for part in &parts {
            pattern.push_str(&part.get_pattern());
        }
        Regex::new(&pattern)
            .map(|re| Format { parts, re })
            .map_err(|inner| FormatParserError::CompilationFailed { inner })
    }
}
/// One piece of a parsed format string.
#[derive(Debug, Eq, PartialEq)]
pub enum FormatPart {
/// A variable reference, stored including its leading `$` (e.g. `$remote_addr`).
Variable(String),
/// Literal text between variables, stored verbatim (e.g. `" - "`).
Fixed(String),
}
impl FormatPart {
fn get_pattern(&self) -> String {
use self::FormatPart::*;
match self {
Variable(name) => format!(
"(?P<{}>{})",
name.trim_left_matches('$'),
match name.as_str() {
"$status" => "\\d{3}",
"$body_bytes_sent" => "\\d+",
_ => ".*",
}
),
Fixed(fixed_part) => escape(&fixed_part),
}
}
}
/// State of the byte-wise scanner that splits a format string into parts.
///
/// The two `usize` fields of `Variable` and `Fixed` are the start (inclusive) and end
/// (exclusive) byte offsets of the part currently being read.
enum FormatParserState {
/// Initial state, before any byte has been read.
Start,
/// Currently inside a variable; the range begins at its `$`.
Variable(usize, usize),
/// Currently inside fixed (literal) text.
Fixed(usize, usize),
}
/// Returns `true` if the byte may appear in a variable name after the leading `$`.
///
/// nginx variable names consist of ASCII letters, ASCII digits and `_`. Digits have to be
/// accepted here, otherwise names such as `$http2` or `$arg_1` are cut off at the first
/// digit and the rest is treated as literal text.
fn is_var_char(char: &u8) -> bool {
    char.is_ascii_alphanumeric() || *char == b'_'
}
fn read_byte(chr: &u8, index: usize, state: &FormatParserState) -> FormatParserState {
use format::FormatParserState::*;
match state {
Start => match chr {
b'$' => Variable(index, index + 1),
_ => Fixed(index, index + 1),
},
Variable(start, _end) => match chr {
x if is_var_char(x) => Variable(*start, index + 1),
_ => Fixed(index, index + 1),
},
Fixed(start, _end) => match chr {
b'$' => Variable(index, index + 1),
_ => Fixed(*start, index + 1),
},
}
}
fn read_format(bytes: &[u8]) -> Result<Format, FormatParserError> {
use format::FormatParserState::*;
let mut state = Start;
let mut stack = vec![];
for i in 0..bytes.len() {
let new_state = read_byte(&bytes[i], i, &state);
match (&state, &new_state) {
(Variable(start, end), Fixed(_, _)) => stack.push(FormatPart::Variable(
String::from_utf8(bytes[*start..*end].to_vec()).unwrap(),
)),
(Fixed(start, end), Variable(_, _)) => stack.push(FormatPart::Fixed(
String::from_utf8(bytes[*start..*end].to_vec()).unwrap(),
)),
_ => {}
};
state = new_state
}
match &state {
Variable(start, end) => stack.push(FormatPart::Variable(
String::from_utf8(bytes[*start..*end].to_vec()).unwrap(),
)),
Fixed(start, end) => stack.push(FormatPart::Fixed(
String::from_utf8(bytes[*start..*end].to_vec()).unwrap(),
)),
_ => {}
};
Format::from_parts(stack)
}
#[cfg(test)]
mod test {
    use super::{Format, FormatPart};

    /// Shorthand for a `FormatPart::Variable` with the given name (including `$`).
    fn var(name: &str) -> FormatPart {
        FormatPart::Variable(String::from(name))
    }

    /// Shorthand for a `FormatPart::Fixed` with the given literal text.
    fn fixed(text: &str) -> FormatPart {
        FormatPart::Fixed(String::from(text))
    }

    /// The format string must be split into exactly these parts, in this order.
    #[test]
    fn test_parse_format() {
        let format_input = r#"$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for""#;
        let format = Format::from_str(format_input).unwrap();
        let expected = vec![
            var("$remote_addr"),
            fixed(" - "),
            var("$remote_user"),
            fixed(" ["),
            var("$time_local"),
            fixed(r#"] ""#),
            var("$request"),
            fixed(r#"" "#),
            var("$status"),
            fixed(" "),
            var("$body_bytes_sent"),
            fixed(r#" ""#),
            var("$http_referer"),
            fixed(r#"" ""#),
            var("$http_user_agent"),
            fixed(r#"" ""#),
            var("$http_x_forwarded_for"),
            fixed(r#"""#),
        ];
        // Comparing the whole list also catches missing or surplus parts.
        assert_eq!(expected, format.parts);
    }

    /// A line in nginx' default access log layout yields every field of the format.
    #[test]
    fn test_parse_main_format() {
        let data = r#"192.0.2.139 - - [11/Sep/2018:13:45:22 +0000] "GET /favicon.ico HTTP/1.1" 404 142 "-" "python-requests/2.13.0""#;
        let format_input = r#"$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent""#;
        let format = Format::from_str(format_input).unwrap();
        let result = format.parse(data).unwrap();
        assert_eq!(Some("192.0.2.139"), result.get("remote_addr"));
        assert_eq!(Some("-"), result.get("remote_user"));
        assert_eq!(Some("11/Sep/2018:13:45:22 +0000"), result.get("time_local"));
        assert_eq!(Some("GET /favicon.ico HTTP/1.1"), result.get("request"));
        assert_eq!(Some("404"), result.get("status"));
        assert_eq!(Some("142"), result.get("body_bytes_sent"));
        assert_eq!(Some("-"), result.get("http_referer"));
        assert_eq!(
            Some("python-requests/2.13.0"),
            result.get("http_user_agent")
        );
    }
}

12
src/lib.rs Normal file
View File

@ -0,0 +1,12 @@
//!
//! A library to read nginx log files
//!
extern crate regex;
#[macro_use]
extern crate failure;
/// Parsing of nginx log format strings and of log lines written in such a format.
pub mod format;
pub use format::Format;