aboutsummaryrefslogtreecommitdiff
path: root/src/http
diff options
context:
space:
mode:
Diffstat (limited to 'src/http')
-rw-r--r--src/http/flatten.rs37
-rw-r--r--src/http/mod.rs6
-rw-r--r--src/http/parser.rs523
-rw-r--r--src/http/rule.rs123
4 files changed, 689 insertions, 0 deletions
diff --git a/src/http/flatten.rs b/src/http/flatten.rs
new file mode 100644
index 0000000..51ead0c
--- /dev/null
+++ b/src/http/flatten.rs
@@ -0,0 +1,37 @@
+use std::collections::HashMap;
+
+use super::rule::{HTTPMessage, Method};
+
+/// HTTP protocol versions that `flatten` accepts in a request line.
+///
+/// The enum is a plain tag, so it is cheap to copy and can be compared with `==`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Version {
+    /// Version number `0.9`.
+    Http0_9,
+    /// Version number `1.0`.
+    Http1_0,
+    /// Version number `1.1`.
+    Http1_1,
+    // HTTP/2 won't parse, anyway.
+}
+
+/// Flattened view of a parsed request, as produced by `flatten`.
+#[derive(Debug)]
+pub struct HTTPRequest {
+ /// Request method taken from the request line.
+ pub method: Method,
+ /// Protocol version taken from the request line.
+ pub version: Version,
+ /// Lexemes of the path segments of the request target, in order.
+ pub requested_path: Vec<String>,
+ /// Header fields keyed by field-name lexeme; values are the field value bytes.
+ pub headers: HashMap<String, Vec<u8>>,
+}
+
+/// Collapses the grammar-shaped `HTTPMessage` into a flat `HTTPRequest`.
+///
+/// Returns `None` when the version is not 0.9, 1.0 or 1.1.
+///
+/// The query part of the request target is not carried over. Header names are
+/// used as map keys exactly as they were parsed, and when the same name occurs
+/// more than once the later value replaces the earlier one.
+pub fn flatten(message: HTTPMessage) -> Option<HTTPRequest> {
+    let HTTPMessage { request_line, header_fields } = message;
+
+    let version = match (request_line.http_version.major, request_line.http_version.minor) {
+        (0, 9) => Version::Http0_9,
+        (1, 0) => Version::Http1_0,
+        (1, 1) => Version::Http1_1,
+        _ => return None,
+    };
+
+    let mut requested_path = Vec::new();
+    for segment in request_line.request_target.absolute_path.segments {
+        requested_path.push(segment.lexeme);
+    }
+
+    let mut headers = HashMap::new();
+    for field in header_fields {
+        headers.insert(field.name.lexeme, field.value.content);
+    }
+
+    Some(HTTPRequest {
+        method: request_line.method,
+        version,
+        requested_path,
+        headers,
+    })
+}
diff --git a/src/http/mod.rs b/src/http/mod.rs
new file mode 100644
index 0000000..d2fd5ac
--- /dev/null
+++ b/src/http/mod.rs
@@ -0,0 +1,6 @@
+mod parser;
+mod rule;
+mod flatten;
+
+pub use parser::Parser;
+pub use rule::Method;
diff --git a/src/http/parser.rs b/src/http/parser.rs
new file mode 100644
index 0000000..259ddf2
--- /dev/null
+++ b/src/http/parser.rs
@@ -0,0 +1,523 @@
+use async_std::io::Read;
+
+use super::rule::{HTTPMessage, HeaderField, FieldName, FieldValue, FieldContent, RequestLine, Method, OriginForm, HTTPVersion, AbsolutePath, Query, Segment};
+use super::flatten::{flatten, HTTPRequest};
+use crate::peekable_bufreader::PeekableBufReader;
+
+// Byte values of the ASCII characters the parser matches on explicitly.
+const HTAB: u8 = 0x09; // horizontal tab
+const SPACE: u8 = 0x20; // ' '
+const PERCENT: u8 = 0x25; // '%'
+const SLASH: u8 = 0x2F; // '/'
+const DOT: u8 = 0x2E; // '.'
+const COLON: u8 = 0x3A; // ':'
+const QUESTION_MARK: u8 = 0x3F; // '?'
+const ATSIGN: u8 = 0x40; // '@'
+
+// Failure modes of the character consumers that return a `Result`.
+enum ErrorType {
+ // The expected item is not present at the current position (for a
+ // percent-encoding: no leading '%'). `segment` and `query` treat this as the
+ // end of their run of characters.
+ Missing,
+ // The item was started but the rest of it is invalid (for a
+ // percent-encoding: the '%' is not followed by two hex digits). `segment`
+ // and `query` abort with `None` on this.
+ Malformed,
+}
+
+// Parser for a single HTTP request read from a peekable byte source.
+// Every grammar rule is a method that peeks at and consumes bytes from `source`.
+pub struct Parser<T>
+ where T: Read + Unpin {
+ // Input the request is read from.
+ source: PeekableBufReader<T>,
+}
+
+impl<T> Parser<T>
+ where T: Read + Unpin {
+ /// Builds a parser over `source`. No input is read here.
+ pub fn new(source: PeekableBufReader<T>) -> Self {
+     Parser { source }
+ }
+
+ /// Parses one request from the source and flattens it into an `HTTPRequest`.
+ /// Returns `None` when the message does not parse or `flatten` rejects it.
+ pub async fn parse(self) -> Option<HTTPRequest> {
+     let message = self.http_message().await?;
+     flatten(message)
+ }
+
+ /*
+  * RFC 7230, Page 19
+  * Reads the request line, then header fields (each terminated by CRLF) until
+  * a CRLF is found where the next field would start. Any failure yields None.
+  */
+ async fn http_message(mut self) -> Option<HTTPMessage> {
+     let request_line = self.start_line().await?;
+     let mut header_fields = Vec::new();
+     // A CRLF in place of a header field marks the end of the header section.
+     while self.consume_carriage_return().await.is_none() {
+         let field = self.header_field().await?;
+         header_fields.push(field);
+         self.consume_carriage_return().await?;
+     }
+     // GET Requests don't have a message body, and we only really deal with GET requests.
+     // There's no need to examine the headers and attempt to read a message body.
+     Some(HTTPMessage {
+         request_line,
+         header_fields,
+     })
+ }
+
+ /*
+  * RFC 7230, Page 23
+  * Parses `name ":" OWS value OWS`. The terminating CRLF is left for the caller.
+  */
+ async fn header_field(&mut self) -> Option<HeaderField> {
+     let name = self.field_name().await?;
+     // The colon must follow the name directly; no whitespace is skipped before it.
+     self.consume_char(&COLON).await?;
+
+     self.consume_optional_whitespace().await;
+     let value = self.field_value().await?;
+     self.consume_optional_whitespace().await;
+
+     Some(HeaderField { name, value })
+ }
+
+ /*
+  * RFC 7230, Page 23
+  * A field name is a single token.
+  */
+ async fn field_name(&mut self) -> Option<FieldName> {
+     let lexeme = self.logical_token().await?;
+     Some(FieldName { lexeme })
+ }
+
+ /*
+  * RFC 7230, Page 23
+  * obs-fold is deprecated except within message/http media. This isn't going to come up for us,
+  * so we deviate from the grammar slightly.
+  *
+  * Collects field-content items until one fails to parse. Always returns Some; the
+  * value may be empty. Whitespace between two visible characters is stored as a
+  * single SPACE, however long the run was.
+  */
+ async fn field_value(&mut self) -> Option<FieldValue> {
+     let mut content = Vec::new();
+     // Look, no obs-fold!
+     while let Some(FieldContent { first_char, second_char }) = self.field_content().await {
+         // Let's do some pre-emptive flattening here.
+         content.push(first_char);
+         if let Some(after_gap) = second_char {
+             content.extend_from_slice(&[SPACE, after_gap]);
+         }
+     }
+     Some(FieldValue { content })
+ }
+
+ /*
+  * RFC 7230, Page 23
+  * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+  *
+  * Returns None only when no visible character is available. When whitespace
+  * follows the first character but no visible character follows the whitespace
+  * (trailing whitespace before the line end, or end of input), the first
+  * character is still returned with `second_char` set to None; the whitespace
+  * has been consumed. Bailing out at that point would lose the already
+  * consumed first character, i.e. the last byte of the header value.
+  */
+ async fn field_content(&mut self) -> Option<FieldContent> {
+     let first_char = self.field_vchar().await?;
+     let second_char = if self.consume_required_whitespace().await.is_some() {
+         // No `?` here: a missing second character is not an error.
+         self.field_vchar().await
+     } else {
+         None
+     };
+     Some(FieldContent {
+         first_char,
+         second_char,
+     })
+ }
+
+ /// Consumes and returns the next byte if it is a visible ASCII character or
+ /// obs-text (>= 0x80). Otherwise nothing is consumed and `None` is returned.
+ async fn field_vchar(&mut self) -> Option<u8> {
+     let candidate = *self.source.peek().await?;
+     if !(Self::is_visible_char(&candidate) || Self::is_obs_text_char(&candidate)) {
+         return None;
+     }
+     self.source.next().await
+ }
+
+ /*
+  * RFC 7230, Page 21
+  * This is a server, so the start-line is exclusively a request-line.
+  */
+ async fn start_line(&mut self) -> Option<RequestLine> {
+     let line = self.request_line().await?;
+     Some(line)
+ }
+
+ /*
+  * RFC 7230, Page 21
+  * Parses `method SP request-target SP HTTP-version CRLF`. Exactly one SPACE is
+  * accepted at each separator.
+  */
+ async fn request_line(&mut self) -> Option<RequestLine> {
+     let method = self.method().await?;
+     self.consume_char(&SPACE).await?;
+
+     let target = self.request_target().await?;
+     self.consume_char(&SPACE).await?;
+
+     let version = self.http_version().await?;
+     self.consume_carriage_return().await?;
+
+     Some(RequestLine {
+         method,
+         request_target: target,
+         http_version: version,
+     })
+ }
+
+ /*
+  * RFC 7230, Page 41
+  * We only serve some static content; therefore we only need support origin-form.
+  */
+ async fn request_target(&mut self) -> Option<OriginForm> {
+     let target = self.origin_form().await?;
+     Some(target)
+ }
+
+ /*
+  * RFC 7230, Page 42
+  * Parses an absolute path, optionally followed by '?' and a query.
+  */
+ async fn origin_form(&mut self) -> Option<OriginForm> {
+     let absolute_path = self.absolute_path().await?;
+     // The query is parsed only when a '?' introduces it.
+     let query = match self.consume_char(&QUESTION_MARK).await {
+         Some(_) => Some(self.query().await?),
+         None => None,
+     };
+     Some(OriginForm { absolute_path, query })
+ }
+
+ /*
+  * RFC 7230, Page 16
+  * Parses one or more "/" segment pairs. The result always holds at least one
+  * segment; a segment may be empty (for example the path "/").
+  */
+ async fn absolute_path(&mut self) -> Option<AbsolutePath> {
+     let mut segments = Vec::new();
+     // The leading slash is mandatory.
+     self.consume_char(&SLASH).await?;
+     segments.push(self.segment().await?);
+     // Every further slash introduces another segment.
+     while self.consume_char(&SLASH).await.is_some() {
+         segments.push(self.segment().await?);
+     }
+     Some(AbsolutePath { segments })
+ }
+
+ /*
+  * RFC 3986, Page 23
+  * Reads path characters until one is missing. Percent-encoded octets are
+  * decoded, and the collected bytes are then interpreted as UTF-8, so that a
+  * multi-byte sequence such as %C3%A9 yields one character rather than one
+  * character per byte.
+  *
+  * Returns None on a malformed percent-encoding or when the decoded bytes are
+  * not valid UTF-8. An empty segment is valid.
+  *
+  * NOTE(review): decoded octets are not filtered, so the lexeme can contain
+  * '/' (from %2F) or NUL (from %00); callers that map segments onto a file
+  * system should check for this.
+  */
+ async fn segment(&mut self) -> Option<Segment> {
+     let mut bytes = Vec::new();
+     while self.source.peek().await.is_some() {
+         match self.consume_path_character().await {
+             Ok(byte) => bytes.push(byte),
+             Err(ErrorType::Missing) => break,
+             Err(ErrorType::Malformed) => return None,
+         }
+     }
+     let lexeme = String::from_utf8(bytes).ok()?;
+     Some(Segment { lexeme })
+ }
+
+ /*
+  * RFC 3986, Page 50
+  * Reads query characters until one is missing. Percent-encoded octets are
+  * decoded, and the collected bytes are then interpreted as UTF-8, so that a
+  * multi-byte sequence such as %C3%A9 yields one character rather than one
+  * character per byte.
+  *
+  * Returns None on a malformed percent-encoding or when the decoded bytes are
+  * not valid UTF-8. An empty query is valid.
+  */
+ async fn query(&mut self) -> Option<Query> {
+     let mut bytes = Vec::new();
+     while self.source.peek().await.is_some() {
+         match self.consume_query_character().await {
+             Ok(byte) => bytes.push(byte),
+             Err(ErrorType::Missing) => break,
+             Err(ErrorType::Malformed) => return None,
+         }
+     }
+     let lexeme = String::from_utf8(bytes).ok()?;
+     Some(Query { lexeme })
+ }
+
+ /*
+  * RFC 7230, Page 14
+  * Parses `"HTTP" "/" DIGIT "." DIGIT`; major and minor are one digit each.
+  */
+ async fn http_version(&mut self) -> Option<HTTPVersion> {
+     self.consume_logical_token("HTTP").await?;
+     self.consume_char(&SLASH).await?;
+     let major_digit = self.consume_digit().await?;
+     self.consume_char(&DOT).await?;
+     let minor_digit = self.consume_digit().await?;
+     Some(HTTPVersion {
+         major: Self::ascii_digit_to_value(&major_digit),
+         minor: Self::ascii_digit_to_value(&minor_digit),
+     })
+ }
+
+ /*
+  * RFC 7230, Page 21
+  * Reads a token and maps it to a known method; unknown names yield None.
+  */
+ async fn method(&mut self) -> Option<Method> {
+     let name = self.logical_token().await?;
+     Method::from_string(&name)
+ }
+
+ /*
+  * RFC 7230, Page 27
+  * Consumes the longest run of token characters at the current position.
+  * Returns None, consuming nothing, when the input is exhausted or the next
+  * byte is not a token character.
+  */
+ async fn logical_token(&mut self) -> Option<String> {
+     let mut token = String::new();
+     // Peek once per byte and stop at the first byte that is not a token character.
+     while let Some(byte) = self.source.peek().await {
+         if !Self::is_logical_token_char(byte) {
+             break;
+         }
+         match self.source.next().await {
+             // Token characters are all ASCII, so the cast is lossless.
+             Some(consumed) => token.push(consumed as char),
+             None => break,
+         }
+     }
+     if token.is_empty() {
+         None
+     } else {
+         Some(token)
+     }
+ }
+
+ /// Consumes and returns the next byte if it equals `character`; otherwise
+ /// consumes nothing and returns `None`.
+ async fn consume_char(&mut self, character: &u8) -> Option<u8> {
+     let is_expected = *self.source.peek().await? == *character;
+     if is_expected {
+         self.source.next().await
+     } else {
+         None
+     }
+ }
+
+ /// Reads a token and returns it only when it equals `value` exactly.
+ /// The token is consumed from the input even when it does not match.
+ async fn consume_logical_token(&mut self, value: &str) -> Option<String> {
+     self.logical_token()
+         .await
+         .filter(|token| token.as_str() == value)
+ }
+
+ /*
+  * RFC 3986, Page 23
+  * Consumes one path character: unreserved, sub-delim, ':', '@' or a
+  * percent-encoded octet (returned decoded). The alternatives are tried in that order.
+  */
+ async fn consume_path_character(&mut self) -> Result<u8, ErrorType> {
+     if let Some(byte) = self.consume_unreserved_character().await {
+         return Ok(byte);
+     }
+     if let Some(byte) = self.consume_sub_delim_character().await {
+         return Ok(byte);
+     }
+     if let Some(byte) = self.consume_char(&COLON).await {
+         return Ok(byte);
+     }
+     if let Some(byte) = self.consume_char(&ATSIGN).await {
+         return Ok(byte);
+     }
+     // Last alternative; it also supplies the Missing/Malformed distinction.
+     self.consume_percent_encoded().await
+ }
+
+ /*
+  * RFC 3986, Page 50
+  * Consumes one query character: unreserved, sub-delim, ':', '@', '/', '?' or a
+  * percent-encoded octet (returned decoded). The alternatives are tried in that order.
+  */
+ async fn consume_query_character(&mut self) -> Result<u8, ErrorType> {
+     if let Some(byte) = self.consume_unreserved_character().await {
+         return Ok(byte);
+     }
+     if let Some(byte) = self.consume_sub_delim_character().await {
+         return Ok(byte);
+     }
+     for delimiter in [COLON, ATSIGN, SLASH, QUESTION_MARK].iter() {
+         if let Some(byte) = self.consume_char(delimiter).await {
+             return Ok(byte);
+         }
+     }
+     // Last alternative; it also supplies the Missing/Malformed distinction.
+     self.consume_percent_encoded().await
+ }
+
+ /*
+  * RFC 5234, Page 5
+  * Despite the name, this consumes the two-byte sequence CR LF.
+  * If a CR is present but not followed by LF, the CR stays consumed and None
+  * is returned.
+  */
+ async fn consume_carriage_return(&mut self) -> Option<()> {
+     self.consume_char(&b'\r').await?;
+     self.consume_char(&b'\n').await.map(|_| ())
+ }
+
+ /// Consumes any run (possibly empty) of SPACE and HTAB bytes.
+ async fn consume_optional_whitespace(&mut self) {
+     // HTAB is tried only when SPACE did not match; the loop ends when neither does.
+     while self.consume_char(&SPACE).await.is_some()
+         || self.consume_char(&HTAB).await.is_some()
+     {}
+ }
+
+ /// Consumes a run of one or more SPACE / HTAB bytes.
+ /// Returns `None`, consuming nothing, when the next byte is neither.
+ async fn consume_required_whitespace(&mut self) -> Option<()> {
+     let mut consumed_any = false;
+     // HTAB is tried only when SPACE did not match; the loop ends when neither does.
+     while self.consume_char(&SPACE).await.is_some()
+         || self.consume_char(&HTAB).await.is_some()
+     {
+         consumed_any = true;
+     }
+     if consumed_any {
+         Some(())
+     } else {
+         None
+     }
+ }
+
+ /*
+  * RFC 5234, Page 14
+  * Consumes and returns the next byte if it is an ASCII digit; otherwise
+  * consumes nothing and returns None.
+  */
+ async fn consume_digit(&mut self) -> Option<u8> {
+     let candidate = *self.source.peek().await?;
+     if !Self::is_digit_char(&candidate) {
+         return None;
+     }
+     self.source.next().await
+ }
+
+ /// Consumes and returns the next byte if it is an unreserved URI character;
+ /// otherwise consumes nothing and returns `None`.
+ async fn consume_unreserved_character(&mut self) -> Option<u8> {
+     let candidate = *self.source.peek().await?;
+     if !Self::is_unreserved_char(&candidate) {
+         return None;
+     }
+     self.source.next().await
+ }
+
+ /// Consumes and returns the next byte if it is a sub-delim URI character;
+ /// otherwise consumes nothing and returns `None`.
+ async fn consume_sub_delim_character(&mut self) -> Option<u8> {
+     let candidate = *self.source.peek().await?;
+     if !Self::is_sub_delim_char(&candidate) {
+         return None;
+     }
+     self.source.next().await
+ }
+
+ /// Consumes `%XX` and returns the octet it encodes.
+ ///
+ /// Fails with `Missing` when the next byte is not '%' (nothing is consumed) and
+ /// with `Malformed` when the '%' is not followed by two hex digits.
+ async fn consume_percent_encoded(&mut self) -> Result<u8, ErrorType> {
+     if self.consume_char(&PERCENT).await.is_none() {
+         return Err(ErrorType::Missing);
+     }
+     let high = match self.consume_hex_digit().await {
+         Some(digit) => digit,
+         None => return Err(ErrorType::Malformed),
+     };
+     let low = match self.consume_hex_digit().await {
+         Some(digit) => digit,
+         None => return Err(ErrorType::Malformed),
+     };
+     match Self::hex_digits_to_byte(high, low) {
+         Some(byte) => Ok(byte),
+         None => Err(ErrorType::Malformed),
+     }
+ }
+
+ /// Consumes and returns the next byte if it is an ASCII hex digit (either
+ /// case); otherwise consumes nothing and returns `None`.
+ async fn consume_hex_digit(&mut self) -> Option<u8> {
+     let candidate = *self.source.peek().await?;
+     if !Self::is_hex_digit_char(&candidate) {
+         return None;
+     }
+     self.source.next().await
+ }
+
+ /// Converts an ASCII digit byte ('0'..='9') to its numeric value.
+ /// The argument is expected to be a digit; no check is made here.
+ fn ascii_digit_to_value(character: &u8) -> u32 {
+     u32::from(*character) - u32::from(b'0')
+ }
+
+ /// Combines two ASCII hex digits (either case) into the byte they encode,
+ /// `high_word` being the more significant nibble.
+ ///
+ /// Returns `None` if either argument is not a hex digit. No string is
+ /// allocated, and a leading '+' is not accepted as a sign.
+ fn hex_digits_to_byte(high_word: u8, low_word: u8) -> Option<u8> {
+     let high = (high_word as char).to_digit(16)?;
+     let low = (low_word as char).to_digit(16)?;
+     // Both nibbles are below 16, so the sum is at most 255 and the cast is lossless.
+     Some((high * 16 + low) as u8)
+ }
+
+ /*
+  * RFC 7230, Page 27
+  * True for ASCII letters, digits and the punctuation listed below.
+  */
+ fn is_logical_token_char(character: &u8) -> bool {
+     match *character {
+         b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'-' | b'.' | b'^'
+         | b'_' | b'`' | b'|' | b'~' => true,
+         other => other.is_ascii_alphanumeric(),
+     }
+ }
+
+ /*
+  * RFC 3986, Page 13
+  * True for ASCII letters, digits, '-', '.', '_' and '~'.
+  */
+ fn is_unreserved_char(character: &u8) -> bool {
+     match *character {
+         b'-' | b'.' | b'_' | b'~' => true,
+         other => other.is_ascii_alphanumeric(),
+     }
+ }
+
+ /*
+  * RFC 3986, Page 13
+  * True for the eleven sub-delimiters: ! $ & ' ( ) * + , ; =
+  */
+ fn is_sub_delim_char(character: &u8) -> bool {
+     match *character {
+         b'!' | b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+' | b',' | b';' | b'=' => true,
+         _ => false,
+     }
+ }
+
+ /*
+  * RFC 5234, Page 13
+  * True for the ASCII letters A-Z and a-z.
+  */
+ fn is_alpha_char(character: &u8) -> bool {
+     character.is_ascii_alphabetic()
+ }
+
+ /*
+  * RFC 5234, Page 14
+  * True for the ASCII digits 0-9.
+  */
+ fn is_digit_char(character: &u8) -> bool {
+     character.is_ascii_digit()
+ }
+
+ /*
+  * RFC 5234, Page 14
+  * True for 0-9, A-F and a-f.
+  */
+ fn is_hex_digit_char(character: &u8) -> bool {
+     character.is_ascii_hexdigit()
+ }
+
+ /*
+  * RFC 5234, Page 14
+  * True for the printable ASCII characters 0x21 ('!') through 0x7E ('~');
+  * SPACE is not included.
+  */
+ fn is_visible_char(character: &u8) -> bool {
+     character.is_ascii_graphic()
+ }
+
+ /// True for bytes outside the ASCII range (0x80 through 0xFF).
+ fn is_obs_text_char(character: &u8) -> bool {
+     !character.is_ascii()
+ }
+}
diff --git a/src/http/rule.rs b/src/http/rule.rs
new file mode 100644
index 0000000..1672d99
--- /dev/null
+++ b/src/http/rule.rs
@@ -0,0 +1,123 @@
+use std::collections::HashMap;
+
+// Lookup table from method name, exactly as it appears in the request line
+// (upper case), to the matching `Method` variant. Used by `Method::from_string`.
+lazy_static! {
+ static ref METHODS: HashMap<&'static str, Method> = {
+ let mut m = HashMap::new();
+ m.insert("GET", Method::GET);
+ m.insert("HEAD", Method::HEAD);
+ m.insert("POST", Method::POST);
+ m.insert("PUT", Method::PUT);
+ m.insert("DELETE", Method::DELETE);
+ m.insert("CONNECT", Method::CONNECT);
+ m.insert("OPTIONS", Method::OPTIONS);
+ m.insert("TRACE", Method::TRACE);
+ m
+ };
+}
+
+/*
+* RFC 7230, Page 19
+* A parsed request: the request line plus the header fields in the order they
+* were read. No message body is represented.
+*/
+#[derive(Debug)]
+pub struct HTTPMessage {
+ pub request_line: RequestLine,
+ pub header_fields: Vec<HeaderField>,
+}
+
+/*
+* RFC 7230, Page 23
+* One header field: a name and its value.
+*/
+#[derive(Debug)]
+pub struct HeaderField {
+ pub name: FieldName,
+ pub value: FieldValue,
+}
+
+/*
+* RFC 7230, Page 23
+* Name of a header field.
+*/
+#[derive(Debug)]
+pub struct FieldName {
+ // The name as text.
+ pub lexeme: String,
+}
+
+/*
+* RFC 7230, Page 23
+* Value of a header field.
+*/
+#[derive(Debug)]
+pub struct FieldValue {
+ // The value as raw bytes; it is not required to be valid UTF-8.
+ pub content: Vec<u8>,
+}
+
+/*
+* RFC 7230, Page 23
+* One field-content item: a character, optionally followed (after whitespace)
+* by a second character. The whitespace itself is not stored.
+*/
+#[derive(Debug)]
+pub struct FieldContent {
+ pub first_char: u8,
+ pub second_char: Option<u8>,
+}
+
+/*
+* RFC 7230, Page 21
+* The first line of a request: method, target and protocol version.
+*/
+#[derive(Debug)]
+pub struct RequestLine {
+ pub method: Method,
+ // Only the origin-form of a request target is represented.
+ pub request_target: OriginForm,
+ pub http_version: HTTPVersion,
+}
+
+/*
+* RFC 7231, Page 22
+* The request methods this crate recognises. The enum is a plain tag, so it is
+* cheap to copy, can be compared with `==` and can be used as a hash key.
+*/
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Method {
+    GET,
+    HEAD,
+    POST,
+    PUT,
+    DELETE,
+    CONNECT,
+    OPTIONS,
+    TRACE,
+}
+
+impl Method {
+    /// Looks up the method named `string` in `METHODS`.
+    ///
+    /// The match is exact (case-sensitive); unknown names yield `None`.
+    pub fn from_string(string: &str) -> Option<Method> {
+        let method = METHODS.get(string)?;
+        Some(method.clone())
+    }
+}
+
+/*
+* RFC 7230, Page 41
+* Origin-form request target: an absolute path with an optional query.
+*/
+#[derive(Debug)]
+pub struct OriginForm {
+ pub absolute_path: AbsolutePath,
+ // None when the target contains no '?'.
+ pub query: Option<Query>,
+}
+
+/*
+* RFC 7230, Page 14
+* Protocol version as the two numbers of "HTTP/<major>.<minor>".
+*/
+#[derive(Debug)]
+pub struct HTTPVersion {
+ pub major: u32,
+ pub minor: u32,
+}
+
+// Path of a request target, split at '/' into its segments.
+#[derive(Debug)]
+pub struct AbsolutePath {
+ pub segments: Vec<Segment>,
+}
+
+// Query component of a request target (the part after '?').
+#[derive(Debug)]
+pub struct Query {
+ // The query as text.
+ pub lexeme: String,
+}
+
+// One segment of an absolute path (the text between two '/').
+#[derive(Debug)]
+pub struct Segment {
+ // The segment as text; may be empty.
+ pub lexeme: String,
+}