You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
rubygem-faraday/SOURCES/parser.rb

247 lines
7.6 KiB

module MultipartParser
  # A low level, streaming parser for multipart messages,
  # based on the node-formidable parser.
  #
  # Usage: create a parser, call #init_with_boundary, register callbacks
  # with #on, then feed the message to #write in one or more chunks.
  # The parser never copies payload data itself; data callbacks receive
  # the buffer together with a start and end index into it.
  class Parser
    def initialize
      @boundary = nil
      @boundary_chars = nil
      @lookbehind = nil
      @state = :parser_uninitialized
      @index = 0 # Index into boundary or header
      @flags = {}
      @marks = {} # Keep track of different parts
      @callbacks = {}
    end

    # Initializes the parser, using the given boundary
    # (the boundary parameter value, without the leading "--").
    #
    # All per-message state (boundary index, flags, marks) is reset, so a
    # parser can be re-initialized and reused for another message without
    # leftovers from an unfinished one. Registered callbacks are kept.
    def init_with_boundary(boundary)
      @boundary = "\r\n--" + boundary
      # Holds the bytes of a tentatively matched boundary so they can be
      # handed out as part data if the match turns out to be a false lead.
      @lookbehind = "\0" * (@boundary.length + 8)
      @state = :start
      @index = 0
      @flags = {}
      @marks = {}
      @boundary_chars = {}
      @boundary.each_byte do |b|
        @boundary_chars[b.chr] = true
      end
    end

    # Registers a callback to be called when the
    # given event occurs. Each callback is expected to
    # take three parameters: buffer, start_index, and end_index.
    # All of these parameters may be nil, depending on the callback.
    # For data callbacks the payload is buffer[start_index...end_index];
    # the buffer may be an internal scratch string that is overwritten
    # later, so copy the slice if it needs to be kept.
    # Registering a second callback for an event replaces the first.
    # Valid callbacks are:
    # :end
    # :header_field
    # :header_value
    # :header_end
    # :headers_end
    # :part_begin
    # :part_data
    # :part_end
    def on(event, &callback)
      @callbacks[event] = callback
    end

    # Writes data to the parser.
    # Returns the number of bytes parsed.
    # In practice, this means that if the return value
    # is less than the buffer length, a parse error occurred.
    # Parser state is only persisted when the whole buffer was consumed.
    # Writing to a parser that has not been initialized with a boundary
    # parses nothing and returns 0.
    def write(buffer)
      # Without a boundary there is nothing to match against; report that
      # nothing was consumed instead of failing on the missing boundary.
      return 0 if @state == :parser_uninitialized

      i = 0
      buffer_length = buffer.length
      index = @index
      flags = @flags.dup
      state = @state
      lookbehind = @lookbehind
      boundary = @boundary
      boundary_chars = @boundary_chars
      boundary_length = boundary.length
      boundary_end = boundary_length - 1

      while i < buffer_length
        c = buffer[i, 1]
        case state
        when :start
          # Does not consume the character; it is re-examined below.
          index = 0
          state = :start_boundary
        when :start_boundary # Differs in that it has no preceding \r\n
          if index == boundary_length - 2
            return i unless c == "\r"
            index += 1
          elsif index - 1 == boundary_length - 2
            return i unless c == "\n"
            # Boundary read successfully, begin next part
            callback(:part_begin)
            state = :header_field_start
          else
            # Compare against the boundary without its leading "\r\n"
            return i unless c == boundary[index + 2, 1] # Unexpected character
            index += 1
          end
          i += 1
        when :header_field_start
          state = :header_field
          @marks[:header_field] = i
          index = 0
        when :header_field
          if c == "\r"
            # A bare "\r" where a header name would start ends the headers
            @marks.delete :header_field
            state = :headers_almost_done
          else
            index += 1
            unless c == "-" # Skip hyphens
              if c == ":"
                return i if index == 1 # Empty header field
                data_callback(:header_field, buffer, i, :clear => true)
                state = :header_value_start
              else
                # Only ASCII letters (and hyphens) are accepted in names
                cl = c.downcase
                return i if cl < "a" || cl > "z"
              end
            end
          end
          i += 1
        when :header_value_start
          if c == " " # Skip spaces
            i += 1
          else
            @marks[:header_value] = i
            state = :header_value
          end
        when :header_value
          if c == "\r"
            data_callback(:header_value, buffer, i, :clear => true)
            callback(:header_end)
            state = :header_value_almost_done
          end
          i += 1
        when :header_value_almost_done
          return i unless c == "\n"
          state = :header_field_start
          i += 1
        when :headers_almost_done
          return i unless c == "\n"
          callback(:headers_end)
          state = :part_data_start
          i += 1
        when :part_data_start
          state = :part_data
          @marks[:part_data] = i
        when :part_data
          prev_index = index
          if index == 0
            # Boyer-Moore derived algorithm to safely skip non-boundary data
            # See http://debuggable.com/posts/parsing-file-uploads-at-500-
            # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
            while i + boundary_length <= buffer_length
              break if boundary_chars.key? buffer[i + boundary_end, 1]
              i += boundary_length
            end
            # May be "" when the skip landed exactly on the buffer end;
            # it then matches nothing and the loop terminates.
            c = buffer[i, 1]
          end

          if index < boundary_length
            if boundary[index, 1] == c
              if index == 0
                # Everything before the candidate boundary is part data
                data_callback(:part_data, buffer, i, :clear => true)
              end
              index += 1
            else # It was not the boundary we found, after all
              index = 0
            end
          elsif index == boundary_length
            index += 1
            # Forget the verdict of any earlier boundary candidate that
            # turned out to be a false alarm; a stale :part_boundary flag
            # would otherwise shadow a genuine closing delimiter.
            flags.delete :part_boundary
            flags.delete :last_boundary
            if c == "\r"
              flags[:part_boundary] = true
            elsif c == "-"
              flags[:last_boundary] = true
            else # We did not find a boundary after all
              index = 0
            end
          elsif index - 1 == boundary_length
            if flags[:part_boundary]
              index = 0
              if c == "\n"
                flags.delete :part_boundary
                callback(:part_end)
                callback(:part_begin)
                state = :header_field_start
                i += 1
                next # Ugly way to break out of the case statement
              end
            elsif flags[:last_boundary]
              if c == "-"
                callback(:part_end)
                callback(:end)
                state = :end
              else
                index = 0 # False alarm
              end
            else
              index = 0
            end
          end

          if index > 0
            # When matching a possible boundary, keep a lookbehind
            # reference in case it turns out to be a false lead
            lookbehind[index - 1] = c
          elsif prev_index > 0
            # If our boundary turns out to be rubbish,
            # the captured lookbehind belongs to part_data
            callback(:part_data, lookbehind, 0, prev_index)
            @marks[:part_data] = i
            # Reconsider the current character as it might be the
            # beginning of a new sequence.
            i -= 1
          end
          i += 1
        when :end
          # Anything after the closing delimiter is silently consumed
          i += 1
        else
          return i
        end
      end

      # Flush whatever is still open at the end of this buffer; the marks
      # are reset to 0 so the data continues at the start of the next one.
      data_callback(:header_field, buffer, buffer_length)
      data_callback(:header_value, buffer, buffer_length)
      data_callback(:part_data, buffer, buffer_length)

      @index = index
      @state = state
      @flags = flags

      buffer_length
    end

    private

    # Issues a callback, if one is registered for the event.
    # Data callbacks covering an empty range (start == the_end)
    # are suppressed.
    def callback(event, buffer = nil, start = nil, the_end = nil)
      return if !start.nil? && start == the_end
      if @callbacks.key? event
        @callbacks[event].call(buffer, start, the_end)
      end
    end

    # Issues a data callback for the range from the current mark of
    # data_type up to the_end. Does nothing if no mark is set.
    # The only valid option is :clear:
    # if true, the mark is removed afterwards (the data item is complete);
    # if not specified, the mark is reset to 0 so that the item
    # continues at the beginning of the next buffer.
    def data_callback(data_type, buffer, the_end, options = {})
      return unless @marks.key? data_type
      callback(data_type, buffer, @marks[data_type], the_end)
      if options[:clear]
        @marks.delete data_type
      else
        @marks[data_type] = 0
      end
    end
  end
end