Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds from, read, write, to_s methods #52

Merged
merged 15 commits into from
Aug 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 110 additions & 60 deletions README.md

Large diffs are not rendered by default.

41 changes: 30 additions & 11 deletions lib/daru/io/exporters/avro.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@
module Daru
module IO
module Exporters
# Avro Exporter Class, that extends `to_avro` method to `Daru::DataFrame`
# instance variables
# Avro Exporter Class, that extends `to_avro_string` and `write_avro` methods to
# `Daru::DataFrame` instance variables
class Avro < Base
Daru::DataFrame.register_io_module :to_avro, self
Daru::DataFrame.register_io_module :to_avro_string, self
Daru::DataFrame.register_io_module :write_avro, self

# Exports `Daru::DataFrame` to an Avro file.
# Initializes an Avro Exporter instance.
#
# @param dataframe [Daru::DataFrame] A dataframe to export
# @param path [String] Path of Avro file where the dataframe is to be saved
# @param schema [Avro::Schema or Hash] The schema should contain details such as `:type`,
# `:name` and `:fields`
#
# @example Writing to an Avro file
# @return A `Daru::IO::Exporters::Avro` instance
#
# @example Initializing an Avro Exporter
# schema = {
# "type" => "record",
# "name" => "User",
Expand All @@ -40,25 +42,42 @@ class Avro < Base
# # 1 Jon 100 true
# # 2 Tyrion 100 true
#
# Daru::IO::Exporters::Avro.new(df, "azorahai.avro", schema).call
def initialize(dataframe, path, schema=nil)
# instance = Daru::IO::Exporters::Avro.new(df, schema)
def initialize(dataframe, schema=nil)
optional_gem 'avro'
require 'json'

super(dataframe)
@path = path
@schema = schema
end

def call
# Exports an Avro Exporter instance to a file-writable String.
#
# @return [String] A file-writable string
#
# @example Getting a file-writable string from Avro Exporter instance
# instance.to_s
#
# #=> "Obj\u0001\u0004\u0014avro.codec\bnull\u0016avro.schema\xBC\u0002{\"type\":\"record\"..."
def to_s
# No Avro-specific logic here: defers entirely to the superclass (`Base`) implementation.
super
end

# Exports an Avro Exporter instance to an avro file.
#
# @param path [String] Path of Avro file where the dataframe is to be saved
#
# @example Writing an Avro Exporter instance to an Avro file
# instance.write('azor_ahai.avro')
def write(path)
@schema_obj = process_schema
@writer = ::Avro::IO::DatumWriter.new(@schema_obj)
@buffer = StringIO.new
@writer = ::Avro::DataFile::Writer.new(@buffer, @writer, @schema_obj)
@dataframe.each_row { |row| @writer << row.to_h }
@writer.close

File.open(@path, 'w') { |file| file.write(@buffer.string) }
File.open(path, 'w') { |file| file.write(@buffer.string) }
end

private
Expand Down
16 changes: 16 additions & 0 deletions lib/daru/io/exporters/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,22 @@ def initialize(dataframe)
end
@dataframe = dataframe
end

# Exports an Exporter instance to a file-writable String.
#
# @return A file-writable `String`
#
# @example Getting a file-writable string from an Exporter instance
#
# instance = Daru::IO::Exporters::Format.new(opts)
# instance.to_s #! same as df.to_format_string(opts)
def to_s
  # Round-trips through a temporary file: #write (expected to be provided by
  # the concrete exporter) fills it, then the file's contents are returned.
  tempfile = Tempfile.new('tempfile')
  # Release our own handle first; #write reopens the file by its path.
  tempfile.close
  write(tempfile.path)

  File.read(tempfile.path)
ensure
  # Delete the temporary file right away instead of leaving it on disk until
  # the Tempfile object is garbage-collected. Guarded in case Tempfile.new
  # itself raised and nothing was created.
  tempfile.unlink if tempfile
end
end
end
end
Expand Down
52 changes: 32 additions & 20 deletions lib/daru/io/exporters/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
module Daru
module IO
module Exporters
# CSV Exporter Class, that extends `to_csv` method to `Daru::DataFrame`
# instance variables
# CSV Exporter Class, that extends `to_csv_string` and `write_csv` methods to
# `Daru::DataFrame` instance variables
class CSV < Base
Daru::DataFrame.register_io_module :to_csv, self
Daru::DataFrame.register_io_module :to_csv_string, self
Daru::DataFrame.register_io_module :write_csv, self

# Exports `Daru::DataFrame` to a .csv / .csv.gz file.
# Initializes a CSV Exporter instance
#
# @param dataframe [Daru::DataFrame] A dataframe to export
# @param path [String] Path of CSV file where the dataframe is to be saved
# @param converters [Symbol] A type to convert the data in dataframe
# @param compression [Symbol] Defaults to `:infer`, which decides depending on file format
# like `.csv.gz`. For explicitly writing into a `.csv.gz` file, set
Expand All @@ -23,38 +23,50 @@ class CSV < Base
# @param options [Hash] CSV standard library options, to tweak other
# default options of CSV gem.
#
# @example Writing to a CSV file without options
# @example Initializing a CSV Exporter Instance
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::CSV.new(df, "filename.csv").call
#
# @example Writing to a CSV file with options
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::CSV.new(df, "filename.csv", convert_comma: true).call
def initialize(dataframe, path, converters: :numeric, compression: :infer,
# csv_instance = Daru::IO::Exporters::CSV.new(df, col_sep: ' ')
# csv_gz_instance = Daru::IO::Exporters::CSV.new(df, col_sep: ' ', compression: :gzip)
def initialize(dataframe, converters: :numeric, compression: :infer,
headers: nil, convert_comma: nil, **options)
require 'csv'

super(dataframe)
@path = path
@headers = headers
@compression = compression
@convert_comma = convert_comma
@options = options.merge converters: converters
end

def call
# Exports a CSV Exporter instance to a file-writable String.
#
# @return [String] A file-writable string
#
# @example Getting a file-writable string from CSV Exporter instance
# csv_instance.to_s
# #=> "a b\n1 3\n2 4\n"
#
# csv_gz_instance.to_s
# #=> "\u001F\x8B\b\u0000*D\xA4Y\u0000\u0003KTH\xE22T0\xE62R0\xE1\u0002\u0000\xF2\\\x96y\..."
def to_s
# No CSV-specific logic here: defers entirely to the superclass (`Base`) implementation.
super
end

# Exports a CSV Exporter instance to a csv / csv.gz file.
#
# @param path [String] Path of the csv / csv.gz file where the dataframe is to be saved
#
# @example Writing a CSV Exporter instance to a csv / csv.gz file
# csv_instance.write('filename.csv')
# csv_gz_instance.write('filename.csv.gz')
def write(path)
@path = path
contents = process_dataframe

if compression?(:gzip, '.csv.gz')
Expand Down
81 changes: 39 additions & 42 deletions lib/daru/io/exporters/excel.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
module Daru
module IO
module Exporters
# Excel Exporter Class, that extends `to_excel` method to `Daru::DataFrame`
# instance variables
# Excel Exporter Class, that extends `to_excel_string` and `write_excel` methods to
# `Daru::DataFrame` instance variables
class Excel < Base
Daru::DataFrame.register_io_module :to_excel, self
Daru::DataFrame.register_io_module :to_excel_string, self
Daru::DataFrame.register_io_module :write_excel, self

# Exports `Daru::DataFrame` to an Excel Spreadsheet.
# Initializes an Excel Exporter instance.
#
# @note For giving formatting options as hashes to the `:data`, `:index` or `header`
# keyword argument(s), please have a look at the
Expand All @@ -17,9 +18,8 @@ class Excel < Base
# {http://www.rubydoc.info/gems/ruby-spreadsheet/Spreadsheet/Format Spreadsheet::Format}
# pages.
#
# @param dataframe [Daru::DataFrame] A dataframe to export
# @param path [String] Path of the file where the `Daru::DataFrame`
# should be written.
# @param dataframe [Daru::DataFrame] A dataframe to export. Supports even dataframes
# with multi-index.
# @param header [Hash or Boolean] Defaults to true. When set to false or nil,
# headers are not written. When given a hash of formatting options,
# headers are written with the specific formatting. When set to true,
Expand All @@ -33,56 +33,53 @@ class Excel < Base
# index values are written with the specific formatting. When set to true,
# index values are written without any formatting.
#
# @example Writing to an Excel file without options
# @example Initializing an Excel Exporter instance
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::Excel.new(df, "dataframe_df.xls").call
#
# @example Writing to an Excel file with formatting options
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::Excel.new(df,
# "dataframe_df.xls",
# header: { color: :red, weight: :bold },
# index: false,
# data: { color: :blue }
# ).call
#
# @example Writing a DataFrame with Multi-Index to an Excel file
# df = Daru::DataFrame.new [[1,2],[3,4]], order: [:x, :y], index: [[:a, :b, :c], [:d, :e, :f]]
#
# #=> #<Daru::DataFrame(2x2)>
# # x y
# # a b c 1 3
# # d e f 2 4
#
# Daru::IO::Exporters::Excel.new(df,
# "dataframe_df.xls",
# simple_instance = Daru::IO::Exporters::Excel.new(df)
# formatted_instance = Daru::IO::Exporters::Excel.new(
# df,
# header: { color: :red, weight: :bold },
# index: { color: :green },
# data: { color: :blue }
# ).call
def initialize(dataframe, path, header: true, data: true, index: true)
# index: false,
# data: { color: :blue }
# )
def initialize(dataframe, header: true, data: true, index: true)
optional_gem 'spreadsheet', '~> 1.1.1'

super(dataframe)
@path = path
@data = data
@index = index
@header = header
end

def call
# Exports an Excel Exporter instance to a file-writable String.
#
# @return [String] A file-writable string
#
# @example Getting a file-writable string from Excel Exporter instance
# simple_instance.to_s #! same as df.to_excel_string
#
# #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..."
#
# formatted_instance.to_s
#
# #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..."
def to_s
# No Excel-specific logic here: defers entirely to the superclass (`Base`) implementation.
super
end

# Exports an Excel Exporter instance to an xls file.
#
# @param path [String] Path of excel file where the dataframe is to be saved
#
# @example Writing an Excel Exporter instance to an xls file
# simple_instance.write('filename.xls')
def write(path)
@book = Spreadsheet::Workbook.new
@sheet = @book.create_worksheet

Expand All @@ -94,7 +91,7 @@ def call
write_data(row, r+@row_offset)
end

@book.write(@path)
@book.write(path)
end

private
Expand Down
Loading