Skip to content

Commit

Permalink
Merge pull request #52 from athityakumar/read-write-s-methods
Browse files Browse the repository at this point in the history
Adds from, read, write, to_s methods
  • Loading branch information
athityakumar authored Aug 31, 2017
2 parents d613ada + 5e7ead9 commit 2407eee
Show file tree
Hide file tree
Showing 44 changed files with 1,215 additions and 808 deletions.
170 changes: 110 additions & 60 deletions README.md

Large diffs are not rendered by default.

41 changes: 30 additions & 11 deletions lib/daru/io/exporters/avro.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@
module Daru
module IO
module Exporters
# Avro Exporter Class, that extends `to_avro` method to `Daru::DataFrame`
# instance variables
# Avro Exporter Class, that extends `to_avro_string` and `write_avro` methods to
# `Daru::DataFrame` instance variables
class Avro < Base
Daru::DataFrame.register_io_module :to_avro, self
Daru::DataFrame.register_io_module :to_avro_string, self
Daru::DataFrame.register_io_module :write_avro, self

# Exports `Daru::DataFrame` to an Avro file.
# Initializes an Avro Exporter instance.
#
# @param dataframe [Daru::DataFrame] A dataframe to export
# @param path [String] Path of Avro file where the dataframe is to be saved
# @param schema [Avro::Schema or Hash] The schema should contain details such as `:type`,
# `:name` and `:fields`
#
# @example Writing to an Avro file
# @return A `Daru::IO::Exporters::Avro` instance
#
# @example Initializing an Avro Exporter
# schema = {
# "type" => "record",
# "name" => "User",
Expand All @@ -40,25 +42,42 @@ class Avro < Base
# # 1 Jon 100 true
# # 2 Tyrion 100 true
#
# Daru::IO::Exporters::Avro.new(df, "azorahai.avro", schema).call
def initialize(dataframe, path, schema=nil)
# instance = Daru::IO::Exporters::Avro.new(df, schema)
def initialize(dataframe, schema=nil)
optional_gem 'avro'
require 'json'

super(dataframe)
@path = path
@schema = schema
end

def call
# Serializes the dataframe held by this Avro Exporter instance into a
# String that can be written to a file as-is.
#
# @return [String] A file-writable string
#
# @example Getting a file-writable string from Avro Exporter instance
#   instance.to_s
#
#   #=> "Obj\u0001\u0004\u0014avro.codec\bnull\u0016avro.schema\xBC\u0002{\"type\":\"record\"..."
def to_s
  # Nothing Avro-specific happens here; defer to the parent class.
  super()
end

# Exports an Avro Exporter instance to an avro file.
#
# @param path [String] Path of Avro file where the dataframe is to be saved
#
# @example Writing an Avro Exporter instance to an Avro file
# instance.write('azor_ahai.avro')
def write(path)
@schema_obj = process_schema
@writer = ::Avro::IO::DatumWriter.new(@schema_obj)
@buffer = StringIO.new
@writer = ::Avro::DataFile::Writer.new(@buffer, @writer, @schema_obj)
@dataframe.each_row { |row| @writer << row.to_h }
@writer.close

File.open(@path, 'w') { |file| file.write(@buffer.string) }
File.open(path, 'w') { |file| file.write(@buffer.string) }
end

private
Expand Down
16 changes: 16 additions & 0 deletions lib/daru/io/exporters/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,22 @@ def initialize(dataframe)
end
@dataframe = dataframe
end

# Exports an Exporter instance to a file-writable String.
#
# The exporter's own `write` method is pointed at a temporary file, and the
# contents of that file are then read back and returned.
#
# @return [String] A file-writable `String`
#
# @example Getting a file-writable string from an Exporter instance
#
#   instance = Daru::IO::Exporters::Format.new(opts)
#   instance.to_s #! same as df.to_format_string(opts)
def to_s
  tempfile = Tempfile.new('tempfile')
  begin
    path = tempfile.path
    write(path)
    File.read(path)
  ensure
    # Close the handle and delete the file right away, instead of leaving
    # both around until the Tempfile object is garbage collected.
    tempfile.close!
  end
end
end
end
end
Expand Down
52 changes: 32 additions & 20 deletions lib/daru/io/exporters/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
module Daru
module IO
module Exporters
# CSV Exporter Class, that extends `to_csv` method to `Daru::DataFrame`
# instance variables
# CSV Exporter Class, that extends `to_csv_string` and `write_csv` methods to
# `Daru::DataFrame` instance variables
class CSV < Base
Daru::DataFrame.register_io_module :to_csv, self
Daru::DataFrame.register_io_module :to_csv_string, self
Daru::DataFrame.register_io_module :write_csv, self

# Exports `Daru::DataFrame` to a .csv / .csv.gz file.
# Initializes a CSV Exporter instance
#
# @param dataframe [Daru::DataFrame] A dataframe to export
# @param path [String] Path of CSV file where the dataframe is to be saved
# @param converters [Symbol] A type to convert the data in dataframe
# @param compression [Symbol] Defaults to `:infer`, which decides depending on file format
# like `.csv.gz`. For explicitly writing into a `.csv.gz` file, set
Expand All @@ -23,38 +23,50 @@ class CSV < Base
# @param options [Hash] CSV standard library options, to tweak other
# default options of CSV gem.
#
# @example Writing to a CSV file without options
# @example Initializing a CSV Exporter Instance
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::CSV.new(df, "filename.csv").call
#
# @example Writing to a CSV file with options
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::CSV.new(df, "filename.csv", convert_comma: true).call
def initialize(dataframe, path, converters: :numeric, compression: :infer,
# csv_instance = Daru::IO::Exporters::CSV.new(df, col_sep: ' ')
# csv_gz_instance = Daru::IO::Exporters::CSV.new(df, col_sep: ' ', compression: :gzip)
def initialize(dataframe, converters: :numeric, compression: :infer,
headers: nil, convert_comma: nil, **options)
require 'csv'

super(dataframe)
@path = path
@headers = headers
@compression = compression
@convert_comma = convert_comma
@options = options.merge converters: converters
end

def call
# Serializes the dataframe held by this CSV Exporter instance into a
# String that can be written to a file as-is.
#
# @return [String] A file-writable string
#
# @example Getting a file-writable string from CSV Exporter instance
#   csv_instance.to_s
#   #=> "a b\n1 3\n2 4\n"
#
#   csv_gz_instance.to_s
#   #=> "\u001F\x8B\b\u0000*D\xA4Y\u0000\u0003KTH\xE22T0\xE62R0\xE1\u0002\u0000\xF2\\\x96y\..."
def to_s
  # Nothing CSV-specific happens here; defer to the parent class.
  super()
end

# Exports a CSV Exporter instance to a csv / csv.gz file.
#
# @param path [String] Path of the csv / csv.gz file where the dataframe is to be saved
#
# @example Writing a CSV Exporter instance to a csv / csv.gz file
# csv_instance.write('filename.csv')
# csv_gz_instance.write('filename.csv.gz')
def write(path)
@path = path
contents = process_dataframe

if compression?(:gzip, '.csv.gz')
Expand Down
81 changes: 39 additions & 42 deletions lib/daru/io/exporters/excel.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
module Daru
module IO
module Exporters
# Excel Exporter Class, that extends `to_excel` method to `Daru::DataFrame`
# instance variables
# Excel Exporter Class, that extends `to_excel_string` and `write_excel` methods to
# `Daru::DataFrame` instance variables
class Excel < Base
Daru::DataFrame.register_io_module :to_excel, self
Daru::DataFrame.register_io_module :to_excel_string, self
Daru::DataFrame.register_io_module :write_excel, self

# Exports `Daru::DataFrame` to an Excel Spreadsheet.
# Initializes an Excel Exporter instance.
#
# @note For giving formatting options as hashes to the `:data`, `:index` or `header`
# keyword argument(s), please have a look at the
Expand All @@ -17,9 +18,8 @@ class Excel < Base
# {http://www.rubydoc.info/gems/ruby-spreadsheet/Spreadsheet/Format Spreadsheet::Format}
# pages.
#
# @param dataframe [Daru::DataFrame] A dataframe to export
# @param path [String] Path of the file where the `Daru::DataFrame`
# should be written.
# @param dataframe [Daru::DataFrame] A dataframe to export. Supports even dataframes
# with multi-index.
# @param header [Hash or Boolean] Defaults to true. When set to false or nil,
# headers are not written. When given a hash of formatting options,
# headers are written with the specific formatting. When set to true,
Expand All @@ -33,56 +33,53 @@ class Excel < Base
# index values are written with the specific formatting. When set to true,
# index values are written without any formatting.
#
# @example Writing to an Excel file without options
# @example Initializing an Excel Exporter instance
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::Excel.new(df, "dataframe_df.xls").call
#
# @example Writing to an Excel file with formatting options
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
#
# #=> #<Daru::DataFrame(2x2)>
# # a b
# # 0 1 3
# # 1 2 4
#
# Daru::IO::Exporters::Excel.new(df,
# "dataframe_df.xls",
# header: { color: :red, weight: :bold },
# index: false,
# data: { color: :blue }
# ).call
#
# @example Writing a DataFrame with Multi-Index to an Excel file
# df = Daru::DataFrame.new [[1,2],[3,4]], order: [:x, :y], index: [[:a, :b, :c], [:d, :e, :f]]
#
# #=> #<Daru::DataFrame(2x2)>
# # x y
# # a b c 1 3
# # d e f 2 4
#
# Daru::IO::Exporters::Excel.new(df,
# "dataframe_df.xls",
# simple_instance = Daru::IO::Exporters::Excel.new(df)
# formatted_instance = Daru::IO::Exporters::Excel.new(
# df,
# header: { color: :red, weight: :bold },
# index: { color: :green },
# data: { color: :blue }
# ).call
def initialize(dataframe, path, header: true, data: true, index: true)
# index: false,
# data: { color: :blue }
# )
def initialize(dataframe, header: true, data: true, index: true)
optional_gem 'spreadsheet', '~> 1.1.1'

super(dataframe)
@path = path
@data = data
@index = index
@header = header
end

def call
# Serializes the dataframe held by this Excel Exporter instance into a
# String that can be written to a file as-is.
#
# @return [String] A file-writable string
#
# @example Getting a file-writable string from Excel Exporter instance
#   simple_instance.to_s #! same as df.to_excel_string
#
#   #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..."
#
#   formatted_instance.to_s
#
#   #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..."
def to_s
  # Nothing Excel-specific happens here; defer to the parent class.
  super()
end

# Exports an Excel Exporter instance to an xls file.
#
# @param path [String] Path of excel file where the dataframe is to be saved
#
# @example Writing an Excel Exporter instance to an xls file
# instance.write('filename.xls')
def write(path)
@book = Spreadsheet::Workbook.new
@sheet = @book.create_worksheet

Expand All @@ -94,7 +91,7 @@ def call
write_data(row, r+@row_offset)
end

@book.write(@path)
@book.write(path)
end

private
Expand Down
Loading

0 comments on commit 2407eee

Please sign in to comment.