ruby - How do I combine this Hash to a single JSON object? -
I'm using the following code to generate a JSON file containing category info for a particular website.
The goal is to have a JSON file in the following format:
[ { "id":"36_17", "name":"diversen particulier", "group":"diversen", "search_attributes":{ "0":"prijs van/tot", "1":"groep en rubriek", "2":"conditie", } }, { "id":"36_18", "name":"diversen zakelijk", "group":"diversen", "search_attributes":{ "0":"prijs van/tot", "1":"groep en rubriek", "2":"conditie", } }, { "id":"36_19", "name":"overige diversen", "group":"diversen", "search_attributes":{ "0":"prijs van/tot", "1":"groep en rubriek", "2":"conditie", } }, {...} ] But I keep getting this format instead:
[ { "id":"36_17", "name":"diversen particulier", "group":"diversen", "search_attributes":{"0":"prijs van/tot"} }, { "id":"36_17", "name":"diversen particulier", "group":"diversen", "search_attributes":{"1":"groep en rubriek"} }, { "id":"36_17", "name":"diversen particulier", "group":"diversen", "search_attributes":{"2":"conditie"} }, {...} ] The search_attributes are not getting saved correctly: each attribute ends up in its own copy of the category entry instead of all attributes being collected in one entry.
I'm using the following code:
require 'json'
require 'mechanize'

# Question version of the scraper: walks the group and category links of
# marktplaats.nl and writes the collected category info to a JSON file.
#
# NOTE: this listing intentionally keeps the behaviour the question asks
# about. An item is built and appended inside the innermost loop, so every
# search attribute produces its own entry in @hashes instead of all
# attributes of a category being collected in a single entry.
@hashes = []

# initialize mechanize object
a = Mechanize.new

# begin scraping
a.get('http://www.marktplaats.nl/') do |page|
  groups = page.search('//*[(@id = "navigation-categories")]//a')

  groups.each_with_index do |group, index_1|
    a.get(group[:href]) do |page_2|
      categories = page_2.search('//*[(@id = "category-browser")]//a')

      categories.each_with_index do |category, index_2|
        a.get(category[:href]) do |page_3|
          search_attributes = page_3.search('//*[contains(concat( " ", @class, " " ), concat( " ", "heading", " " ))]')

          search_attributes.each_with_index do |attribute, index_3|
            # One item per attribute: this is what yields the repeated
            # entries shown in the unwanted output.
            item = {
              id: "#{index_1}_#{index_2}",
              name: category.text,
              group: group.text,
              :search_attributes => {
                # The interpolation yields "" when the heading text is 'outlet '.
                index_3.to_s => "#{attribute.text unless attribute.text == 'outlet '}"
              }
            }

            @hashes << item
            puts item
          end
        end
      end
    end
  end
end

# open file and begin
File.open("json/light/#{Time.now.strftime '%Y%m%d%H%M%S'}_light_categories.json", 'w') do |f|
  puts '# writing category info json file'
  f.write(@hashes.to_json)
  puts "|-----------> done. #{@hashes.length} written."
end

puts '# finished.'
# The question: what is causing this, and how do I solve it?
Update: a big thanks to arie-shaw for the answer.
Here's the working code:
require 'fileutils'
require 'json'
require 'mechanize'

# Scrapes the group and category links of marktplaats.nl and writes one JSON
# entry per category. Each entry carries an id built from the group and
# category positions, the category name, the group name, and a
# search_attributes hash keyed by the attribute's position ("0", "1", ...).
@hashes = []

# initialize mechanize object
a = Mechanize.new

# begin scraping
a.get('http://www.marktplaats.nl/') do |page|
  groups = page.search('//*[(@id = "navigation-categories")]//a')

  groups.each_with_index do |group, group_index|
    a.get(group[:href]) do |group_page|
      categories = group_page.search('//*[(@id = "category-browser")]//a')

      categories.each_with_index do |category, category_index|
        a.get(category[:href]) do |category_page|
          search_attributes = category_page.search('//*[contains(concat( " ", @class, " " ), concat( " ", "heading", " " ))]')

          # Collect every attribute of this category into ONE hash, so the
          # category contributes a single entry to @hashes.
          attributes_hash = search_attributes.each_with_index.map do |attribute, attribute_index|
            # A heading whose text is 'outlet ' is stored as an empty string.
            value = attribute.text == 'outlet ' ? '' : attribute.text.to_s
            [attribute_index.to_s, value]
          end.to_h

          item = {
            # Underscore separator, matching the required "36_17" id format.
            id: "#{group_index}_#{category_index}",
            name: category.text,
            group: group.text,
            search_attributes: attributes_hash
          }

          @hashes << item
          puts item
        end
      end
    end
  end
end

# open file and begin
output_dir = 'json/light'
# File.open does not create missing directories, so make sure the target exists.
FileUtils.mkdir_p(output_dir)

File.open("#{output_dir}/#{Time.now.strftime '%Y%m%d%H%M%S'}_light_categories.json", 'w') do |f|
  puts '# writing category info json file'
  f.write(@hashes.to_json)
  puts "|-----------> done. #{@hashes.length} written."
end

puts '# finished.'
The inner each_with_index should be used to generate the search_attributes hash, rather than an element hash of the top-level array in the result.
# begin scraping
# Fragment: `a` and `@hashes` are not defined in this snippet and are
# expected to be set up by the surrounding script.
a.get('http://www.marktplaats.nl/') do |page|
  groups = page.search('//*[(@id = "navigation-categories")]//a')

  groups.each_with_index do |group, index_1|
    a.get(group[:href]) do |page_2|
      categories = page_2.search('//*[(@id = "category-browser")]//a')

      categories.each_with_index do |category, index_2|
        a.get(category[:href]) do |page_3|
          search_attributes = page_3.search('//*[contains(concat( " ", @class, " " ), concat( " ", "heading", " " ))]')

          # Build the per-category attribute hash first ...
          attributes_hash = {}
          search_attributes.each_with_index do |attribute, index_3|
            # The interpolation yields "" when the heading text is 'outlet '.
            attributes_hash[index_3.to_s] = "#{attribute.text unless attribute.text == 'outlet '}"
          end

          # ... then append exactly one entry for the category.
          @hashes << {
            id: "#{index_1}_#{index_2}",
            name: category.text,
            group: group.text,
            search_attributes: attributes_hash
          }
        end
      end
    end
  end
end
# Tags: ruby json hash mechanize
No comments:
Post a Comment