Skip to content
Snippets Groups Projects
Commit 3febd577 authored by ArtOfCode-'s avatar ArtOfCode-
Browse files

In the middle of this but other things are More Important

parent de3a57df
Branches
Tags
No related merge requests found
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
File added
# Imports posts from the Stack Exchange API, used to backfill rows that are
# present in a SEDE query but missing from the data dump.
#
# NOTE(review): this file was committed mid-work ("In the middle of this but
# other things are More Important") — #posts is an unimplemented stub.
class APIImport
  # @param options [OpenStruct] parsed CLI options; only #key is read here,
  #   for added API quota on large imports.
  def initialize(options)
    @options = options
    # API filter strings: opaque identifiers registered on api.stackexchange.com
    # that select which fields each endpoint returns.
    @filters = {
      posts: '!)4k)qh2Ywk(NPgBg204EA_3YzAND'
    }
  end

  # Performs a GET against the given URI with the given query parameters,
  # always including the configured API key. Non-2xx responses are logged
  # (the body is still parsed and returned, since the API reports errors
  # as JSON too).
  #
  # @param uri [String] base endpoint URI, without a query string
  # @param params [Hash] query parameters; merged over { key: @options.key },
  #   so callers may override the key
  # @return [Hash, Array] the parsed JSON response body
  def request(uri, params)
    params = {
      key: @options.key
    }.merge(params)
    full_uri = URI.parse(uri)
    # NOTE(review): values are not URL-encoded here — safe for numeric IDs
    # and the filter string, but confirm before passing free-form params.
    full_uri.query = params.map { |k, v| "#{k}=#{v}" }.join('&')
    resp = Net::HTTP.get_response(full_uri)
    unless resp.code.start_with? '2'
      $logger.error "#{resp.code} GET #{full_uri}:"
      $logger.error resp.body
    end
    data = JSON.parse(resp.body)
    # The API asks clients to pause via a 'backoff' field (seconds before the
    # next request). Honor it so large imports don't get throttled or banned —
    # the original left this branch empty.
    sleep data['backoff'].to_i if data['backoff']
    # Return the parsed body (the original fell off the empty if and returned nil).
    data
  end

  # Dump "Posts" row fields that #posts must eventually produce
  # (x = already mapped elsewhere in the importer):
  # [
  #   [ 0] "id",x
  #   [ 1] "post_type_id",x
  #   [ 2] "accepted_answer_id",
  #   [ 3] "creation_date",x
  #   [ 4] "score",x
  #   [ 5] "view_count",
  #   [ 6] "body",x
  #   [ 7] "owner_user_id",x
  #   [ 8] "last_editor_user_id",x
  #   [ 9] "last_edit_date",x
  #   [10] "last_activity_date",x
  #   [11] "title",x
  #   [12] "tags",
  #   [13] "answer_count",
  #   [14] "comment_count"x
  # ]

  # Fetches the posts with the given IDs from the API.
  #
  # @param ids [Array] post IDs present in the query results but absent from
  #   the dump
  # @return [nil] TODO(WIP): unimplemented stub. Callers already guard with
  #   `posts(missing) || []`, so returning nil is safe for now.
  def posts(ids)
    # TODO(WIP): GET /posts/{ids.join(';')} with filter: @filters[:posts] via
    # #request, paging as needed, and map the API fields onto the dump row
    # schema documented above.
  end
end
\ No newline at end of file
......@@ -80,6 +80,9 @@ class DumpImport
rows = document.css("#{data_type.downcase} row").to_a
rows = rows.map { |r| r.attributes.map { |n, a| [n.underscore, a.content] }.to_h }
# Allow calling code to add and filter rows before we dump to file.
rows = block_given? ? yield(rows) : rows
progress = ProgressBar.create(title: "#{data_type} (#{rows.size})", total: rows.size, progress_mark: '█')
builder = Nokogiri::XML::Builder.new do |xml|
......
......@@ -3,6 +3,7 @@ require 'optparse'
require 'open-uri'
require 'csv'
require_relative 'api_import'
require_relative 'dump_import'
require_relative 'database_import'
......@@ -43,14 +44,6 @@ def domain_from_api_param(api_param)
end
end
ERROR_CODES = {
no_site: 1,
undefined_mode: 2,
invalid_specifier: 3,
invalid_query_format: 4,
no_query: 5
}
@options = OpenStruct.new
opt_parser = OptionParser.new do |opts|
opts.banner = "Usage: rails r stack_import.rb [options]"
......@@ -102,19 +95,13 @@ opt_parser = OptionParser.new do |opts|
end
opt_parser.parse!
unless @options.site.present?
$logger.fatal 'Site must be specified'
exit ERROR_CODES[:no_site]
end
require = [:query, :path, :community, :category, :mode, :tag_set]
unless @options.query.present?
$logger.fatal 'Query revision ID must be specified'
exit ERROR_CODES[:no_query]
require.each do |r|
unless @options[r].present?
$logger.fatal "#{r.to_s} must be provided. Use --help for a list of parameters."
exit 1
end
unless @options.key.present?
$logger.warn 'No key specified. Can run without one, but only for a limited run. Large imports will require a key ' \
'for added quota.'
end
RequestContext.community = Community.find(@options.community)
......@@ -129,8 +116,24 @@ if @options.mode == 'full' || @options.mode == 'process'
domain = domain_from_api_param(@options.site)
query_response = Net::HTTP.get_response(URI("https://data.stackexchange.com/#{@options.site}/csv/#{@options.query}"))
query_results = CSV.parse(query_response.body)
required_ids = query_results.map { |r| r[0].to_s }
api_importer = APIImport.new @options
users, users_file = DumpImport.do_xml_transform(domain, 'Users', @options)
posts, posts_file = DumpImport.do_xml_transform(domain, 'Posts', @options)
posts, posts_file = DumpImport.do_xml_transform(domain, 'Posts', @options) do |rows|
ids = rows.map { |r| r['id'].to_s }
missing = required_ids.select { |e| !ids.include? e }
excess = ids.select { |e| !required_ids.include? e }
$logger.info "#{ids.size} rows in dump, #{missing.size} to get from API, #{excess.size} excess"
rows = rows.select { |r| !excess.include? r['id'].to_s }
rows = rows.concat(api_importer.posts(missing) || [])
rows
end
tags_file = DumpImport.generate_tags(posts, @options)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment