-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.rb
164 lines (137 loc) · 3.83 KB
/
script.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
require './ncbi.rb'
require './kegg.rb'
require './kegg_enzyme.rb'
require 'yaml'
require 'logger'
require 'fileutils'
#require 'byebug'
# Downloads gene data for the queries, enzymes and compounds listed in local
# text files and writes one "<query>.query" file per input line.
#
# All inputs are resolved relative to the current working directory:
# config.yml, keys.txt, enzymes.txt and compounds.txt.
# The remote lookups are delegated to NcbiAPI, KeggAPI and KeggEnzyme, which
# are defined outside this class.
class DownloadGenes
  # @return [Logger] the logger created in #initialize (STDOUT, INFO level)
  def log
    @logger
  end

  # Loads config.yml, creates the output root directory and reads the three
  # input lists into @queries, @enzymes and @compounds.
  #
  # @raise [Errno::ENOENT] if config.yml or one of the input files is missing
  def initialize
    @logger = Logger.new(STDOUT)
    @logger.level = Logger::INFO

    # load configuration
    config = YAML.load_file('config.yml')
    output = config["output"]

    # Output root; with "date_prefix" enabled every run gets its own
    # timestamped sub-directory (the random suffix keeps two runs started in
    # the same millisecond apart).
    @dir_prefix = output["dir"]
    if output["date_prefix"]
      run_name = Time.now.strftime('%Y-%m-%d-%H-%M-%S.%L') + "_" + rand(1_000_000).to_s
      @dir_prefix = File.join(output["dir"], run_name)
    end
    # mkdir_p is a no-op for an existing directory and also creates missing
    # parents, so a nested "dir" value in the config works.
    FileUtils.mkdir_p(@dir_prefix)

    # per-source sub-directory names, relative to @dir_prefix
    @kegg_dir = output["kegg"]
    @kegg_enzyme_dir = output["kegg_enzyme"]
    @kegg_compound_dir = output["kegg_compound"]
    @ncbi_dir = output["ncbi"]

    # e-mail handed to NcbiAPI.new
    @email = config["email"]
    # value passed as the second argument of NcbiAPI#find
    @search = config["search"]["ncbi"]

    read_query_file
    read_enzyme_file
    read_compound_file
  end

  # Reads keys.txt into @queries.
  #
  # @return [Array<String>] one entry per non-blank line
  def read_query_file
    @queries = read_lines("keys.txt")
  end

  # Reads enzymes.txt into @enzymes.
  #
  # @return [Array<String>] one entry per non-blank line
  def read_enzyme_file
    @enzymes = read_lines("enzymes.txt")
  end

  # Reads compounds.txt into @compounds.
  #
  # @return [Array<String>] one entry per non-blank line
  def read_compound_file
    @compounds = read_lines("compounds.txt")
  end

  # For every compound, writes each element returned by
  # KeggEnzyme#get_genes_from_compound on its own line of
  # <dir_prefix>/<kegg_compound dir>/<compound>.query.
  def kegg_compound
    client = KeggEnzyme.new
    @compounds.each do |query|
      dirname = ensure_output_dir(@kegg_compound_dir)
      log.info "Starting Compound query (KEGG): #{query}"
      result = client.get_genes_from_compound(query)
      open_query_file(dirname, query) do |fw|
        result.each { |res| fw.puts res }
      end
      log.info "---------------"
    end
  end

  # For every enzyme, writes each element returned by
  # KeggEnzyme#get_genes_from_enzyme on its own line of
  # <dir_prefix>/<kegg_enzyme dir>/<enzyme>.query.
  def kegg_enzyme
    client = KeggEnzyme.new
    @enzymes.each do |query|
      dirname = ensure_output_dir(@kegg_enzyme_dir)
      log.info "Starting Enzyme query (KEGG): #{query}"
      result = client.get_genes_from_enzyme(query)
      open_query_file(dirname, query) do |fw|
        result.each { |res| fw.puts res }
      end
      log.info "---------------"
    end
  end

  # For every query, runs KeggAPI#find_genes, downloads the matches and writes
  # the #ntseq of each one to <dir_prefix>/<kegg dir>/<query>.query.
  def kegg
    api = KeggAPI.new
    @queries.each do |query|
      search = api.find_genes(query)
      keys = search.response.keys
      log.debug "keys: " + keys.join(", ")

      dirname = ensure_output_dir(@kegg_dir)
      log.info "Starting Gene query (KEGG): #{query}"
      result = search.download_genes
      open_query_file(dirname, query) do |fw|
        result.each { |res| fw.puts res.ntseq }
      end
      log.info "---------------"
    end
  end

  # For every query, runs NcbiAPI#find with the configured search value and
  # writes the #ntseq of each downloaded gene (skipping nil ones) to
  # <dir_prefix>/<ncbi dir>/<query>.query.
  def ncbi
    client = NcbiAPI.new(@email)
    @queries.each do |query|
      dirname = ensure_output_dir(@ncbi_dir)
      log.info "Starting Gene query (NCBI): #{query}"
      result_list = client.find(query, @search)
      # The file is opened before the download starts, as before, so a query
      # whose download fails still leaves an (empty) output file behind.
      open_query_file(dirname, query) do |fw|
        result_list.download_genes.each do |gene|
          # read the sequence once instead of once for the check and once for the write
          seq = gene.ntseq
          fw.puts seq unless seq.nil?
        end
      end
      log.info "---------------"
    end
  end

  private

  # Reads a text file into an array of lines, stripped of surrounding
  # whitespace (this also removes "\r" from CRLF files) and without blank
  # lines, so an empty line never turns into an empty query.
  def read_lines(path)
    File.read(path).split("\n").map(&:strip).reject(&:empty?)
  end

  # Makes sure <dir_prefix>/<subdir> exists and returns its path.
  def ensure_output_dir(subdir)
    dirname = File.join(@dir_prefix, subdir)
    FileUtils.mkdir_p(dirname)
    dirname
  end

  # Opens <dirname>/<query>.query for writing and yields the file handle.
  def open_query_file(dirname, query, &block)
    File.open(File.join(dirname, "#{query}.query"), 'w', &block)
  end
end
# Script entry point: build the downloader, then run each stage in order
# (NCBI first, followed by the three KEGG lookups).
genes = DownloadGenes.new
%i[ncbi kegg kegg_enzyme kegg_compound].each { |stage| genes.public_send(stage) }
#require 'pry'
#binding.pry