Skip to content

Commit 2a353a6

Browse files
committed
Add ability to read requester pays buckets
Also add profile flags to dictate which credentials are used for requester pays.
1 parent 850bc27 commit 2a353a6

File tree

6 files changed

+75
-51
lines changed

6 files changed

+75
-51
lines changed

awsutil/aws.go

+18-28
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ import (
2626
"syscall"
2727
"time"
2828

29+
"github.com/aws/aws-sdk-go/aws/credentials"
30+
2931
"github.com/aws/aws-sdk-go/aws"
3032
"github.com/aws/aws-sdk-go/aws/session"
3133
"github.com/aws/aws-sdk-go/service/s3"
3234
"github.com/jacobsa/fuse"
33-
"github.com/mattrbianchi/twig"
3435
"github.com/pkg/errors"
3536
)
3637

@@ -82,42 +83,41 @@ func GetObjectRange(url, byteRange string) (*http.Response, error) {
8283
return nil, err
8384
}
8485
if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
85-
twig.Debugf("status code: %d\n", resp.StatusCode)
8686
return nil, parseHTTPError(resp.StatusCode)
8787
}
8888
return resp, nil
8989
}
9090

9191
type Client struct {
92-
Bucket string
93-
Key string
94-
Region string
92+
Bucket string
93+
Key string
94+
Region string
95+
Profile string
9596
}
9697

97-
func NewClient(bucket, key, region string) Client {
98+
func NewClient(bucket, key, region, profile string) Client {
9899
return Client{
99-
Bucket: bucket,
100-
Key: key,
101-
Region: region,
100+
Bucket: bucket,
101+
Key: key,
102+
Region: region,
103+
Profile: profile,
102104
}
103105
}
104106

105107
func (c Client) GetObjectRange(byteRange string) (io.ReadCloser, error) {
106108
cfg := (&aws.Config{
107-
Region: aws.String(c.Region),
109+
Credentials: credentials.NewSharedCredentials("", c.Profile),
110+
Region: aws.String(c.Region),
108111
}).WithHTTPClient(newHTTPClient())
109112
sess := session.New(cfg)
110113
svc := s3.New(sess)
111114
input := &s3.GetObjectInput{
112-
Bucket: aws.String(c.Bucket),
113-
Key: aws.String(c.Key),
114-
Range: aws.String(byteRange),
115+
Bucket: aws.String(c.Bucket),
116+
Key: aws.String(c.Key),
117+
Range: aws.String(byteRange),
118+
RequestPayer: aws.String("requester"),
115119
}
116120
obj, err := svc.GetObject(input)
117-
if err != nil {
118-
twig.Debug("error from GetObject")
119-
return nil, err
120-
}
121121
return obj.Body, err
122122
}
123123

@@ -140,11 +140,8 @@ func ReadFile(path string) ([]byte, error) {
140140
return nil, errors.Errorf("url did not point to a valid amazon s3 location or follow the virtual-hosted style of https://[bucket].[region].s3.amazonaws.com/[file]: %s", path)
141141
}
142142
bucket := sections[0]
143-
twig.Debugf("bucket: %s", bucket)
144143
region := sections[1]
145-
twig.Debugf("region: %s", region)
146144
file := u.Path
147-
twig.Debugf("file: %s", file)
148145
cfg := (&aws.Config{
149146
Region: &region,
150147
}).WithHTTPClient(newHTTPClient())
@@ -156,7 +153,6 @@ func ReadFile(path string) ([]byte, error) {
156153
}
157154
obj, err := svc.GetObject(input)
158155
if err != nil {
159-
twig.Debug("error from GetObject")
160156
return nil, err
161157
}
162158
bytes, err := ioutil.ReadAll(obj.Body)
@@ -276,23 +272,17 @@ func newHTTPClient() *http.Client {
276272
func parseHTTPError(code int) error {
277273
switch code {
278274
case 400:
279-
twig.Debug("converting to EINVAL")
280275
return fuse.EINVAL
281276
case 403:
282-
twig.Debug("converting to EACCES")
283277
return syscall.EACCES
284278
case 404:
285-
twig.Debug("converting to ENOENT")
286279
return fuse.ENOENT
287280
case 405:
288-
twig.Debug("converting to ENOTSUP")
289281
return syscall.ENOTSUP
290282
case 500:
291-
twig.Debug("converting to EAGAIN")
292283
return syscall.EAGAIN
293284
default:
294-
// TODO: log this and re-evaluate whether this is a good move.
295-
twig.Debug("converting to EOF")
285+
// TODO: re-evaluate whether this is a good move.
296286
return io.EOF
297287
}
298288
}

cmd/mount.go

+32-7
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,12 @@ var (
4141
ngcpath string
4242
filetype string
4343

44-
endpoint string
45-
awsBatch, awsDefault int = 0, 50
46-
gcpBatch, gcpDefault int = 0, 25
47-
eager bool
44+
endpoint string
45+
awsBatch, awsDefault int = 0, 50
46+
gcpBatch, gcpDefault int = 0, 25
47+
awsProfile, awsProfileDefault string = "", "default"
48+
gcpProfile, gcpProfileDefault string = "", "gcp"
49+
eager bool
4850
)
4951

5052
func init() {
@@ -78,11 +80,21 @@ func init() {
7880
panic("INTERNAL ERROR: could not bind aws-batch flag to aws-batch environment variable")
7981
}
8082

83+
mountCmd.Flags().StringVarP(&awsProfile, "aws-profile", "", awsProfileDefault, flags.AwsProfileMsg)
84+
if err := viper.BindPFlag("aws-profile", mountCmd.Flags().Lookup("aws-profile")); err != nil {
85+
panic("INTERNAL ERROR: could not bind aws-profile flag to aws-profile environment variable")
86+
}
87+
8188
mountCmd.Flags().IntVarP(&gcpBatch, "gcp-batch", "", gcpDefault, flags.GcpBatchMsg)
8289
if err := viper.BindPFlag("gcp-batch", mountCmd.Flags().Lookup("gcp-batch")); err != nil {
8390
panic("INTERNAL ERROR: could not bind gcp-batch flag to gcp-batch environment variable")
8491
}
8592

93+
mountCmd.Flags().StringVarP(&gcpProfile, "gcp-profile", "", gcpProfileDefault, flags.GcpProfileMsg)
94+
if err := viper.BindPFlag("gcp-profile", mountCmd.Flags().Lookup("gcp-profile")); err != nil {
95+
panic("INTERNAL ERROR: could not bind gcp-profile flag to gcp-profile environment variable")
96+
}
97+
8698
mountCmd.Flags().BoolVarP(&eager, "eager", "", false, "ADVANCED: Have fusera request that urls be signed by the API on start up.\nEnvironment Variable: [$DBGAP_EAGER]")
8799
if err := viper.BindPFlag("eager", mountCmd.Flags().Lookup("eager")); err != nil {
88100
panic("INTERNAL ERROR: could not bind gcp-batch flag to gcp-batch environment variable")
@@ -204,11 +216,22 @@ func mount(cmd *cobra.Command, args []string) (err error) {
204216
fmt.Println("It seems like none of the accessions were successful, fusera is shutting down.")
205217
os.Exit(1)
206218
}
219+
credProfile := ""
220+
cloud := location[:2]
221+
twig.Debug(cloud)
222+
if cloud == "s3" {
223+
credProfile = awsProfile
224+
}
225+
if cloud == "gs" {
226+
credProfile = gcpProfile
227+
}
228+
twig.Debug(credProfile)
207229
//
208230
opt := &fuseralib.Options{
209-
Signer: client,
210-
Acc: accessions,
211-
Region: location,
231+
Signer: client,
232+
Acc: accessions,
233+
Region: location[3:],
234+
Profile: credProfile,
212235

213236
UID: uint32(uid),
214237
GID: uint32(gid),
@@ -237,6 +260,8 @@ func foldEnvVarsIntoFlagValues() {
237260
flags.ResolveString("endpoint", &endpoint)
238261
flags.ResolveInt("aws-batch", &awsBatch)
239262
flags.ResolveInt("gcp-batch", &gcpBatch)
263+
flags.ResolveString("aws-profile", &awsProfile)
264+
flags.ResolveString("gcp-profile", &gcpProfile)
240265
flags.ResolveBool("eager", &eager)
241266
flags.ResolveString("location", &location)
242267
flags.ResolveString("accession", &accession)

flags/flags.go

+9-7
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ var (
2323
AwsBatchName = "aws-batch"
2424
GcpBatchName = "gcp-batch"
2525

26-
LocationMsg = "Cloud provider and region where files should be located.\nFORMAT: [cloud.region]\nEXAMPLES: [s3.us-east-1 | gs.US]\nNOTE: This can be auto-resolved if running on AWS or GCP.\nEnvironment Variable: [$DBGAP_LOCATION]"
27-
AccessionMsg = "A list of accessions to mount or path to accession file.\nEXAMPLES: [\"SRR123,SRR456\" | local/accession/file | https://<bucket>.<region>.s3.amazonaws.com/<accession/file>]\nNOTE: If using an s3 url, the proper aws credentials need to be in place on the machine.\nEnvironment Variable: [$DBGAP_ACCESSION]"
28-
NgcMsg = "A path to an ngc file used to authorize access to accessions in dbGaP.\nEXAMPLES: [local/ngc/file | https://<bucket>.<region>.s3.amazonaws.com/<ngc/file>]\nNOTE: If using an s3 url, the proper aws credentials need to be in place on the machine.\nEnvironment Variable: [$DBGAP_NGC]"
29-
FiletypeMsg = "A list of the only file types to copy.\nEXAMPLES: \"cram,crai,bam,bai\"\nEnvironment Variable: [$DBGAP_FILETYPE]"
30-
EndpointMsg = "ADVANCED: Change the endpoint used to communicate with SDL API.\nEnvironment Variable: [$DBGAP_ENDPOINT]"
31-
AwsBatchMsg = "ADVANCED: Adjust the amount of accessions put in one request to the SDL API when using an AWS location.\nEnvironment Variable: [$DBGAP_AWS-BATCH]"
32-
GcpBatchMsg = "ADVANCED: Adjust the amount of accessions put in one request to the SDL API when using a GCP location.\nEnvironment Variable: [$DBGAP_GCP-BATCH]"
26+
LocationMsg = "Cloud provider and region where files should be located.\nFORMAT: [cloud.region]\nEXAMPLES: [s3.us-east-1 | gs.US]\nNOTE: This can be auto-resolved if running on AWS or GCP.\nEnvironment Variable: [$DBGAP_LOCATION]"
27+
AccessionMsg = "A list of accessions to mount or path to accession file.\nEXAMPLES: [\"SRR123,SRR456\" | local/accession/file | https://<bucket>.<region>.s3.amazonaws.com/<accession/file>]\nNOTE: If using an s3 url, the proper aws credentials need to be in place on the machine.\nEnvironment Variable: [$DBGAP_ACCESSION]"
28+
NgcMsg = "A path to an ngc file used to authorize access to accessions in dbGaP.\nEXAMPLES: [local/ngc/file | https://<bucket>.<region>.s3.amazonaws.com/<ngc/file>]\nNOTE: If using an s3 url, the proper aws credentials need to be in place on the machine.\nEnvironment Variable: [$DBGAP_NGC]"
29+
FiletypeMsg = "A list of the only file types to copy.\nEXAMPLES: \"cram,crai,bam,bai\"\nEnvironment Variable: [$DBGAP_FILETYPE]"
30+
EndpointMsg = "ADVANCED: Change the endpoint used to communicate with SDL API.\nEnvironment Variable: [$DBGAP_ENDPOINT]"
31+
AwsBatchMsg = "ADVANCED: Adjust the amount of accessions put in one request to the SDL API when using an AWS location.\nEnvironment Variable: [$DBGAP_AWS-BATCH]"
32+
GcpBatchMsg = "ADVANCED: Adjust the amount of accessions put in one request to the SDL API when using a GCP location.\nEnvironment Variable: [$DBGAP_GCP-BATCH]"
33+
AwsProfileMsg = "The desired AWS credentials profile in ~/.aws/credentials to use for instances when files require the requester (you) to pay for accessing the file.\nEnvironment Variable: [$DBGAP_AWS-PROFILE]\nNOTE: This account will be charged all cost accrued by accessing these certain files through fusera."
34+
GcpProfileMsg = "The desired GCP credentials profile in ~/.aws/credentials to use for instances when files require the requester (you) to pay for accessing the file.\nEnvironment Variable: [$DBGAP_GCP-PROFILE]\nNOTE: This account will be charged all cost accrued by accessing these certain files through fusera. These credentials should be in the AWS supported format that Google provides in order to work with their AWS compatible API."
3335
)
3436

3537
// ResolveLocation attempts to resolve the location on GCP and AWS.

fuseralib/file.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ func (fh *FileHandle) readFromStream(offset int64, buf []byte) (bytesRead int, e
255255
sd, _ := time.ParseDuration("30s")
256256
exp := fh.inode.Attributes.ExpirationDate
257257
if fh.inode.ReqPays {
258-
client := awsutil.NewClient(fh.inode.Bucket, fh.inode.Key, fh.inode.Region)
258+
client := awsutil.NewClient(fh.inode.Bucket, fh.inode.Key, fh.inode.Region, fh.inode.fs.opt.Profile)
259259
body, err := client.GetObjectRange(byteRange)
260260
if err != nil {
261261
return 0, syscall.EACCES

fuseralib/system.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ import (
3838
// Options is a collection of values that describe how Fusera should behave.
3939
type Options struct {
4040
// The file used to authenticate with the SRA Data Locator API
41-
Signer Signer
42-
Acc []*Accession
43-
Region string
41+
Signer Signer
42+
Acc []*Accession
43+
Region string
44+
Profile string
4445

4546
// File system
4647
MountOptions map[string]string

mock/cmd/root.go

+11-5
Original file line numberDiff line numberDiff line change
@@ -71,26 +71,32 @@ type file struct {
7171
Md5Hash string `json:"md5,omitempty"`
7272
Link string `json:"link,omitempty"`
7373
ExpirationDate time.Time `json:"expirationDate,omitempty"`
74+
Bucket string `json:"bucket,omitempty"`
75+
Key string `json:"key,omitempty"`
7476
Service string `json:"service,omitempty"`
7577
}
7678

7779
// HomeHandler returns whatever JSON I want.
7880
func HomeHandler(w http.ResponseWriter, r *http.Request) {
7981
w.WriteHeader(http.StatusOK)
80-
response := make([]payload, 5045, 5045)
82+
response := make([]payload, 1, 1)
8183
for i := range response {
8284
response[i].ID = "a" + fmt.Sprintf("%d", i)
8385
response[i].Status = 200
84-
response[i].Files = make([]file, 3, 3)
86+
response[i].Files = make([]file, 1, 1)
8587
for j := range response[i].Files {
86-
response[i].Files[j].Name = "a" + fmt.Sprintf("%d", i) + "file" + fmt.Sprintf("%d", j)
87-
response[i].Files[j].ExpirationDate = time.Now().Add(time.Hour)
88+
response[i].Files[j].Name = "test.txt"
89+
response[i].Files[j].Bucket = "matt-first-test-bucket"
90+
response[i].Files[j].Key = "test.txt"
91+
response[i].Files[j].Size = "51"
92+
//response[i].Files[j].ExpirationDate = time.Now().Add(time.Hour)
8893
}
8994
}
95+
js, _ := json.Marshal(&response)
9096
if err := json.NewEncoder(w).Encode(&response); err != nil {
9197
panic("couldn't encode json")
9298
}
93-
fmt.Println(response)
99+
fmt.Println(string(js))
94100
}
95101

96102
// Execute runs the root command of mocksdlapi.

0 commit comments

Comments
 (0)