-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathsetup.sh
executable file
·39 lines (32 loc) · 1.1 KB
/
setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
#
# setup.sh - build the MapReduce jar, stage the input data in HDFS, run the
# MapReduce jobs and then run the Hive import scripts.
#
# All local paths are relative, so run this from the directory that contains
# src/, input, 173328.csv, stadiums.csv, arrests.csv and the *.hql scripts.
#
# Requires on PATH: javac, jar, hadoop, hive.

# Fail fast: later steps consume the output of earlier ones, so there is no
# point running MR jobs or Hive queries after a failed compile or upload.
set -euo pipefail

readonly BASEDIR=/user/cloudera

echo "Compiling MR"
# Subshell keeps the directory change local, so no "cd .." is needed and a
# failed cd cannot leave the rest of the script in the wrong directory.
(
  cd src
  # Quoted so the shell does not glob/split the classpath; javac expands any
  # "*" wildcard entries itself.
  javac -classpath "$(hadoop classpath)" *.java
  jar cf ../playbyplay.jar *.class
)

echo "Deleting files in HDFS"
# -f: a path that does not exist yet (e.g. on the first run) is not an error.
# One invocation for all paths avoids starting a JVM per path.
hadoop fs -rm -r -f \
  "$BASEDIR/input" \
  "$BASEDIR/playoutput" \
  "$BASEDIR/joinedoutput" \
  "$BASEDIR/weather" \
  "$BASEDIR/stadium"

echo "Putting files in HDFS"
# -p: create missing parents and do not fail if the directory already exists.
hadoop fs -mkdir -p "$BASEDIR/weather" "$BASEDIR/stadium"
hadoop fs -put -f input "$BASEDIR/input"
hadoop fs -put -f 173328.csv "$BASEDIR/weather/"
hadoop fs -put -f stadiums.csv "$BASEDIR/stadium/"
hadoop fs -put -f arrests.csv "$BASEDIR/arrests.csv"

echo "Running MR Jobs"
# The second job reads the first job's output directory, so order matters.
hadoop jar playbyplay.jar PlayByPlayDriver "$BASEDIR/input" "$BASEDIR/playoutput"
hadoop jar playbyplay.jar ArrestJoinDriver \
  "$BASEDIR/playoutput" "$BASEDIR/joinedoutput" "$BASEDIR/arrests.csv"

echo "Running Hive queries"
# Executed in this fixed order; -S runs hive in silent mode.
for hql in \
  playbyplay_tablecreate.hql \
  playbyplay_join.hql \
  adddrives.hql \
  adddriveresult.hql; do
  hive -S -f "$hql"
done

echo "All done importing the data"
echo ""
echo ""
echo "** Check the Hive and Pig queries in queries.hql and queries.pig **"