BitcoinAnalysis - Ipynb - Colaboratory
BitcoinAnalysis - Ipynb - Colaboratory
ipynb - Colaboratory
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 1/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
import pandas as pd
# File paths of the per-year, minute-level BTC/USD CSV exports on Google Drive.
file_2017 = '/content/drive/MyDrive/BTC-2017min.csv'
file_2018 = '/content/drive/MyDrive/BTC-2018min.csv'
file_2019 = '/content/drive/MyDrive/BTC-2019min.csv'

# Load the datasets.
data_2017 = pd.read_csv(file_2017)
data_2018 = pd.read_csv(file_2018)
# NOTE(review): the right-hand side of this assignment was lost in the
# export; reconstructed by analogy with the two loads above - confirm.
data_2019 = pd.read_csv(file_2019)

# Merging the datasets: stack the three yearly frames row-wise. Each frame
# keeps its own row labels, so index values repeat across years.
merged_data = pd.concat([data_2017, data_2018, data_2019])

# Saving the merged dataset (index=False leaves the row labels out of the file).
merged_data.to_csv('/content/drive/MyDrive/BTC_2017-2019_merged.csv',
                   index=False)
unix int64
date object
symbol object
open float64
high float64
low float64
close float64
Volume BTC float64
Volume USD float64
dtype: object
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 2/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
# Convert 'unix' to datetime.
# Assuming 'unix' is in seconds.
merged_data['unix'] = pd.to_datetime(merged_data['unix'], unit='s')

# Convert 'date' to datetime.
merged_data['date'] = pd.to_datetime(merged_data['date'])

# Convert 'symbol' to pandas' dedicated string dtype.
merged_data['symbol'] = merged_data['symbol'].astype('string')

# Check the data types again.
merged_data.dtypes
unix datetime64[ns]
date datetime64[ns]
symbol string
open float64
high float64
low float64
close float64
Volume BTC float64
Volume USD float64
dtype: object
# List the distinct values of the 'is_same' column.
# NOTE(review): 'is_same' is not created in any cell visible in this export -
# presumably it comes from a cell that was not captured; confirm.
merged_data['is_same'].unique()
# Preview the first rows of the merged frame.
merged_data.head()
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 3/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
                 date   symbol      open      high       low     close  Volume BTC    Volume USD
0 2017-12-31 23:59:00  BTC/USD  13913.28  13913.28  13867.18  13880.00    0.591748   8213.456549
1 2017-12-31 23:58:00  BTC/USD  13913.26  13953.83  13884.69  13953.77    1.398784  19518.309658
2 2017-12-31 23:57:00  BTC/USD  13908.73  13913.26  13874.99  13913.26    0.775012  10782.944294
BTC/USD 1576797
Name: symbol, dtype: Int64
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 4/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
Sample of Data:
# Display five randomly chosen rows of the merged frame.
merged_data.sample(5)
                      date   symbol     open     high      low    close  Volume BTC    Volume USD
183380 2018-08-26 15:39:00  BTC/USD  6693.25  6693.40  6691.06  6691.06    1.736926  11621.875212
137022 2019-09-27 20:17:00  BTC/USD  8008.00  8008.00  8008.00  8008.00    0.013155    105.347082
452575 2017-02-20 17:04:00  BTC/USD  1060.95  1060.95  1059.92  1059.92    0.028804     30.529830
# Count the missing values in each column.
merged_data.isnull().sum()
date 0
symbol 0
open 0
high 0
low 0
close 0
Volume BTC 0
Volume USD 0
dtype: int64
# Show the first five entries of the 'date' column.
merged_data['date'].head(5)
0 2017-12-31 23:59:00
1 2017-12-31 23:58:00
2 2017-12-31 23:57:00
3 2017-12-31 23:56:00
4 2017-12-31 23:55:00
Name: date, dtype: datetime64[ns]
# Count the distinct timestamps in the 'date' column.
merged_data['date'].nunique()
1576797
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 5/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
"we then transform the minutely data into a daily format. This
conversion is aimed at refining the analysis process.
Aggregating the data on a daily basis allows for a clearer, more
manageable overview of trends and patterns, which is
particularly beneficial for more effective and insightful
analysis."
import pandas as pd
# NOTE(review): merged_data_sorted is not created in any cell visible in this
# export - presumably a date-sorted copy of merged_data; confirm.

# Convert 'date' to datetime if not already done.
merged_data_sorted['date'] = pd.to_datetime(merged_data_sorted['date'])

# Set the 'date' column as the index (resample needs a datetime-like index).
merged_data_sorted.set_index('date', inplace=True)

# Resample to daily data and aggregate.
# NOTE(review): the key order of this dict was scrambled in the export; it
# only affects the column order of daily_data.
daily_data = merged_data_sorted.resample('D').agg({
    'open': 'mean',        # mean of open prices
    'close': 'mean',       # mean of close prices
    'low': 'mean',         # mean of low prices
    'high': 'mean',        # mean of high prices
    'Volume BTC': 'sum',   # sum of BTC volumes
    'Volume USD': 'sum',   # sum of USD volumes
})
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 6/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
# Move the index back into a regular column, then preview the first five rows.
daily_data.reset_index(inplace=True)
daily_data.head(5)
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 7/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
import pandas as pd
# Load your dataset.
daily_data = pd.read_csv('/content/drive/MyDrive/daily_data.csv')

# Use 'date' as the index while the rolling statistic is computed.
daily_data.set_index('date', inplace=True)

# Choose a window size for the moving average, 20 days.
window_size = 20

# Calculate the moving average for the 'close' price.
daily_data['moving_average_close'] = (
    daily_data['close'].rolling(window=window_size).mean()
)

# Now your daily_data DataFrame has an additional column with the 20-day
# moving average of the close price; restore 'date' as a regular column.
daily_data.reset_index('date', inplace=True)

# Replace the first 19 rows of nulls caused by the 20-day window.
# Used this approach, using the close price as the default value.
# Assign the result back instead of calling fillna(..., inplace=True) on the
# selected column: the chained in-place form is deprecated and does not
# update the frame under pandas Copy-on-Write.
daily_data['moving_average_close'] = (
    daily_data['moving_average_close'].fillna(daily_data['close'])
)
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 8/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
8 8716.182941 7.782149e+06 893.471535
9 8535.521688 7.706384e+06 902.638375
10 35893.768368 2.945219e+07 846.173313
11 17400.141555 1.363246e+07 782.961688
12 11409.520330 9.224971e+06 807.177507
13 6614.718992 5.469742e+06 827.412431
14 4231.463903 3.454909e+06 817.081007
15 6166.043977 5.107435e+06 827.977958
16 12264.169385 1.077497e+07 876.181472
17 11181.898878 9.830026e+06 885.345653
18 11094.603298 9.928565e+06 893.294389
19 6618.627764 5.915721e+06 905.154059
20 5865.632031 5.373761e+06 902.174225
21 7166.665479 6.566289e+06 897.694986
22 3514.741429 3.234650e+06 892.702387
23 9405.046565 8.497003e+06 884.234022
24 5291.554742 4.725942e+06 876.782092
# Show the first entries of the 'date' column.
daily_data['date'].head()
0 2017-01-01
1 2017-01-02
2 2017-01-03
3 2017-01-04
4 2017-01-05
Name: date, dtype: object
# Preview the first rows of the daily frame.
daily_data.head()
         date         open         high          low        close    Volume BTC    Volume USD  moving_ave
0  2017-01-01   977.256602   977.385233   977.132620   977.276060   6850.593309  6.765936e+06
2  2017-01-03  1020.001535  1020.226840  1019.794437  1020.040472   9089.658025  9.276735e+06
3  2017-01-04  1076.558840  1077.271167  1075.572542  1076.553639  21562.456972  2.347651e+07
4  2017-01-05  1043.608646  1044.905549  1042.094125  1043.547951  36018.861120  3.619081e+07
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 9/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
# Separate features and target.
X = daily_data[['open', 'high', 'low', 'Volume BTC', 'Volume USD',
                'moving_average_close']]
y = daily_data['close']

# Split the data: 80% train / 20% test, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Initialize and train the model.
model = LinearRegression()
model.fit(X_train, y_train)

# NOTE(review): the prediction/metric lines of this cell are missing from the
# export, leaving `mae` and `rmse` undefined. They are reconstructed here to
# mirror the scaled-model cell of the same notebook - confirm against the
# original notebook.
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))

print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 10/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
# Initialize the StandardScaler.
scaler = StandardScaler()

# Scale the training data and also apply the same transformation to the test
# data. The scaler is fitted on the training split only and then reused, so
# no test-set statistics enter the fit.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Linear Regression model.
linear_model = LinearRegression()

# Train the model using the scaled training data.
linear_model.fit(X_train_scaled, y_train)

# Predict using the scaled test data.
scaled_predictions = linear_model.predict(X_test_scaled)

# Calculate Mean Absolute Error and Root Mean Squared Error.
scaled_mae = mean_absolute_error(y_test, scaled_predictions)
scaled_rmse = np.sqrt(mean_squared_error(y_test, scaled_predictions))

# Print the performance metrics.
# NOTE(review): only the MAE print is visible in the export; a matching print
# of scaled_rmse presumably followed - confirm against the original notebook.
print(f'Scaled Mean Absolute Error: {scaled_mae}')
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 11/12
1/13/24, 10:19 PM BitcoinAnalysis.ipynb - Colaboratory
https://colab.research.google.com/drive/1YzxhLK6bzcIR-j5btF8wVutr8BZsToAR#scrollTo=th-qpph1JIDH&printMode=true 12/12