# Import needed packages
# Standard libraries
import os # for operating system manipulation
import pathlib # for managing files and directories
import pickle # enables serialization of objects for saving
import re # pattern matching in strings
import warnings # control how warnings are displayed

# Geospatial Packages
import cartopy.crs as ccrs # coordinate reference system for maps
import dask.dataframe as dd
import dask.array as da
import earthaccess # simplifies access to nasa earthdata
import earthpy as et # functions that work with raster and vector data
import geopandas as gpd # read and manipulate geo dataframes
import geoviews as gv # geospatial visualization
import hvplot.pandas # plots pandas dataframes
import hvplot.xarray # plots data arrays
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np # numerican computation
import pandas as pd # tabular dataframes
import rioxarray as rxr # combine xarray with geospatial raster data
import rioxarray.merge as rxrmerge # merging multiple raster datasets
import seaborn as sns
from tqdm.notebook import tqdm # tracking processes with progress bar
import xarray as xr # gridded datasets
from shapely.geometry import Polygon # analyze geometric objects
from sklearn.cluster import KMeans # machine learning algorithm to group data
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_samples, silhouette_score

# Environmental Variables
os.environ["GDAL_HTTP_MAX_RETRY"] = "5" # geospatial data abstraction library for website query
os.environ["GDAL_HTTP_RETRY_DELAY"] = "1" # combined lines try website 5 times with 1 second delay between

warnings.simplefilter('ignore') # suppress warnings

c:\Users\stem2\miniconda3\envs\earth-analytics-python\Lib\site-packages\dask\dataframe\__init__.py:42: FutureWarning: 
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.

  warnings.warn(msg, FutureWarning)

# Setup data directories
data_dir = os.path.join(
    pathlib.Path.home(),
    'Documents',
    'eaclassprojects',
    'clusters',
    'nc'
)
os.makedirs(data_dir, exist_ok=True)

data_dir

'C:\\Users\\stem2\\Documents\\eaclassprojects\\clusters\\nc'

# Define decorator
def cached(func_key, override=False):

    # save function results for retrieval and allow overwrite
    """
    A decorator to cache function results 
    
    Parameters
    ==========
    key: str
      File basename used to save pickled results
    override: bool
      When True, re-compute even if the results are already stored
    """
    # Wrap caching function
    def compute_and_cache_decorator(compute_function):
        """
        Wrap the caching function
        
        Parameters
        ==========
        compute_function: function
          The function to run and cache results
        """
        # detail usage of func_key
        def compute_and_cache(*args, **kwargs):
            """
            Perform a computation and cache, or load cached result.
            
            Parameters
            ==========
            args
              Positional arguments for the compute function
            kwargs
              Keyword arguments for the compute function
            """
            # Add an identifier from the particular function call
            if 'cache_key' in kwargs:
                key = '_'.join((func_key, kwargs['cache_key']))
            else:
                key = func_key

            path = os.path.join(
                et.io.HOME, et.io.DATA_NAME, 'tuck', f'{key}.pickle')
            
            # Check if the cache exists already or override caching
            if not os.path.exists(path) or override:
                # Make tuck directory if needed
                os.makedirs(os.path.dirname(path), exist_ok=True)
                
                # Run the compute function as the user did
                result = compute_function(*args, **kwargs)
                
                # Pickle the object
                with open(path, 'wb') as file:
                    pickle.dump(result, file)
            else:
                # Unpickle the object
                with open(path, 'rb') as file:
                    result = pickle.load(file)
                    
            return result

        return compute_and_cache
    
    return compute_and_cache_decorator

# Read watershed boundary dataset shapefile into a GeoDataFrame
@cached('wbd_06')
def read_wbd_file(wbd_filename, huc_level, cache_key):
    # Download and unzip
    wbd_url = (
        "https://prd-tnm.s3.amazonaws.com"
        "/StagedProducts/Hydrography/WBD/HU2/Shape/"
        f"{wbd_filename}.zip")
    wbd_dir = et.data.get_data(url=wbd_url)
                  
    # Read desired data
    wbd_path = os.path.join(wbd_dir, 'Shape', f'WBDHU{huc_level}.shp')
    wbd_gdf = gpd.read_file(wbd_path, engine='pyogrio')
    return wbd_gdf

huc_level = 12 # identify HUC level
wbd_gdf = read_wbd_file(
    "WBD_06_HU2_Shape", huc_level, cache_key=f'hu{huc_level}') # Call the function using cache

huc_column = f'huc{huc_level}'

if huc_column in wbd_gdf.columns:
    tuck_gdf = (
        wbd_gdf[wbd_gdf[huc_column].isin(['060102030303'])] # filter for specific river basin
        .dissolve() # create a single polygon
)

    if not tuck_gdf.empty:
        (
            tuck_gdf.to_crs(ccrs.Mercator()) # Reproject to Mercator for web mapping
            .hvplot(
                alpha=.2, fill_color='white', # set styling
                tiles='EsriImagery', crs=ccrs.Mercator()) # add background map
            .opts(title='Tuckasegee River Watershed, NC', width=600, height=300) # set plot dimensions
        )

    else:
        print(f"Warning: no data found")
else:
    print(f"Error: Column in gdf not found")

tuck_gdf.head()

tuck_gdf.to_crs(ccrs.Mercator()).hvplot(
    alpha=.2,
    fill_color='red',
    tiles='EsriImagery',
    crs=ccrs.Mercator()
).opts(
    title='Tuckasegee River Watershed, NC',
    width=600,
    height=300
)

wbd_gdf.head()

# Log in to earthaccess
earthaccess.login(persist=True)
# Search for HLS tiles
results = earthaccess.search_data(
    short_name="HLSL30",
    cloud_hosted=True,
    bounding_box=tuple(tuck_gdf.total_bounds),
    temporal=("2023-06", "2023-09"),
)
# Confirm the contents
num_granules =len(results)
print(f"Number of granules found: {num_granules}")

print(results[0])

Number of granules found: 60
Collection: {'EntryTitle': 'HLS Landsat Operational Land Imager Surface Reflectance and TOA Brightness Daily Global 30m v2.0'}
Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Longitude': -83.19517003, 'Latitude': 35.13500195}, {'Longitude': -81.99027924, 'Latitude': 35.15084307}, {'Longitude': -82.00257131, 'Latitude': 36.140702}, {'Longitude': -83.22239647, 'Latitude': 36.12427601}, {'Longitude': -83.19517003, 'Latitude': 35.13500195}]}}]}}}
Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2023-06-01T16:05:15.499Z', 'EndingDateTime': '2023-06-01T16:05:39.386Z'}}
Size(MB): 198.7151746749878
Data: ['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B09.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B05.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B10.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.VAA.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B07.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.SAA.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.VZA.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B01.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.SZA.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B03.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B06.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B04.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B11.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.Fmask.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T17SLV.2023152T160515.v2.0/HLS.L30.T17SLV.2023152T160515.v2.0.B02.tif']

results = earthaccess.search_data(
    short_name="HLSL30",
    cloud_hosted=True,
    bounding_box=tuple(tuck_gdf.total_bounds),
    temporal=("2023-05", "2023-09"),
)

print(type(results))  # Prints the type of the 'results' object

<class 'list'>

# Extract and organize Earthaccess data
def get_earthaccess_links(results):
    url_re = re.compile(
        r'\.(?P<tile_id>\w+)\.\d+T\d+\.v\d\.\d\.(?P<band>[A-Za-z0-9]+)\.tif')

    # Loop through each granule
    link_rows = []  # Initialize a list to hold GeoDataFrames 
 
    url_dfs = [] # Initialize a list to hold individual file data
    for granule in tqdm(results):

        # Get granule metadata information
        info_dict = granule['umm']
        granule_id = info_dict['GranuleUR']
        datetime = pd.to_datetime(
            info_dict
            ['TemporalExtent']['RangeDateTime']['BeginningDateTime'])
        points = ( # Extract polygon coordinates
            info_dict
            ['SpatialExtent']['HorizontalSpatialDomain']['Geometry']['GPolygons'][0]
            ['Boundary']['Points'])
        geometry = Polygon( # Create a Shapely Polygon object
            [(point['Longitude'], point['Latitude']) for point in points])
        
        # Get data files associated with the granule
        files = earthaccess.open([granule])

        # Loop through each file within the granule
        for file in files:
            match = url_re.search(file.full_name)
            if match is not None:
                # Create a GeoDataFrame for the current file and append to list
                link_rows.append(
                    gpd.GeoDataFrame(
                        dict(
                            datetime=[datetime],
                            tile_id=[match.group('tile_id')], # Extract tile ID
                            band=[match.group('band')], # Extract band name
                            url=[file], # Store the file object
                            geometry=[geometry] # Store the geometry
                        ), 
                        crs="EPSG:4326" # set coordinate reference system
                    )
                )

    # Concatenate GeoDataFrames into a single gdf
    file_df = pd.concat(link_rows).reset_index(drop=True) # combine all rows
    return file_df

results = earthaccess.search_data(
    short_name="HLSL30",
    cloud_hosted=True,
    bounding_box=tuple(tuck_gdf.total_bounds),
    temporal=("2023-06", "2023-09"),
)

if results:  # Check if results is not empty
    first_result = results[0]  # Get the first result
    file_df = get_earthaccess_links([first_result]) # Pass a list containing only the first result to the function
    print(file_df.head())

else:
    print("No results found.")

  0%|          | 0/1 [00:00<?, ?it/s]

QUEUEING TASKS | :   0%|          | 0/15 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/15 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/15 [00:00<?, ?it/s]

                          datetime tile_id band  \
0 2023-06-01 16:05:15.499000+00:00  T17SLV  B09   
1 2023-06-01 16:05:15.499000+00:00  T17SLV  B05   
2 2023-06-01 16:05:15.499000+00:00  T17SLV  B10   
3 2023-06-01 16:05:15.499000+00:00  T17SLV  VAA   
4 2023-06-01 16:05:15.499000+00:00  T17SLV  B07   

                                                 url  \
0  <File-like object HTTPFileSystem, https://data...   
1  <File-like object HTTPFileSystem, https://data...   
2  <File-like object HTTPFileSystem, https://data...   
3  <File-like object HTTPFileSystem, https://data...   
4  <File-like object HTTPFileSystem, https://data...   

                                            geometry  
0  POLYGON ((-83.19517 35.135, -81.99028 35.15084...  
1  POLYGON ((-83.19517 35.135, -81.99028 35.15084...  
2  POLYGON ((-83.19517 35.135, -81.99028 35.15084...  
3  POLYGON ((-83.19517 35.135, -81.99028 35.15084...  
4  POLYGON ((-83.19517 35.135, -81.99028 35.15084...

@cached('tuck_reflectance_da_df')
def compute_reflectance_da(search_results, boundary_gdf):
    """
    Connect to files over VSI, crop, cloud mask, and wrangle.

    Returns a single reflectance DataFrame with all bands as columns and
    centroid coordinates and datetime as the index.

    Parameters
    ==========
    file_df : pd.DataFrame
        File connection and metadata (datetime, tile_id, band, and url)
    boundary_gdf : gpd.GeoDataFrame
        Boundary use to crop the data
    """

    def open_dataarray(url, boundary_proj_gdf, scale=1, masked=True):
        # Open masked DataArray
        da = rxr.open_rasterio(url, masked=masked).squeeze() * scale
        
        # Reproject boundary if needed
        if boundary_proj_gdf is None:
            boundary_proj_gdf = boundary_gdf.to_crs(da.rio.crs)
            
        # Crop
        cropped = da.rio.clip_box(*boundary_proj_gdf.total_bounds)
        return cropped

    def compute_quality_mask(da, mask_bits=[1, 2, 3]):
        """Mask out low quality data by bit."""
        # Unpack bits into a new axis
        bits = (
            np.unpackbits(
                da.astype(np.uint8), bitorder='little'
            ).reshape(da.shape + (-1,))
        )

        # Select the required bits and check if any are flagged
        mask = np.prod(bits[..., mask_bits]==0, axis=-1)
        return mask

    file_df = get_earthaccess_links(search_results)
    granule_da_rows = []
    boundary_proj_gdf = None

    # Loop through each image
    group_iter = file_df.groupby(['datetime', 'tile_id'])
    for (datetime, tile_id), granule_df in tqdm(group_iter):
        print(f'Processing granule {tile_id} {datetime}')
              
        # Open granule cloud cover
        cloud_mask_url = (
            granule_df.loc[granule_df.band=='Fmask', 'url']
            .values[0])
        cloud_mask_cropped_da = open_dataarray(cloud_mask_url, boundary_proj_gdf, masked=False)

        # Compute cloud mask
        cloud_mask = compute_quality_mask(cloud_mask_cropped_da)

        # Loop through each spectral band
        da_list = []
        df_list = []
        for i, row in granule_df.iterrows():
            if row.band.startswith('B'):
                # Open, crop, and mask the band
                band_cropped = open_dataarray(
                    row.url, boundary_proj_gdf, scale=0.0001)
                band_cropped.name = row.band
                # Add the DataArray to the metadata DataFrame row
                row['da'] = band_cropped.where(cloud_mask)
                granule_da_rows.append(row.to_frame().T)
    
    # Chunking for memory efficiency
    chunk_size = 100  # Adjust as needed
    result_chunks = []
    for i in range(0, len(granule_da_rows), chunk_size):
        chunk = granule_da_rows[i:i + chunk_size]
        result_chunks.append(pd.concat(chunk))
    
    final_result = pd.concat(result_chunks)
    return final_result

tuck_reflectance_da_df = compute_reflectance_da(results, tuck_gdf)
tuck_reflectance_da_df.head()

@cached('tuck_reflectance_da') # Function output stored, same input gives cached output improving performance
def merge_and_composite_arrays(granule_da_df):
    # Merge and composite and image for each band
    df_list = [] # List to store DataFrames
    da_list = [] # List to store the composited DataArrays for each band
    for band, band_df in tqdm(granule_da_df.groupby('band')): # Iterate through each band
        merged_das = [] # List to store merged DAs for each date within the current band

        for datetime, date_df in tqdm(band_df.groupby('datetime')): # Iterate through each date within the current band
            # Merge granules for each date
            merged_da = rxrmerge.merge_arrays(list(date_df.da))
            # Mask negative values
            merged_da = merged_da.where(merged_da>0)
            # Add the merged DA for the current date to the list
            merged_das.append(merged_da)
            
        # Composite images across dates with median reflectance value for each pixel reducing the effect of cloud cover.
        composite_da = xr.concat(merged_das, dim='datetime').median('datetime')
        composite_da['band'] = int(band[1:]) # Converts band name to integer 
        composite_da.name = 'reflectance' # Sets the name of the DataArray
        da_list.append(composite_da) # Add the composite DA for the current band to the list

    # Concatenate the composite DataArrays for all bands into a single DataArray   
    return xr.concat(da_list, dim='band') 

tuck_reflectance_da = merge_and_composite_arrays(tuck_reflectance_da_df) # Call the function to process the reflectance data
tuck_reflectance_da[0] # Display the first resulting reflectance DataArray

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

### how many different types of vegetation are there?
# Convert spectral DataArray to DataFrame
model_df = tuck_reflectance_da.to_dataframe().reflectance.unstack('band')
model_df = model_df.drop(columns=[10, 11]).dropna()

model_df.head()

# Running the fit and predict functions at the same time.
# We can do this since we don't have target data.
prediction = KMeans(n_clusters=6).fit_predict(model_df.values)

# Add the predicted values back to the model DataFrame
model_df['clusters'] = prediction
model_df

# Select bands from reflectance data array
rgb = tuck_reflectance_da.sel(band=[4, 3, 2]) # band 4=red, 3=green and 2=blue
rgb_uint8 = (rgb * 255).astype(np.uint8).where(rgb!=np.nan) # convert to integers (0-255)
rgb_bright = rgb_uint8 * 10 # increase brightness 
rgb_sat = rgb_bright.where(rgb_bright < 255, 255) # no pixel exceeds 255

# Create a composite RGB image plot in square
(
    rgb_sat.hvplot.rgb( 
        x='x', y='y', bands='band',
        data_aspect=1,
        xaxis=None, yaxis=None,
        title="RGB Composite Cluster Overlay of the Tuckasegee River"  # Add title here
        )
    + # Overlay cluster data
    model_df.clusters.to_xarray().sortby(['x', 'y']).hvplot(
        cmap="colorblind", aspect='equal', xaxis=None, yaxis=None) 
)

	tnmid	metasource	sourcedata	sourceorig	sourcefeat	loaddate	referenceg	areaacres	areasqkm	states	...	name	hutype	humod	tohuc	noncontrib	noncontr_1	shape_Leng	shape_Area	ObjectID	geometry
0	{B2398188-7360-43E5-9734-34C4C2C0ED75}	{511D2AC8-11BA-45FC-AB98-F69D693D4C44}	Watershed Boundary Dataset (WBD)	Natural Resources and Conservation Service and...	None	2024-08-15	125931,151915	19065.85	77.16	AL	...	Rorex Creek-Jones Creek	S	NM	060300010408	0.0	0.0	NaN	NaN	1	POLYGON ((-85.7839 34.70814, -85.78404 34.7080...
1	{ADD4C622-05E7-4EE0-A37C-7A7561819CD1}	{511D2AC8-11BA-45FC-AB98-F69D693D4C44}	Watershed Boundary Dataset (WBD)	Natural Resources and Conservation Service and...	None	2024-08-15	1272000,1269346	18132.07	73.38	TN	...	Sycamore Creek-Big Sycamore Creek	S	KA	060102050903	NaN	NaN	NaN	NaN	2	POLYGON ((-83.28651 36.54126, -83.28664 36.541...
2	{7FDF1CF1-714A-46C3-98F9-3CDEE8CE962E}	{511D2AC8-11BA-45FC-AB98-F69D693D4C44}	Watershed Boundary Dataset (WBD)	Natural Resources and Conservation Service and...	None	2024-08-15	123559	33164.34	134.21	AL	...	Lower Mud Creek	S	NM	060300010408	0.0	0.0	NaN	NaN	3	POLYGON ((-85.91333 34.84872, -85.91333 34.848...
3	{D7AED0DA-C3E5-4AE3-9989-EDD6A77FF1A7}	{511D2AC8-11BA-45FC-AB98-F69D693D4C44}	Watershed Boundary Dataset (WBD)	Natural Resources and Conservation Service and...	None	2024-08-15	116557	27750.14	112.30	AL	...	Coon Creek	S	NM	060300010408	0.0	0.0	NaN	NaN	4	POLYGON ((-85.73206 34.82984, -85.73252 34.829...
4	{70D79B1B-2E9E-4B38-AEA9-6CB0A10F391B}	{511D2AC8-11BA-45FC-AB98-F69D693D4C44}	Watershed Boundary Dataset (WBD)	Natural Resources and Conservation Service and...	None	2024-08-15	1485282,1487063	28196.00	114.11	VA	...	McDonald Branch-North Fork Holston River	S	KA	060101010103	0.0	0.0	NaN	NaN	5	POLYGON ((-81.37268 36.96393, -81.37284 36.963...

	band	1	2	3	4	5	6	7	9
y	x
3917025.0	293925.0	0.0173	0.0202	0.0447	0.0244	0.3751	0.1667	0.0628	0.0012
	293955.0	0.0122	0.0155	0.0351	0.0223	0.1678	0.0759	0.0313	0.0012
	293985.0	0.0138	0.0180	0.0351	0.0310	0.1667	0.0959	0.0460	0.0012
	294015.0	0.0175	0.0235	0.0501	0.0400	0.3075	0.1918	0.0945	0.0012
	294045.0	0.0239	0.0264	0.0518	0.0365	0.3165	0.1866	0.0854	0.0013

	band	1	2	3	4	5	6	7	9	clusters
y	x
3917025.0	293925.0	0.01730	0.02020	0.04470	0.02440	0.37510	0.16670	0.06280	0.0012	0
	293955.0	0.01220	0.01550	0.03510	0.02230	0.16780	0.07590	0.03130	0.0012	1
	293985.0	0.01380	0.01800	0.03510	0.03100	0.16670	0.09590	0.04600	0.0012	1
	294015.0	0.01750	0.02350	0.05010	0.04000	0.30750	0.19180	0.09450	0.0012	5
	294045.0	0.02390	0.02640	0.05180	0.03650	0.31650	0.18660	0.08540	0.0013	5
...	...	...	...	...	...	...	...	...	...	...
3911475.0	302535.0	0.01310	0.01950	0.04745	0.03040	0.33955	0.15270	0.06685	0.0012	2
	302565.0	0.01305	0.01495	0.03500	0.01605	0.34540	0.13385	0.05060	0.0010	2
	302595.0	0.01725	0.01855	0.03675	0.01890	0.33700	0.13670	0.05455	0.0010	2
	302625.0	0.01830	0.02030	0.04130	0.02235	0.35000	0.14685	0.05905	0.0011	2
	302655.0	0.02060	0.02190	0.04475	0.02390	0.38795	0.15685	0.06280	0.0010	0

Land cover classification in the Tuckasegee Watershed¶

Background Information¶

This study will use the harmonized Sentinal/Landsat multispectral dataset to look at patterns in vegetation data. The HUC region 6 watershed covers the drainage of the Tennessee River Basin from Kentucky to the Gulf of Mexico. Most of the Tuckasegee River basin within this region is forested.¶

Sources¶

Organize Information Downloads¶

Using a Decorator to Cache Results¶

Study Site¶

Access Multispectral Data¶

Compile Information about each Granule¶

Open, Crop, and Mask Data using a Cached Decorator¶

Merge and Composite Data¶

Analyze using K-MEANS¶

Plot Reflectance¶

Landcover Analysis using K-Means Clusters from Sentinel/Landsat Multispectral Data¶

Source¶

	datetime	tile_id	band	url	geometry	da
17	2023-06-01 16:05:15.499000+00:00	T17SKV	B04	<File-like object HTTPFileSystem, https://data...	POLYGON ((-83.72416443 35.12425967, -83.087891...	[[<xarray.DataArray 'B04' ()> Size: 4B\narray(...
18	2023-06-01 16:05:15.499000+00:00	T17SKV	B07	<File-like object HTTPFileSystem, https://data...	POLYGON ((-83.72416443 35.12425967, -83.087891...	[[<xarray.DataArray 'B07' ()> Size: 4B\narray(...
21	2023-06-01 16:05:15.499000+00:00	T17SKV	B03	<File-like object HTTPFileSystem, https://data...	POLYGON ((-83.72416443 35.12425967, -83.087891...	[[<xarray.DataArray 'B03' ()> Size: 4B\narray(...
22	2023-06-01 16:05:15.499000+00:00	T17SKV	B09	<File-like object HTTPFileSystem, https://data...	POLYGON ((-83.72416443 35.12425967, -83.087891...	[[<xarray.DataArray 'B09' ()> Size: 4B\narray(...
24	2023-06-01 16:05:15.499000+00:00	T17SKV	B01	<File-like object HTTPFileSystem, https://data...	POLYGON ((-83.72416443 35.12425967, -83.087891...	[[<xarray.DataArray 'B01' ()> Size: 4B\narray(...