Source code for liquepy.field.nzgd_cpt_converter

import pandas as pd
import glob
import os
import inspect


D_STR = "Depth (m)"
QC_STR = "qc (MPa)"
FS_STR = "fs (MPa)"
U2_STR = "u (MPa)"


[docs]def convert_file(ffp, out_fp, verbose=0):
    """
    Converts a CPT from the NZGD into the standard liquepy format.

    This solves the problem of dealing with multiple different file formats by converting to a single format.

    The additional metadata is mostly maintained in the file.

    Algorithm explained
    -------------------

    Algorithm works by trial and error:
     - Many different converters have been written to convert from an existing format to the liquepy format
     - This algorithm cycles through each converter and tries to recognise the existing file format
        and then apply the conversion
     - if the file is different to the expected existing format then the conversion will fail and another converter is
        attempted
     - if successful, then the converter function name that was successful is returned
     - if unsuccessful, then 'NONE' is returned

    Liquepy CPT format
    ------------------

    The liquepy format is a simple standard format.

     - File is saved as a CSV
     - all units in metres and MPa.
     - CPT measurements start at line 25
     - Comma separation using ','
     - First 23 lines contain meta data
     - Pre-drill depth is defined in meta data as 'Pre-Drill:,<pre-drill depth>,'
     - Ground water level is defined in meta data as 'Assumed GWL:,<ground water depth>,'
     - Cone area ratio is defined in meta data as 'aratio,<cone area ratio>,'
     - Second line of metadata contains name of converter function (useful for debugging)

    Parameters
    ----------
    :param ffp: str
        Full file path to original CPT file
    :param out_fp: str
        Output folder path where formatted CPT file should be saved
    :param verbose: bool or int
        if true then print to algorithm steps to console
    :return:
    """

    fns = [convert_ags, convert_raw01_xls, convert_raw01_xls_v2, convert_raw01_xls_v3, convert_raw01_xlsx,
           convert_raw02_xlsx_or_raw_03_xls, convert_raw01_xlsx_v2, convert_fugro_raw01, convert_raw01_fugro_v2,
           convert_raw01_extra_cols, convert_tabulated_kpas, convert_raw01_xlsx_from_csv, convert_raw_txt_xlsx,
           convert_raw01_w_kpa, convert_raw01_fugro_xls_v3, convert_raw01_xlsx_space_sep, convert_shortened_ags,
           convert_raw01_w_underscores, convert_raw01_w_meta_on_right]
    for i, fn in enumerate(fns):
        if verbose:
            print(fn.__name__)
        res = fn(ffp, out_fp, verbose=verbose)
        if res == 1:
            return fn.__name__
        if i == len(fns) - 1:
            if verbose:
                print("NOT CONVERTED!")
            return "NONE"


[docs]def convert_folder(in_fp, out_fp, verbose=0):
    """
    Reads through a folder of NZGD CPT files and converts them to liquepy format

    :param in_fp: str
        Folder path of existing NZGD CPT files
    :param out_fp: str
        Folder path where formatted NZGD files should be saved
    :param verbose: int or bool
        if true then print to algorithm steps to console
    :return:
    """
    results = []
    ffps = glob.glob(in_fp + "*.xls*")  # Does not handle .csv and .txt
    ffps.sort()

    if not os.path.exists(out_fp):
        os.makedirs(out_fp)
    for ffp in ffps:
        if verbose:
            print(ffp)

        converter_name = convert_file(ffp, out_fp, verbose)
        fname = ffp.split(in_fp)[-1]

        results.append("{0},{1}".format(fname, converter_name))
    if verbose:
        print('SUMMARY: ')
        for line in results:
            print(line)
    ofile = open(out_fp + 'last_processed.txt', 'w')
    ofile.write("\n".join(results))
    ofile.close()
    return results


[docs]def trim_missing_at_end_data_df(df_data, neg_lim=None):
    """
    Removes rows at end of file that have empty data

    :param df_data:
    :param neg_lim:
    :return:
    """
    df_data = df_data.reset_index(drop=True)
    nan_rows = df_data[df_data.iloc[:, :3].isnull().T.any().T]
    nan_indexes = list(nan_rows.index)
    if len(nan_indexes):
        i_start = None
        i_end = None
        if nan_indexes[0] == 0:
            i_start = nan_indexes[-1] + 1
            for i in range(1, len(nan_indexes)):
                if nan_indexes[i] - nan_indexes[i - 1] > 1:
                    i_start = nan_indexes[i - 1] + 1
                    i_end = nan_indexes[i]
                    break
        else:
            i_end = nan_indexes[0]
        df_data = df_data[i_start:i_end]

    if neg_lim is not None:
        # remove large neg values
        neg_rows = df_data[df_data.iloc[:, 2] < neg_lim]
        neg_indexes = list(neg_rows.index)
        if len(neg_indexes):
            df_data = df_data[:neg_indexes[0]]

    return df_data


[docs]def clean_top(df_top):
    """
    Removes empty data at top of file

    :param df_top:
    :return:
    """
    for i in range(len(df_top)):
        for j in range(4):
            if isinstance(df_top.iloc[i, j], str) and '\r\n' in df_top.iloc[i, j]:
                df_top.iloc[i, j] = df_top.iloc[i, j].replace('\r\n', '')


[docs]def convert_ags(ffp, out_fp, verbose=0):
    cpt_num = ffp.split("CPT_")[-1]
    if "_AGS" in ffp:
        cpt_num = int(cpt_num.split("_AGS")[0])
    elif "_Raw" in ffp:  # e.g. CPT_27240_Raw01.xls
        cpt_num = int(cpt_num.split("_Raw")[0])
    elif "McM" in ffp or "M" in ffp or "DAL" in ffp or "Fitz" in ffp or "ANZ" in ffp:
        cpt_num = cpt_num[:-4]
    else:
        try:
            if cpt_num[-5:] == '.xlsx':
                cpt_num = int(cpt_num[:-5])
            elif cpt_num[-4:] == '.xls':
                cpt_num = int(cpt_num[:-4])
            else:
                return 0
        except ValueError:
            return 0

    df = pd.read_excel(ffp, sheet_name=0)
    cols = list(df)
    if len(df) < 20 or len(cols) < 4:
        return 0
    if df.iloc[20, 0] == D_STR and df.iloc[20, 1] == QC_STR and df.iloc[20, 2] == FS_STR and df.iloc[20, 3] == U2_STR:
        pass
    else:
        return 0
    df_titles = df.iloc[20:21, 0:4]
    df_data = df.iloc[21:, 0:4]
    df_data = trim_missing_at_end_data_df(df_data, neg_lim=-100)

    df_top = df.iloc[0:19, 0:4]
    more = 3
    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_top.index = df_top.index + 1  # shifting index
    df_top = df_top.sort_index()  # sorting by index
    clean_top(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]

    df_new = pd.concat([df_top, df_z, df_titles, df_data])
    # df_new.reindex(index=np.arange(len(df_new)))
    # df_new = pd.concat([df_top, df_z, df_data])
    df_top.iloc[3, 1] = 'H'
    df_tt = pd.concat([df_top, df_z, df_titles])

    df_new.to_csv(out_fp + "CPT_{0}.csv".format(cpt_num), index=False)
    return 1


[docs]def convert_shortened_ags(ffp, out_fp, verbose=0):
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:  # e.g. CPT_27240_Raw01.xls
        cpt_num = int(cpt_num.split("_Raw")[0])
    elif "McM" in ffp or "M" in ffp or "DAL" in ffp or "Fitz" in ffp or "ANZ" in ffp:
        cpt_num = cpt_num[:-4]
    else:
        try:
            if cpt_num[-5:] == '.xlsx':
                cpt_num = int(cpt_num[:-5])
            elif cpt_num[-4:] == '.xls':
                cpt_num = int(cpt_num[:-4])
            else:
                return 0
        except ValueError:
            return 0

    df = pd.read_excel(ffp, sheet_name=0)
    cols = list(df)
    if len(df) < 20 or len(cols) < 4:
        return 0
    ln0 = 14
    ln1 = 15
    if df.iloc[ln0, 0] == 'Depth [m]' and df.iloc[ln0, 1] == 'qc [MPa]' and df.iloc[ln0, 2] == 'fs [MPa]' and df.iloc[ln0, 3] == 'u2 [MPa]':
        ln = ln0
    elif df.iloc[ln1, 0] == 'Depth [m]' and df.iloc[ln1, 1] == 'qc [MPa]' and df.iloc[ln1, 2] == 'fs [MPa]' and df.iloc[ln1, 3] == 'u2 [MPa]':
        ln = ln1
    else:
        return 0

    df_data = df.iloc[ln + 1:, 0:4]
    df_data = trim_missing_at_end_data_df(df_data, neg_lim=-100)

    df_top = df.iloc[0:ln - 1, 0:4]
    heads = [D_STR, QC_STR, FS_STR, U2_STR]
    df_headers = pd.DataFrame([heads], columns=list(df_top))
    limit = 22
    more = limit - len(df_top)
    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_top.index = df_top.index + 1  # shifting index
    df_top = df_top.sort_index()  # sorting by index
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Predrill":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])
    # df_new = pd.concat([df_top, df_z, df_data])

    df_new.to_csv(out_fp + "CPT_{0}.csv".format(cpt_num), index=False)
    return 1


[docs]def convert_tabulated_kpas(ffp, out_fp, verbose=0):
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:  # e.g. CPT_27240_Raw01.xls
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    df = pd.read_excel(ffp, sheet_name=0)
    cols = list(df)
    if len(df) < 21 or len(cols) < 4:
        return 0
    if df.iloc[21, 0] == D_STR and df.iloc[21, 1] == QC_STR and df.iloc[21, 2] == 'fs (kPa)' and df.iloc[21, 3] == 'u (kPa)':
        pass
    else:
        return 0
    # df_titles = df.iloc[20:21, 0:4]
    df_data = df.iloc[22:, 0:4]
    df_data = trim_missing_at_end_data_df(df_data, neg_lim=-100)
    df_data.iloc[:, 2] = df_data.iloc[:, 2] / 1e3  # convert to MPa
    df_data.iloc[:, 3] = df_data.iloc[:, 3] / 1e3  # convert to MPa

    df_top = df.iloc[0:20, 0:4]

    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    more = 2
    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_top.index = df_top.index + 1  # shifting index
    df_top = df_top.sort_index()  # sorting by index
    for i in range(len(df_top)):
        for j in range(3):
            if df_top.iloc[i, j] == 'Nil':
                df_top.iloc[i, j] = 0.0
            if df_top.iloc[i, j] == 'nil':
                df_top.iloc[i, j] = 0.0
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])
    # df_new = pd.concat([df_top, df_z, df_data])

    df_new.to_csv(out_fp + "CPT_{0}.csv".format(cpt_num), index=False)
    return 1


[docs]def convert_raw01_extra_cols(ffp, out_fp, verbose=0):
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])

    d_str = 'Penetration Depth (m)'
    # try info all on one page
    try:
        df = pd.read_excel(ffp, sheet_name=0)
        cols = list(df)
        if len(df) < 20 or len(cols) < 4:
            raise ValueError
        if df.iloc[20, 1] == d_str and df.iloc[20, 2] == QC_STR and df.iloc[20, 3] == FS_STR and df.iloc[20, 4] == U2_STR:
            df_data = df.iloc[21:, 1:5]
            df_top = df.iloc[0:19, 0:4]
        else:
            raise ValueError
    except ValueError:
        # try info split over two sheets
        try:
            df = pd.read_excel(ffp, sheet_name=1)
        except IndexError:
            return 0
        cols = list(df)
        if cols[0] == d_str and cols[1] == QC_STR and cols[2] == FS_STR and cols[3] == U2_STR:
            df_data = df.iloc[:, :4]
            df_top = pd.read_excel(ffp, sheet_name=0)
            df_top = df_top.iloc[:min(22, len(df_top)), :4]
        else:
            return 0

    heads = [D_STR, QC_STR, FS_STR, U2_STR]

    df_data = trim_missing_at_end_data_df(df_data)

    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_data.columns = list(df_top)
    more = 22 - len(df_top)
    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_top.index = df_top.index + 1  # shifting index
    df_top = df_top.sort_index()  # sorting by index
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data], sort=True)

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_fugro_raw01(ffp, out_fp, verbose=0):  # e.g. CPT_27643_Raw01.xlsx
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    df = pd.read_excel(ffp, sheet_name=0)
    cols = list(df)
    tline = 44
    uline = 45
    if len(df) < 20 or len(cols) < 4:
        return 0
    if df.iloc[tline, 0] == 'Depth' and df.iloc[tline, 1] == 'Cone' and df.iloc[tline, 2] == 'Friction' and df.iloc[tline, 3] == 'Pore 2':
        if df.iloc[uline, 0] == 'm' and df.iloc[uline, 1] == 'MPa' and df.iloc[uline, 2] == 'MPa' and df.iloc[uline, 3] == 'MPa':
            pass
        else:
            return 0
    else:
        return 0
    df_data = df.iloc[uline + 1:, 0:4]
    df_data = trim_missing_at_end_data_df(df_data)
    df_top = df.iloc[0:19, 0:4]
    more = 3
    zeros = [["", "", "", ""]] * more
    zeros.append([D_STR, QC_STR, FS_STR, U2_STR])
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_top.index = df_top.index + 1  # shifting index
    df_top = df_top.sort_index()  # sorting by index
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_data], sort=True)

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xls(ffp, out_fp, verbose=0):
    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)
    if 'Header' in xf.sheet_names and 'CPT1' in xf.sheet_names:
        if verbose:
            print('found header match at convert_raw01_xls')
    else:
        return 0
    df = pd.read_excel(ffp, sheet_name='CPT1')
    if df.iloc[5, 0] == D_STR and df.iloc[5, 1] == QC_STR and df.iloc[5, 2] == 'fs(kPa)' and df.iloc[5, 3] == 'u (kPa)':
        pass
    else:
        return 0
    df_data = df.iloc[6:, 0:4]
    df_data = trim_missing_at_end_data_df(df_data)
    # Convert columns from kPa to MPa
    df_data.iloc[:, 2] = df_data.iloc[:, 2] / 1e3
    df_data.iloc[:, 3] = df_data.iloc[:, 3] / 1e3

    df_top = pd.read_excel(ffp, sheet_name='Header')
    df_top = df_top.iloc[:, 3:7]

    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, 4):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Water level":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    limit = 20
    more = limit - pp

    if gwl_row is not None and gwl_row > limit:
        df_top[limit - 2] = df_top[gwl_row]
    if aratio_row is not None and aratio_row > limit:
        df_top[limit - 1] = df_top[aratio_row]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xls_v2(ffp, out_fp, verbose=0):
    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        d_str = 'Depth (m)'
        qc_str = 'qc Clean (MPa)'
        fs_str = 'fs Clean (MPa)'
        u2_str = 'u2 Clean (MPa)'
        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str and cols[3] == u2_str:
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[:, 0:4]
    # Columns already in MPa
    df_data = trim_missing_at_end_data_df(df_data)

    df_top = pd.read_excel(ffp, sheet_name='CPT_Details')

    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, 4):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Water Level":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    limit = 22
    more = limit - pp

    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    # for i in range(more):
    #     df_top.loc[-1] = [0, 0, 0, 0]
    #     df_top.append([0, 0, 0, 0])
    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_w_meta_on_right(ffp, out_fp, verbose=0):
    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        d_str = 'Depth (m)'
        qc_str = 'qcClean (MPa)'
        fs_str = 'fsClean (MPa)'
        u2_str = 'u2Clean (MPa)'
        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str and cols[3] == u2_str:
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[:, 0:4]
    # Columns already in MPa
    df_data = trim_missing_at_end_data_df(df_data)

    df_top = df.iloc[:, 5:]
    r1 = []
    r2 = []
    r3 = []
    for head in df_top.columns:
        r1.append([head, df_top[head].iloc[0]])
        if not pd.isnull(df_top[head].iloc[2]):
            r2.append([df_top[head].iloc[2], df_top[head].iloc[3]])
        if not pd.isnull(df_top[head].iloc[6]):
            r3.append([df_top[head].iloc[6], df_top[head].iloc[7]])
    dd = r1 + r2 + r3
    df_top = pd.DataFrame(dd)

    n_header_cols = 2
    if n_header_cols < 4:
        for i in range(n_header_cols, 4):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Water Level":  # not used
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "ConeAreaRatio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    limit = 22
    more = limit - pp

    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))

    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_fugro_xls_v3(ffp, out_fp, verbose=0):
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        d_str = 'Depth'
        qc_str = 'Cone'
        fs_str = 'Friction'
        u2_strs = ['Pore 2', 'Pore2']
        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str and cols[3] in u2_strs:
            if df.iloc[0, 0] == 'm' and df.iloc[0, 1] == 'MPa' and df.iloc[0, 2] == 'MPa' and df.iloc[0, 3] == 'MPa':
                found = name
            else:
                return 0
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[1:, 0:4]
    # Columns already in MPa
    df_data = trim_missing_at_end_data_df(df_data)

    df_top = pd.read_excel(ffp, sheet_name='Description')

    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, 4):
            df_top["C%i" % i] = ""
    else:
        df_top = df_top.iloc[:, :4]
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    predrill_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Derived GWL (m):":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Tip Net Area Ratio:":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
        if df_top.iloc[i, 0] == "Pre-Drill (m):":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            predrill_row = i
    limit = 22
    more = limit - pp

    if predrill_row is not None and predrill_row > limit:
        df_top.iloc[limit - 3, :] = df_top.iloc[predrill_row, :]
    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_fugro_v2(ffp, out_fp, verbose=0):
    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    opt0 = ['Depth [m]', 'qc [MPa]', 'fs [MPa]', 'u2 [MPa]']
    opt1 = ['Depth [m]', 'Qc [MPa]', 'Fs [MPa]', 'U2 [MPa]']
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == opt0[0] and cols[1] == opt0[1] and cols[2] == opt0[2] and cols[3] == opt0[3]:
            found = name
        if cols[0] == opt1[0] and cols[1] == opt1[1] and cols[2] == opt1[2] and cols[3] == opt1[3]:
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[:, 0:4]
    # Convert columns from kPa to MPa
    df_data.iloc[:, 2] = df_data.iloc[:, 2]
    df_data.iloc[:, 3] = df_data.iloc[:, 3]
    df_data = trim_missing_at_end_data_df(df_data)

    if 'TESTING SUMMARY SHEET' in sheet_names:
        df_top = pd.read_excel(ffp, sheet_name='TESTING SUMMARY SHEET')
    elif 'Description' in sheet_names:
        df_top = pd.read_excel(ffp, sheet_name='Description')
    elif 'CPT SUMMARY SHEET' in sheet_names:
        df_top = pd.read_excel(ffp, sheet_name='CPT SUMMARY SHEET')
        dd = []
        for head in df_top.columns:
            dd.append([head, df_top[head].iloc[0]])
        df_top = pd.DataFrame(dd)
    else:
        raise ValueError("Sheet name not detected")

    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, 4):
            df_top["C%i" % i] = ""
    elif n_cols > 4:
        df_top = df_top.iloc[:, :4]
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Water Level":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    limit = 22
    more = limit - pp

    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [["", "", "", ""]] * more

    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_w_kpa(ffp, out_fp, verbose=0):
    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    opt1 = ['Depth [m]', 'Qc [MPa]', 'Fs [KPa]', 'U2 [KPa]']
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        if len(df) < 32:
            continue
        cols = list(df)

        if len(cols) < 3:
            continue
        if df.iloc[31, 0] == opt1[0] and df.iloc[31, 1] == opt1[1] and df.iloc[31, 2] == opt1[2] and df.iloc[31, 3] == opt1[3]:
            found = name
            break
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df_data = df.iloc[32:, 0:4]
    df_data.iloc[:, 2] = df_data.iloc[:, 2] / 1e3
    df_data.iloc[:, 3] = df_data.iloc[:, 3] / 1e3
    df_top = df.iloc[0:19, 0:4]

    heads = [D_STR, QC_STR, FS_STR, U2_STR]

    df_data = trim_missing_at_end_data_df(df_data)

    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_data.columns = list(df_top)
    more = 22 - len(df_top)
    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_top.index = df_top.index + 1  # shifting index
    df_top = df_top.sort_index()  # sorting by index
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data], sort=True)

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xls_v3(ffp, out_fp, verbose=0):
    d_str = 'Test Length (m)'
    qc_str = 'Cone Resistance (MPa)'
    fs_str = 'Local Friction (MPa)'
    u2_str = 'Pore Pressure (MPa)'

    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 3
    offset = None
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)

        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str:
            if cols[3] == u2_str:
                n_data_cols = 4
            offset = 0
            found = name
        elif cols[1] == d_str and cols[2] == qc_str and cols[3] == fs_str:
            if cols[4] == u2_str:
                n_data_cols = 4
            offset = 1
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[:, offset:n_data_cols + offset]
    df_data = trim_missing_at_end_data_df(df_data)
    df_data.fillna(0, inplace=True)
    # Convert columns from kPa to MPa
    df_data.iloc[:, 2] = df_data.iloc[:, 2]
    if n_data_cols == 4:
        df_data.iloc[:, 3] = df_data.iloc[:, 3]

    df_top = pd.read_excel(ffp, sheet_name='General')
    dd = []
    for head in df_top.columns:
        dd.append([head, df_top[head].iloc[0]])
    df_top = pd.DataFrame(dd)

    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, n_data_cols):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    predrill_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Water Level":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
        if df_top.iloc[i, 0] == "Predrilled":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            predrill_row = i
    limit = 21
    more = limit - pp

    if predrill_row is not None and predrill_row > limit:
        df_top.iloc[limit - 3, :] = df_top.iloc[predrill_row, :]
    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [[""] * n_data_cols] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    heads = [D_STR, QC_STR, FS_STR, U2_STR]
    if n_data_cols == 3:
        heads = [D_STR, QC_STR, FS_STR]
    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_w_underscores(ffp, out_fp, verbose=0):
    d_str = 'Depth_m'
    qc_str = 'qc_mpa'
    fs_str = 'fs_mpa'
    u2_str = 'u2_mpa'

    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 3
    offset = None
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)

        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str:
            if cols[3] == u2_str:
                n_data_cols = 4
            offset = 0
            found = name
        elif cols[1] == d_str and cols[2] == qc_str and cols[3] == fs_str:
            if cols[4] == u2_str:
                n_data_cols = 4
            offset = 1
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[:, offset:n_data_cols + offset]
    df_data = trim_missing_at_end_data_df(df_data)
    df_data.fillna(0, inplace=True)

    df_top = pd.read_excel(ffp, sheet_name='CPT_Setup')
    dd = []
    for head in df_top.columns:
        dd.append([head, df_top[head].iloc[0]])
    df_top = pd.DataFrame(dd)

    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, n_data_cols):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    predrill_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "WaterLevel":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "ConeAreaRatio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
        if df_top.iloc[i, 0] == "Predrill":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            if df_top.iloc[i, 1] == '' or pd.isnull(df_top.iloc[i, 1]):
                df_top.iloc[i, 1] = '0.0'
            predrill_row = i
    limit = 21
    more = limit - pp

    if predrill_row is not None and predrill_row > limit:
        df_top.iloc[limit - 3, :] = df_top.iloc[predrill_row, :]
    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [[""] * n_data_cols] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    heads = [D_STR, QC_STR, FS_STR, U2_STR]
    if n_data_cols == 3:
        heads = [D_STR, QC_STR, FS_STR]
    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xlsx_v2(ffp, out_fp, verbose=0):  # e.g. CPT_38858_Raw01.xlsx
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 3
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        d_str = 'Depth'
        qc_str = 'qc'
        fs_str = 'fs'
        u2_str = 'u2'
        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str:
            if df.iloc[0, 1] != '[MPa]' and df.iloc[0, 2] != '[MPa]':
                return 0
            if cols[3] == u2_str:
                n_data_cols = 4
                if df.iloc[0, 3] != '[MPa]':
                    return 0
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[2:, 0:n_data_cols]
    df_data.fillna(0, inplace=True)
    # Convert columns from kPa to MPa
    df_data.iloc[:, 2] = df_data.iloc[:, 2]
    if n_data_cols == 4:
        df_data.iloc[:, 3] = df_data.iloc[:, 3]

    df_data = trim_missing_at_end_data_df(df_data)
    df_top = pd.read_excel(ffp, sheet_name='Header')
    dd = []
    for head in df_top.columns:
        dd.append([head, df_top[head].iloc[0]])
    df_top = pd.DataFrame(dd)

    n_cols = len(list(df_top))
    if n_cols < n_data_cols:
        for i in range(n_cols, n_data_cols):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Waterlevel:":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    limit = 22
    more = limit - pp

    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [[""] * n_data_cols] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    if n_data_cols == 4:
        heads = [D_STR, QC_STR, FS_STR, U2_STR]
    else:
        heads = [D_STR, QC_STR, FS_STR]
    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xlsx_space_sep(ffp, out_fp, verbose=0):  # e.g. CPT_38858_Raw01.xlsx
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 3
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        cols = list(df)
        if len(cols) != 1:
            continue
        titles = df.iloc[9, 0]
        units = df.iloc[10, 0]
        if 'Depth     qc' in titles:
            if '[MPa]     [MPa]' in units:
                found = name
                break

    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    data = []
    for i in range(11, len(df)):
        data.append(df.iloc[i, 0].split()[:4])

    top = []
    for i in range(9):
        top.append(df.iloc[i, 0].split(':'))

    more = 13
    zeros = [[""] * n_data_cols] * more
    heads = [D_STR, QC_STR, FS_STR, U2_STR]
    head_lines = [heads]
    dd = top + zeros + head_lines + data

    df_new = pd.DataFrame(dd)
    df_new.iloc[0, -1] = inspect.stack()[0][3]

    for i in range(len(df_new)):
        if df_new.iloc[i, 0] == "Waterlevel":
            df_new.iloc[i, 0] = 'Assumed GWL:'
        if df_new.iloc[i, 0] == "Cone Area Ratio":
            df_new.iloc[i, 0] = 'aratio'
        if df_new.iloc[i, 0] == "PreDrill":
            df_new.iloc[i, 0] = 'Pre-Drill:'

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw_txt_xlsx(ffp, out_fp, verbose=0):  # e.g. CPT_49062
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 4
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        d_str = 'Depth'
        qc_str = 'qc'
        fs_str = 'fs'
        u2_str = 'u2'
        cols = list(df)
        if len(cols) < 3:
            continue
        if len(df) <= 12:
            continue

        if df.iloc[12, 0] == d_str and df.iloc[12, 1] == qc_str and df.iloc[12, 2] == fs_str and df.iloc[12, 3] == u2_str:
            found = name
        else:
            return 0

    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[15:, 0:n_data_cols]

    df_data = trim_missing_at_end_data_df(df_data)
    df_data.fillna(0, inplace=True)
    df_top = df.iloc[:12, 0:n_data_cols]

    n_cols = len(list(df_top))
    if n_cols < n_data_cols:
        for i in range(n_cols, n_data_cols):
            df_top["C%i" % i] = ""
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    predrill_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Waterlevel:":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
        if df_top.iloc[i, 0] == "PreDrill":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            predrill_row = i
    limit = 22
    more = limit - pp

    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if predrill_row is not None and predrill_row > limit:
        df_top.iloc[limit - 3, :] = df_top.iloc[predrill_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [[""] * n_data_cols] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    if n_data_cols == 4:
        heads = [D_STR, QC_STR, FS_STR, U2_STR]
    else:
        heads = [D_STR, QC_STR, FS_STR]
    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xlsx_from_csv(ffp, out_fp, verbose=0):  # e.g. CPT_29585_Raw01.xlsx
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 4
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)
        d_str = 'H [m]'
        qc_str = 'qc [MPa]'
        fs_str = 'fs [MPa]'
        u2_str = 'u2 [MPa]'
        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str:
            found = name
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    df_data = df.iloc[:, 0:n_data_cols]
    df_data.fillna(0, inplace=True)

    df_data = trim_missing_at_end_data_df(df_data)

    more = 22

    zeros = [[""] * n_data_cols] * more
    df_z = pd.DataFrame(zeros, columns=list([""] * n_data_cols))
    if n_data_cols == 4:
        heads = [D_STR, QC_STR, FS_STR, U2_STR]
    else:
        heads = [D_STR, QC_STR, FS_STR]
    df_headers = pd.DataFrame([heads], columns=[""] * n_data_cols)
    df_data.columns = [""] * n_data_cols
    df_z.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw02_xlsx_or_raw_03_xls(ffp, out_fp, verbose=0):
    cpt_num = ffp.split("CPT_")[-1]
    if "_Raw" in ffp:
        cpt_num = int(cpt_num.split("_Raw")[0])
    else:
        return 0

    xf = pd.ExcelFile(ffp)

    sheet_names = xf.sheet_names
    found = 0
    n_data_cols = 3
    opt1 = ['Test length[1]', 'Cone resistance[2]', 'Local friction[3]', 'Pore pressure u2[6]']
    opt2 = ['Test length[1] m', 'Cone resistance[2] MPa', 'Local friction[3] MPa', 'Pore pressure u2[6] MPa']
    units_in_title = 0
    for name in sheet_names:
        df = pd.read_excel(ffp, sheet_name=name)

        cols = list(df)
        if len(cols) < 3:
            continue
        if cols[0] == opt1[0] and cols[1] == opt1[1] and cols[2] == opt1[2]:
            if df.iloc[0, 0] == 'm' and df.iloc[0, 1] == 'MPa' and df.iloc[0, 2] == 'MPa':
                if cols[3] == opt1[3]:
                    n_data_cols = 4
                found = name
                break
        elif cols[0] == opt2[0] and cols[1] == opt2[1] and cols[2] == opt2[2] and cols[3] == opt2[3]:
            found = name
            units_in_title = 1
            break
    if not found:
        if verbose:
            print("CPT headers not found")
        return 0
    df = pd.read_excel(ffp, sheet_name=found)
    if units_in_title:
        df_data = df.iloc[:, 0:n_data_cols]
    else:
        df_data = df.iloc[1:, 0:n_data_cols]
    df_data.fillna(0, inplace=True)

    df_data = trim_missing_at_end_data_df(df_data)
    if 'General' in sheet_names:
        df_top = pd.read_excel(ffp, sheet_name='General')
        dd = []
        for head in df_top.columns:
            dd.append([head, df_top[head].iloc[0]])
        df_top = pd.DataFrame(dd)
    elif 'Header' in sheet_names:
        df_top = pd.read_excel(ffp, sheet_name='Header')
    else:
        return 0

    n_cols = len(list(df_top))
    if n_cols < n_data_cols:
        for i in range(n_cols, n_data_cols):
            df_top["C%i" % i] = ""
    if n_cols > n_data_cols:
        df_top = df_top.iloc[:, :n_data_cols]
    pp = len(df_top)

    gwl_row = None
    aratio_row = None
    predrill_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Water Level":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Cone Area Ratio":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
        if df_top.iloc[i, 0] == "Predrilled":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            predrill_row = i
    limit = 22
    more = limit - pp

    if predrill_row is not None and predrill_row > limit:
        df_top.iloc[limit - 3, :] = df_top.iloc[predrill_row, :]
    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]

    zeros = [[""] * n_data_cols] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))

    if n_data_cols == 4:
        heads = [D_STR, QC_STR, FS_STR, U2_STR]
        df_data.columns = list(df_top)
    else:
        heads = [D_STR, QC_STR, FS_STR]
        df_data.columns = list(df_top)
    df_headers = pd.DataFrame([heads], columns=list(df_top))
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)
    return 1


[docs]def convert_raw01_xlsx(ffp, out_fp, verbose=0):
    if "_Raw" not in ffp:
        return 0
    cpt_num = ffp.split("CPT_")[-1]
    cpt_num = int(cpt_num.split("_Raw")[0])
    xf = pd.ExcelFile(ffp)
    if 'Header' in xf.sheet_names and 'Data' in xf.sheet_names:
        if verbose:
            print('found sheet names at convert_raw01_xlsx')
    else:
        return 0
    df = pd.read_excel(ffp, sheet_name='Data')
    d_str = 'Depth [m]'
    qc_str = 'Cone resistance (qc) in MPa'
    fs_str = 'Sleeve friction (fs) in MPa'
    u2_str = 'Dynamic pore pressure (u2) in MPa'
    cols = list(df)
    if cols[0] == d_str and cols[1] == qc_str and cols[2] == fs_str and cols[3] == u2_str:
        pass
    else:
        return 0
    df_data = df.iloc[:, 0:4]

    df_data = trim_missing_at_end_data_df(df_data)

    df_top = pd.read_excel(ffp, sheet_name='Header')
    df_top = df_top.iloc[:, 0:4]

    pp = len(df_top)
    limit = 22
    more = limit - pp

    gwl_row = None
    aratio_row = None
    predrill_row = None
    for i in range(len(df_top)):
        if df_top.iloc[i, 0] == "Predrill":
            df_top.iloc[i, 0] = 'Pre-Drill:'
            predrill_row = i
        if df_top.iloc[i, 0] == "Water level":
            df_top.iloc[i, 0] = 'Assumed GWL:'
            if df_top.iloc[i, 1] < 0:
                df_top.iloc[i, 1] *= -1
            gwl_row = i
        if df_top.iloc[i, 0] == "Alpha Factor":
            df_top.iloc[i, 0] = 'aratio'
            aratio_row = i
    if predrill_row is not None and predrill_row > limit:
        df_top.iloc[limit - 3, :] = df_top.iloc[predrill_row, :]
    if gwl_row is not None and gwl_row > limit:
        df_top.iloc[limit - 2, :] = df_top.iloc[gwl_row, :]
    if aratio_row is not None and aratio_row > limit:
        df_top.iloc[limit - 1, :] = df_top.iloc[aratio_row, :]
    if more < 0:
        df_top = df_top[:limit]
    n_cols = len(list(df_top))
    if n_cols < 4:
        for i in range(n_cols, 4):
            df_top["C%i" % i] = ""

    n_cols = len(list(df_top))
    if more < 0:
        df_top = df_top[:limit]
    zeros = [["", "", "", ""]] * more
    df_z = pd.DataFrame(zeros, columns=list(df_top))
    df_headers = pd.DataFrame([[D_STR, QC_STR, FS_STR, U2_STR]], columns=list(df_top))
    df_data.columns = list(df_top)
    df_top.iloc[0, -1] = inspect.stack()[0][3]
    df_new = pd.concat([df_top, df_z, df_headers, df_data])

    df_new.to_csv(out_fp + "CPT_%i.csv" % cpt_num, index=False)

    return 1