Code: Select all
import re
import time

import pandas as pd
# Snippet 1: flag every row whose path contains any of several folder
# fragments (case-insensitive), then print the elapsed time and the frame.
t0 = time.perf_counter()  # perf_counter is the clock intended for intervals
df = pd.DataFrame({
    'FullName': [
        'C:/historical Dog analysis/Digger.doc',
        'C:/historical Dog analysis/Roscoe.doc',
        'C:/2024/Budgie requests/pipsqueak.csv',
        'C:/text4.doc',
        'C:/text5.doc',
    ],
})
# Literal path fragments to search for. Each one is escaped before being
# joined into a single alternation, so regex metacharacters that can appear
# in folder names (".", "(", "+", ...) are matched literally.
substrings = [
    "/historical Dog analysis/",
    "/Budgie requests/",
    "Dog analysis/best practices",
]
pattern = "|".join(re.escape(fragment) for fragment in substrings)
# New columns are collected in a dict (label -> boolean Series) and attached
# to the frame with a single concat.
new_columns = {
    "_Outreach/Website design": df['FullName'].str.contains(
        pattern, case=False, regex=True
    ),
}
new_df = pd.DataFrame(new_columns)
# new_df is derived from df, so both share the same index and no reindex is
# needed after the concat.
df = pd.concat([df, new_df], axis=1)
t1 = time.perf_counter()
print(t1 - t0)
print(df)
Code: Select all
# Snippet 2: flag rows whose full path is exactly one of a set of known
# paths, then print the elapsed time and the frame.
t0 = time.perf_counter()  # perf_counter is the clock intended for intervals
df = pd.DataFrame({
    'FullName': [
        'C:/historical Dog analysis/Digger.doc',
        'C:/historical Dog analysis/Roscoe.doc',
        'C:/2024/Budgie requests/pipsqueak.csv',
        'C:/text4.doc',
        'C:/text5.doc',
    ],
})
# Series.isin tests each value for equality against the listed values. It
# does not treat its arguments as patterns, so passing something like
# ".*/historical Dog analysis/.*" yields False for every row. Substring or
# regex matching needs Series.str.contains instead.
exact_paths = [
    "C:/historical Dog analysis/Digger.doc",
    "C:/2024/Budgie requests/pipsqueak.csv",
]
# isin() returns a Series that is still named "FullName"; rename it so the
# concat below does not produce two columns with the same label.
new_columns = df["FullName"].isin(exact_paths).rename("_Outreach/Website design")
new_df = pd.DataFrame(new_columns)
# new_df is derived from df, so both share the same index and no reindex is
# needed after the concat.
df = pd.concat([df, new_df], axis=1)
t1 = time.perf_counter()
print(t1 - t0)
print(df)
Code: Select all
# Snippet 3: plain-Python substring search over the 'FullName' values of the
# existing `df` (created by an earlier snippet), attached as a row-aligned
# boolean column.
t0 = time.perf_counter()  # time this snippet on its own, not from an earlier t0
col_one_list = df['FullName'].tolist()
# `x in text` requires a single string on the left, so a list of substrings
# cannot be used directly, and `|` is not defined between two str objects.
# any() checks each candidate substring in turn (case-sensitive).
b = ("/historical Dog analysis/", "/Budgie requests/")
# One flag per row, in row order. Filtering the list instead would keep only
# the matching values and lose their positions, so the concat below would
# attach them to the first rows rather than to the rows they came from.
matches = [any(sub in name for sub in b) for name in col_one_list]
# The matching paths themselves, kept for the diagnostic print.
new_columns = [name for name, hit in zip(col_one_list, matches) if hit]
print(new_columns)
# Built from a plain list, so the new column gets the default label 0; the
# explicit index keeps it aligned with df's rows.
new_df = pd.DataFrame(matches, index=df.index)
df = pd.concat([df, new_df], axis=1)
t1 = time.perf_counter()
print(t1 - t0)
print(df)