I'll start by saying I am NOT a Python developer. But I have a need for synthetic data and was trying to use the Synthetic Data Vault (https://github.com/sdv-dev/SDV).
I have Python 3.7 installed (on Windows, I'm doing this right on my laptop for the moment while learning how it works).
python --version
Python 3.7.6
I was able to download the sdv package with pip, and I can run the first several lines of demo code (to load and view the metadata and demo tables). However, when I get to these lines in the demo:
sdv = SDV()
sdv.fit(metadata, tables)
I get the following error:
TypeError: cannot astype a datetimelike from [datetime64[ns]] to [int32]
I have not modified any of the code from git at all and have not tried any of my own code. I'm literally just trying to get the demo to work as described in the readme. I just installed the package and am working through the first example. Has anybody tried this and had the same problem? Any ideas on what I can do to get past this error?
The full stack trace is:
sdv.fit(metadata, tables)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\tools\Python\3.7\lib\site-packages\sdv\sdv.py", line 69, in fit
self.modeler.model_database(tables)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 128, in model_database
self.cpa(table_name, tables)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 99, in cpa
child_table = self.cpa(child_name, tables, child_key)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 99, in cpa
child_table = self.cpa(child_name, tables, child_key)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 92, in cpa
extended = self.metadata.transform(table_name, table)
File "C:\tools\Python\3.7\lib\site-packages\sdv\metadata.py", line 477, in transform
hyper_transformer.fit(data[fields])
File "C:\tools\Python\3.7\lib\site-packages\rdt\hyper_transformer.py", line 128, in fit
transformer.fit(column)
File "C:\tools\Python\3.7\lib\site-packages\rdt\transformers\datetime.py", line 55, in fit
transformed = self._transform(data)
File "C:\tools\Python\3.7\lib\site-packages\rdt\transformers\datetime.py", line 40, in _transform
integers = datetimes.astype(int).astype(float).values
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\generic.py", line 5691, in astype
**kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\managers.py", line 531, in astype
return self.apply('astype', dtype=dtype, **kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\managers.py", line 395, in apply
applied = getattr(b, f)(**kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\blocks.py", line 534, in astype
**kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\blocks.py", line 2139, in _astype
return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\blocks.py", line 633, in _astype
values = astype_nansafe(values.ravel(), dtype, copy=True)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\dtypes\cast.py", line 646, in astype_nansafe
to_dtype=dtype))
TypeError: cannot astype a datetimelike from [datetime64[ns]] to [int32]
Here is the full output of my session:
from sdv import load_demo
metadata, tables = load_demo(metadata=True)
metadata.to_dict()
{
"tables": {
"users": {
"primary_key": "user_id",
"fields": {
"user_id": {
"type": "id",
"subtype": "integer"
},
"country": {
"type": "categorical"
},
"gender": {
"type": "categorical"
},
"age": {
"type": "numerical",
"subtype": "integer"
}
}
},
"sessions": {
"primary_key": "session_id",
"fields": {
"session_id": {
"type": "id",
"subtype": "integer"
},
"user_id": {
"ref": {
"field": "user_id",
"table": "users"
},
"type": "id",
"subtype": "integer"
},
"device": {
"type": "categorical"
},
"os": {
"type": "categorical"
}
}
},
"transactions": {
"primary_key": "transaction_id",
"fields": {
"transaction_id": {
"type": "id",
"subtype": "integer"
},
"session_id": {
"ref": {
"field": "session_id",
"table": "sessions"
},
"type": "id",
"subtype": "integer"
},
"timestamp": {
"type": "datetime",
"format": "%Y-%m-%d"
},
"amount": {
"type": "numerical",
"subtype": "float"
},
"approved": {
"type": "boolean"
}
}
}
}
}
>>> tables
{'users': user_id country gender age
0 0 USA M 34
1 1 UK F 23
2 2 ES None 44
3 3 UK M 22
4 4 USA F 54
5 5 DE M 57
6 6 BG F 45
7 7 ES None 41
8 8 FR F 23
9 9 UK None 30, 'sessions': session_id user_id device os
0 0 0 mobile android
1 1 1 tablet ios
2 2 1 tablet android
3 3 2 mobile android
4 4 4 mobile ios
5 5 5 mobile android
6 6 6 mobile ios
7 7 6 tablet ios
8 8 6 mobile ios
9 9 8 tablet ios, 'transactions': transaction_id session_id timestamp amount approved
0 0 0 2019-01-01 12:34:32 100.0 True
1 1 0 2019-01-01 12:42:21 55.3 True
2 2 1 2019-01-07 17:23:11 79.5 True
3 3 3 2019-01-10 11:08:57 112.1 False
4 4 5 2019-01-10 21:54:08 110.0 False
5 5 5 2019-01-11 11:21:20 76.3 True
6 6 7 2019-01-22 14:44:10 89.5 True
7 7 8 2019-01-23 10:14:09 132.1 False
8 8 9 2019-01-27 16:09:17 68.0 True
9 9 9 2019-01-29 12:10:48 99.9 True}
metadata.visualize()
<graphviz.dot.Digraph object at 0x00000196E8755488>
from sdv import SDV
sdv = SDV()
sdv.fit(metadata, tables)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\tools\Python\3.7\lib\site-packages\sdv\sdv.py", line 69, in fit
self.modeler.model_database(tables)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 128, in model_database
self.cpa(table_name, tables)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 99, in cpa
child_table = self.cpa(child_name, tables, child_key)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 99, in cpa
child_table = self.cpa(child_name, tables, child_key)
File "C:\tools\Python\3.7\lib\site-packages\sdv\modeler.py", line 92, in cpa
extended = self.metadata.transform(table_name, table)
File "C:\tools\Python\3.7\lib\site-packages\sdv\metadata.py", line 477, in transform
hyper_transformer.fit(data[fields])
File "C:\tools\Python\3.7\lib\site-packages\rdt\hyper_transformer.py", line 128, in fit
transformer.fit(column)
File "C:\tools\Python\3.7\lib\site-packages\rdt\transformers\datetime.py", line 55, in fit
transformed = self._transform(data)
File "C:\tools\Python\3.7\lib\site-packages\rdt\transformers\datetime.py", line 40, in _transform
integers = datetimes.astype(int).astype(float).values
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\generic.py", line 5691, in astype
**kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\managers.py", line 531, in astype
return self.apply('astype', dtype=dtype, **kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\managers.py", line 395, in apply
applied = getattr(b, f)(**kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\blocks.py", line 534, in astype
**kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\blocks.py", line 2139, in _astype
return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\internals\blocks.py", line 633, in _astype
values = astype_nansafe(values.ravel(), dtype, copy=True)
File "C:\tools\Python\3.7\lib\site-packages\pandas\core\dtypes\cast.py", line 646, in astype_nansafe
to_dtype=dtype))
TypeError: cannot astype a datetimelike from [datetime64[ns]] to [int32]
Actually, I found a solution - not being a Python developer, not sure if it's the best solution but it cleared up the error.
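In the datetime.py code on line 41 (the traceback reports the failing statement at line 40), I changed:
integers = datetimes.astype(int).astype(float).values
to
integers = datetimes.astype('int64').astype(float).values
On Windows, a plain int is treated as a 32-bit integer here (hence the "[int32]" in the error message), and pandas cannot cast datetime64[ns] values to it; asking for a 64-bit integer explicitly avoids the problem.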
I would assume, though, that there's a way to resolve this without changing the project's code (this is not my code; it is the package I downloaded), but I'm able to continue my research now.