dataframes#

# import the pandas library for working with tabular (spreadsheet) data

import pandas as pd

We will first work in our data in the form of a dataframe. A dataframe is a two-dimensional data structure that holds data in a table with rows and columns. It is the same as a dictionary or dict type of data.

# pulling up a CSV of search results from congress.gov 
# if this code doesn't work, download the csv from link 
# below, and upload it to your google colab space
# https://bit.ly/congress_csv

df = pd.read_csv('https://bit.ly/transgender_raw_data')
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
Cell In[2], line 6
      1 # pulling up a CSV of search results from congress.gov 
      2 # if this code doesn't work, download the csv from link 
      3 # below, and upload it to your google colab space
      4 # https://bit.ly/congress_csv
----> 6 df = pd.read_csv('https://bit.ly/transgender_raw_data')

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/parsers/readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
    935 kwds_defaults = _refine_defaults_read(
    936     dialect,
    937     delimiter,
   (...)
    944     dtype_backend=dtype_backend,
    945 )
    946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/parsers/readers.py:611, in _read(filepath_or_buffer, kwds)
    608 _validate_names(kwds.get("names", None))
    610 # Create the parser.
--> 611 parser = TextFileReader(filepath_or_buffer, **kwds)
    613 if chunksize or iterator:
    614     return parser

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1448, in TextFileReader.__init__(self, f, engine, **kwds)
   1445     self.options["has_index_names"] = kwds["has_index_names"]
   1447 self.handles: IOHandles | None = None
-> 1448 self._engine = self._make_engine(f, self.engine)

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1705, in TextFileReader._make_engine(self, f, engine)
   1703     if "b" not in mode:
   1704         mode += "b"
-> 1705 self.handles = get_handle(
   1706     f,
   1707     mode,
   1708     encoding=self.options.get("encoding", None),
   1709     compression=self.options.get("compression", None),
   1710     memory_map=self.options.get("memory_map", False),
   1711     is_text=is_text,
   1712     errors=self.options.get("encoding_errors", "strict"),
   1713     storage_options=self.options.get("storage_options", None),
   1714 )
   1715 assert self.handles is not None
   1716 f = self.handles.handle

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/common.py:718, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    715     codecs.lookup_error(errors)
    717 # open URLs
--> 718 ioargs = _get_filepath_or_buffer(
    719     path_or_buf,
    720     encoding=encoding,
    721     compression=compression,
    722     mode=mode,
    723     storage_options=storage_options,
    724 )
    726 handle = ioargs.filepath_or_buffer
    727 handles: list[BaseBuffer]

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/common.py:372, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
    370 # assuming storage_options is to be interpreted as headers
    371 req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options)
--> 372 with urlopen(req_info) as req:
    373     content_encoding = req.headers.get("Content-Encoding", None)
    374     if content_encoding == "gzip":
    375         # Override compression based on Content-Encoding header

File /opt/anaconda3/lib/python3.11/site-packages/pandas/io/common.py:274, in urlopen(*args, **kwargs)
    268 """
    269 Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
    270 the stdlib.
    271 """
    272 import urllib.request
--> 274 return urllib.request.urlopen(*args, **kwargs)

File /opt/anaconda3/lib/python3.11/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    214 else:
    215     opener = _opener
--> 216 return opener.open(url, data, timeout)

File /opt/anaconda3/lib/python3.11/urllib/request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
    523 for processor in self.process_response.get(protocol, []):
    524     meth = getattr(processor, meth_name)
--> 525     response = meth(req, response)
    527 return response

File /opt/anaconda3/lib/python3.11/urllib/request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
    631 # According to RFC 2616, "2xx" code indicates that the client's
    632 # request was successfully received, understood, and accepted.
    633 if not (200 <= code < 300):
--> 634     response = self.parent.error(
    635         'http', request, response, code, msg, hdrs)
    637 return response

File /opt/anaconda3/lib/python3.11/urllib/request.py:557, in OpenerDirector.error(self, proto, *args)
    555     http_err = 0
    556 args = (dict, proto, meth_name) + args
--> 557 result = self._call_chain(*args)
    558 if result:
    559     return result

File /opt/anaconda3/lib/python3.11/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
    494 for handler in handlers:
    495     func = getattr(handler, meth_name)
--> 496     result = func(*args)
    497     if result is not None:
    498         return result

File /opt/anaconda3/lib/python3.11/urllib/request.py:749, in HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
    746 fp.read()
    747 fp.close()
--> 749 return self.parent.open(new, timeout=req.timeout)

File /opt/anaconda3/lib/python3.11/urllib/request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
    523 for processor in self.process_response.get(protocol, []):
    524     meth = getattr(processor, meth_name)
--> 525     response = meth(req, response)
    527 return response

File /opt/anaconda3/lib/python3.11/urllib/request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
    631 # According to RFC 2616, "2xx" code indicates that the client's
    632 # request was successfully received, understood, and accepted.
    633 if not (200 <= code < 300):
--> 634     response = self.parent.error(
    635         'http', request, response, code, msg, hdrs)
    637 return response

File /opt/anaconda3/lib/python3.11/urllib/request.py:563, in OpenerDirector.error(self, proto, *args)
    561 if http_err:
    562     args = (dict, 'default', 'http_error_default') + orig_args
--> 563     return self._call_chain(*args)

File /opt/anaconda3/lib/python3.11/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
    494 for handler in handlers:
    495     func = getattr(handler, meth_name)
--> 496     result = func(*args)
    497     if result is not None:
    498         return result

File /opt/anaconda3/lib/python3.11/urllib/request.py:643, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
    642 def http_error_default(self, req, fp, code, msg, hdrs):
--> 643     raise HTTPError(req.full_url, code, msg, hdrs, fp)

HTTPError: HTTP Error 404: Not Found
# checking out the df object

df
Legislation Number URL Congress Title Sponsor Party of Sponsor Date of Introduction Committees Latest Action Latest Action Date ... Related Bill.211 Related Bill.212 Related Bill.213 Latest Summary Amends Bill Date Offered Date Submitted Date Proposed Amendment Text (Latest) Amends Amendment
0 H.R. 1112 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Ensuring Military Readiness Act of 2023 Banks, Jim [Rep.-R-IN-3] Republican 2/21/23 House - Armed Services Referred to the House Committee on Armed Servi... 2/21/23 ... NaN NaN NaN <p><b>Ensuring Military Readiness Act of 2023... NaN NaN NaN NaN NaN NaN
1 S. 435 https://www.congress.gov/bill/118th-congress/s... 118th Congress (2023-2024) Ensuring Military Readiness Act of 2023 Rubio, Marco [Sen.-R-FL] Republican 2/15/23 Senate - Armed Services Read twice and referred to the Committee on Ar... 2/15/23 ... NaN NaN NaN <p><b>Ensuring Military Readiness Act of 2023... NaN NaN NaN NaN NaN NaN
2 H.Res. 886 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Supporting the goals and principles of Transge... Jayapal, Pramila [Rep.-D-WA-7] Democratic 11/21/23 House - Judiciary Referred to the House Committee on the Judiciary. 11/21/23 ... NaN NaN NaN <p>This resolution expresses support for the ... NaN NaN NaN NaN NaN NaN
3 S.Res. 464 https://www.congress.gov/bill/118th-congress/s... 118th Congress (2023-2024) A resolution supporting the goals and principl... Hirono, Mazie K. [Sen.-D-HI] Democratic 11/15/23 Senate - Judiciary Star Print ordered on resolution. 12/4/23 ... NaN NaN NaN <p>This resolution expresses support for the ... NaN NaN NaN NaN NaN NaN
4 H.Res. 269 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Recognizing that it is the duty of the Federal... Jayapal, Pramila [Rep.-D-WA-7] Democratic 3/30/23 House - Judiciary, Education and the Workforce... Sponsor introductory remarks on measure. (CR H... 4/19/23 ... NaN NaN NaN <p>This resolution expresses support for impl... NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
281 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN I welcome that discussion.\n Mr. Chair, I yie... NaN
282 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN biotechnology equipment or service produced or... NaN
283 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN United States assistance has \n been provi... NaN
284 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN Director of National Intelligence, shall submi... NaN
285 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN demanded.\n A recorded vote was ordered.\n T... NaN

286 rows × 650 columns

What is a dataframe object?#

  • tabular format

  • dictionary structure, with a particular syntax for accessing elements

    • key-value pairs

    • df[‘column’]

filipa = {
    'name': ['filipa', 'da gama', 'calado'],
    'age': 34,
    'degree': 'literature',
    'job': 'digital scholarship specialist',
}
type(filipa)
dict
filipa['name']
['filipa', 'da gama', 'calado']
filipa['age']
34
filipa['job']
'digital scholarship specialist'
# within a DF is a Series, a list type of object. Use list indexing to pull out items

filipa['name'][0]
'filipa'
filipa['name'][2]
'calado'

viewing data: info() head() tail()#

df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 286 entries, 0 to 285
Columns: 650 entries, Legislation Number to Amends Amendment
dtypes: float64(4), object(646)
memory usage: 1.4+ MB
df.head()
Legislation Number URL Congress Title Sponsor Party of Sponsor Date of Introduction Committees Latest Action Latest Action Date ... Related Bill.211 Related Bill.212 Related Bill.213 Latest Summary Amends Bill Date Offered Date Submitted Date Proposed Amendment Text (Latest) Amends Amendment
0 H.R. 1112 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Ensuring Military Readiness Act of 2023 Banks, Jim [Rep.-R-IN-3] Republican 2/21/23 House - Armed Services Referred to the House Committee on Armed Servi... 2/21/23 ... NaN NaN NaN <p><b>Ensuring Military Readiness Act of 2023... NaN NaN NaN NaN NaN NaN
1 S. 435 https://www.congress.gov/bill/118th-congress/s... 118th Congress (2023-2024) Ensuring Military Readiness Act of 2023 Rubio, Marco [Sen.-R-FL] Republican 2/15/23 Senate - Armed Services Read twice and referred to the Committee on Ar... 2/15/23 ... NaN NaN NaN <p><b>Ensuring Military Readiness Act of 2023... NaN NaN NaN NaN NaN NaN
2 H.Res. 886 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Supporting the goals and principles of Transge... Jayapal, Pramila [Rep.-D-WA-7] Democratic 11/21/23 House - Judiciary Referred to the House Committee on the Judiciary. 11/21/23 ... NaN NaN NaN <p>This resolution expresses support for the ... NaN NaN NaN NaN NaN NaN
3 S.Res. 464 https://www.congress.gov/bill/118th-congress/s... 118th Congress (2023-2024) A resolution supporting the goals and principl... Hirono, Mazie K. [Sen.-D-HI] Democratic 11/15/23 Senate - Judiciary Star Print ordered on resolution. 12/4/23 ... NaN NaN NaN <p>This resolution expresses support for the ... NaN NaN NaN NaN NaN NaN
4 H.Res. 269 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Recognizing that it is the duty of the Federal... Jayapal, Pramila [Rep.-D-WA-7] Democratic 3/30/23 House - Judiciary, Education and the Workforce... Sponsor introductory remarks on measure. (CR H... 4/19/23 ... NaN NaN NaN <p>This resolution expresses support for impl... NaN NaN NaN NaN NaN NaN

5 rows × 650 columns

df.tail()
Legislation Number URL Congress Title Sponsor Party of Sponsor Date of Introduction Committees Latest Action Latest Action Date ... Related Bill.211 Related Bill.212 Related Bill.213 Latest Summary Amends Bill Date Offered Date Submitted Date Proposed Amendment Text (Latest) Amends Amendment
281 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN I welcome that discussion.\n Mr. Chair, I yie... NaN
282 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN biotechnology equipment or service produced or... NaN
283 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN United States assistance has \n been provi... NaN
284 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN Director of National Intelligence, shall submi... NaN
285 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN demanded.\n A recorded vote was ordered.\n T... NaN

5 rows × 650 columns

We can also explore columns using the dictionary syntax to access each column

df['Date of Introduction']
0       2/21/23
1       2/15/23
2      11/21/23
3      11/15/23
4       3/30/23
         ...   
281         NaN
282         NaN
283         NaN
284         NaN
285         NaN
Name: Date of Introduction, Length: 286, dtype: object

cleaning rows & columns: dropna() .notna() drop()#

# only drops rows containing all NaN: missing value
# this doesn't work for our dataset. Why?

df.dropna()
Legislation Number URL Congress Title Sponsor Party of Sponsor Date of Introduction Committees Latest Action Latest Action Date ... Related Bill.211 Related Bill.212 Related Bill.213 Latest Summary Amends Bill Date Offered Date Submitted Date Proposed Amendment Text (Latest) Amends Amendment

0 rows × 650 columns

# check the tail. There are still some values there.

df.tail()
Legislation Number URL Congress Title Sponsor Party of Sponsor Date of Introduction Committees Latest Action Latest Action Date ... Related Bill.211 Related Bill.212 Related Bill.213 Latest Summary Amends Bill Date Offered Date Submitted Date Proposed Amendment Text (Latest) Amends Amendment
281 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN I welcome that discussion.\n Mr. Chair, I yie... NaN
282 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN biotechnology equipment or service produced or... NaN
283 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN United States assistance has \n been provi... NaN
284 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN Director of National Intelligence, shall submi... NaN
285 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN demanded.\n A recorded vote was ordered.\n T... NaN

5 rows × 650 columns

# check df info. We still have the same number of entries (rows)

df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 286 entries, 0 to 285
Columns: 650 entries, Legislation Number to Amends Amendment
dtypes: float64(4), object(646)
memory usage: 1.4+ MB
# try isolating a column, dropping a row if it has any NaN within
# uses syntax for accessing columns from DF

df = df[df['Legislation Number'].notna()]
# notice the line with "entries"
# we have 148 rows, but our index goes up to 280

df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 148 entries, 0 to 280
Columns: 650 entries, Legislation Number to Amends Amendment
dtypes: float64(4), object(646)
memory usage: 752.7+ KB
# what's in the columns? 
# googled how to print out all of the column names

df.columns.values
array(['Legislation Number', 'URL', 'Congress', 'Title', 'Sponsor',
       'Party of Sponsor', 'Date of Introduction', 'Committees',
       'Latest Action', 'Latest Action Date', 'Cosponsor', 'Cosponsor.1',
       'Cosponsor.2', 'Cosponsor.3', 'Cosponsor.4', 'Cosponsor.5',
       'Cosponsor.6', 'Cosponsor.7', 'Cosponsor.8', 'Cosponsor.9',
       'Cosponsor.10', 'Cosponsor.11', 'Cosponsor.12', 'Cosponsor.13',
       'Cosponsor.14', 'Cosponsor.15', 'Cosponsor.16', 'Cosponsor.17',
       'Cosponsor.18', 'Cosponsor.19', 'Cosponsor.20', 'Cosponsor.21',
       'Cosponsor.22', 'Cosponsor.23', 'Cosponsor.24', 'Cosponsor.25',
       'Cosponsor.26', 'Cosponsor.27', 'Cosponsor.28', 'Cosponsor.29',
       'Cosponsor.30', 'Cosponsor.31', 'Cosponsor.32', 'Cosponsor.33',
       'Cosponsor.34', 'Cosponsor.35', 'Cosponsor.36', 'Cosponsor.37',
       'Cosponsor.38', 'Cosponsor.39', 'Cosponsor.40', 'Cosponsor.41',
       'Cosponsor.42', 'Cosponsor.43', 'Cosponsor.44', 'Cosponsor.45',
       'Cosponsor.46', 'Cosponsor.47', 'Cosponsor.48', 'Cosponsor.49',
       'Cosponsor.50', 'Cosponsor.51', 'Cosponsor.52', 'Cosponsor.53',
       'Cosponsor.54', 'Cosponsor.55', 'Cosponsor.56', 'Cosponsor.57',
       'Cosponsor.58', 'Cosponsor.59', 'Cosponsor.60', 'Cosponsor.61',
       'Cosponsor.62', 'Cosponsor.63', 'Cosponsor.64', 'Cosponsor.65',
       'Cosponsor.66', 'Cosponsor.67', 'Cosponsor.68', 'Cosponsor.69',
       'Cosponsor.70', 'Cosponsor.71', 'Cosponsor.72', 'Cosponsor.73',
       'Cosponsor.74', 'Cosponsor.75', 'Cosponsor.76', 'Cosponsor.77',
       'Cosponsor.78', 'Cosponsor.79', 'Cosponsor.80', 'Cosponsor.81',
       'Cosponsor.82', 'Cosponsor.83', 'Cosponsor.84', 'Cosponsor.85',
       'Cosponsor.86', 'Cosponsor.87', 'Cosponsor.88', 'Cosponsor.89',
       'Cosponsor.90', 'Cosponsor.91', 'Cosponsor.92', 'Cosponsor.93',
       'Cosponsor.94', 'Cosponsor.95', 'Cosponsor.96', 'Cosponsor.97',
       'Cosponsor.98', 'Cosponsor.99', 'Cosponsor.100', 'Cosponsor.101',
       'Cosponsor.102', 'Cosponsor.103', 'Cosponsor.104', 'Cosponsor.105',
       'Cosponsor.106', 'Cosponsor.107', 'Cosponsor.108', 'Cosponsor.109',
       'Cosponsor.110', 'Cosponsor.111', 'Cosponsor.112', 'Cosponsor.113',
       'Cosponsor.114', 'Cosponsor.115', 'Cosponsor.116', 'Cosponsor.117',
       'Cosponsor.118', 'Cosponsor.119', 'Cosponsor.120', 'Cosponsor.121',
       'Cosponsor.122', 'Cosponsor.123', 'Cosponsor.124', 'Cosponsor.125',
       'Cosponsor.126', 'Cosponsor.127', 'Cosponsor.128', 'Cosponsor.129',
       'Cosponsor.130', 'Cosponsor.131', 'Cosponsor.132', 'Cosponsor.133',
       'Cosponsor.134', 'Cosponsor.135', 'Cosponsor.136', 'Cosponsor.137',
       'Cosponsor.138', 'Cosponsor.139', 'Cosponsor.140', 'Cosponsor.141',
       'Cosponsor.142', 'Cosponsor.143', 'Cosponsor.144', 'Cosponsor.145',
       'Cosponsor.146', 'Cosponsor.147', 'Cosponsor.148', 'Cosponsor.149',
       'Cosponsor.150', 'Cosponsor.151', 'Cosponsor.152', 'Cosponsor.153',
       'Cosponsor.154', 'Cosponsor.155', 'Cosponsor.156', 'Cosponsor.157',
       'Cosponsor.158', 'Cosponsor.159', 'Cosponsor.160', 'Cosponsor.161',
       'Cosponsor.162', 'Cosponsor.163', 'Cosponsor.164', 'Cosponsor.165',
       'Cosponsor.166', 'Cosponsor.167', 'Cosponsor.168', 'Cosponsor.169',
       'Cosponsor.170', 'Cosponsor.171', 'Cosponsor.172', 'Cosponsor.173',
       'Cosponsor.174', 'Cosponsor.175', 'Cosponsor.176', 'Cosponsor.177',
       'Cosponsor.178', 'Cosponsor.179', 'Cosponsor.180', 'Cosponsor.181',
       'Cosponsor.182', 'Cosponsor.183', 'Cosponsor.184', 'Cosponsor.185',
       'Cosponsor.186', 'Cosponsor.187', 'Cosponsor.188', 'Cosponsor.189',
       'Cosponsor.190', 'Cosponsor.191', 'Cosponsor.192', 'Cosponsor.193',
       'Cosponsor.194', 'Cosponsor.195', 'Cosponsor.196', 'Cosponsor.197',
       'Cosponsor.198', 'Cosponsor.199', 'Cosponsor.200', 'Cosponsor.201',
       'Cosponsor.202', 'Cosponsor.203', 'Cosponsor.204', 'Cosponsor.205',
       'Cosponsor.206', 'Cosponsor.207', 'Cosponsor.208', 'Cosponsor.209',
       'Cosponsor.210', 'Cosponsor.211', 'Cosponsor.212', 'Cosponsor.213',
       'Cosponsor.214', 'Number of Cosponsors', 'Subject', 'Subject.1',
       'Subject.2', 'Subject.3', 'Subject.4', 'Subject.5', 'Subject.6',
       'Subject.7', 'Subject.8', 'Subject.9', 'Subject.10', 'Subject.11',
       'Subject.12', 'Subject.13', 'Subject.14', 'Subject.15',
       'Subject.16', 'Subject.17', 'Subject.18', 'Subject.19',
       'Subject.20', 'Subject.21', 'Subject.22', 'Subject.23',
       'Subject.24', 'Subject.25', 'Subject.26', 'Subject.27',
       'Subject.28', 'Subject.29', 'Subject.30', 'Subject.31',
       'Subject.32', 'Subject.33', 'Subject.34', 'Subject.35',
       'Subject.36', 'Subject.37', 'Subject.38', 'Subject.39',
       'Subject.40', 'Subject.41', 'Subject.42', 'Subject.43',
       'Subject.44', 'Subject.45', 'Subject.46', 'Subject.47',
       'Subject.48', 'Subject.49', 'Subject.50', 'Subject.51',
       'Subject.52', 'Subject.53', 'Subject.54', 'Subject.55',
       'Subject.56', 'Subject.57', 'Subject.58', 'Subject.59',
       'Subject.60', 'Subject.61', 'Subject.62', 'Subject.63',
       'Subject.64', 'Subject.65', 'Subject.66', 'Subject.67',
       'Subject.68', 'Subject.69', 'Subject.70', 'Subject.71',
       'Subject.72', 'Subject.73', 'Subject.74', 'Subject.75',
       'Subject.76', 'Subject.77', 'Subject.78', 'Subject.79',
       'Subject.80', 'Subject.81', 'Subject.82', 'Subject.83',
       'Subject.84', 'Subject.85', 'Subject.86', 'Subject.87',
       'Subject.88', 'Subject.89', 'Subject.90', 'Subject.91',
       'Subject.92', 'Subject.93', 'Subject.94', 'Subject.95',
       'Subject.96', 'Subject.97', 'Subject.98', 'Subject.99',
       'Subject.100', 'Subject.101', 'Subject.102', 'Subject.103',
       'Subject.104', 'Subject.105', 'Subject.106', 'Subject.107',
       'Subject.108', 'Subject.109', 'Subject.110', 'Subject.111',
       'Subject.112', 'Subject.113', 'Subject.114', 'Subject.115',
       'Subject.116', 'Subject.117', 'Subject.118', 'Subject.119',
       'Subject.120', 'Subject.121', 'Subject.122', 'Subject.123',
       'Subject.124', 'Subject.125', 'Subject.126', 'Subject.127',
       'Subject.128', 'Subject.129', 'Subject.130', 'Subject.131',
       'Subject.132', 'Subject.133', 'Subject.134', 'Subject.135',
       'Subject.136', 'Subject.137', 'Subject.138', 'Subject.139',
       'Subject.140', 'Subject.141', 'Subject.142', 'Subject.143',
       'Subject.144', 'Subject.145', 'Subject.146', 'Subject.147',
       'Subject.148', 'Subject.149', 'Subject.150', 'Subject.151',
       'Subject.152', 'Subject.153', 'Subject.154', 'Subject.155',
       'Subject.156', 'Subject.157', 'Subject.158', 'Subject.159',
       'Subject.160', 'Subject.161', 'Subject.162', 'Subject.163',
       'Subject.164', 'Subject.165', 'Subject.166', 'Subject.167',
       'Subject.168', 'Subject.169', 'Subject.170', 'Subject.171',
       'Subject.172', 'Subject.173', 'Subject.174', 'Subject.175',
       'Subject.176', 'Subject.177', 'Subject.178', 'Subject.179',
       'Subject.180', 'Subject.181', 'Subject.182', 'Subject.183',
       'Subject.184', 'Subject.185', 'Subject.186', 'Subject.187',
       'Subject.188', 'Subject.189', 'Subject.190', 'Subject.191',
       'Subject.192', 'Subject.193', 'Subject.194', 'Subject.195',
       'Subject.196', 'Subject.197', 'Subject.198', 'Subject.199',
       'Subject.200', 'Subject.201', 'Number of Related Bills',
       'Related Bill', 'Related Bill.1', 'Related Bill.2',
       'Related Bill.3', 'Related Bill.4', 'Related Bill.5',
       'Related Bill.6', 'Related Bill.7', 'Related Bill.8',
       'Related Bill.9', 'Related Bill.10', 'Related Bill.11',
       'Related Bill.12', 'Related Bill.13', 'Related Bill.14',
       'Related Bill.15', 'Related Bill.16', 'Related Bill.17',
       'Related Bill.18', 'Related Bill.19', 'Related Bill.20',
       'Related Bill.21', 'Related Bill.22', 'Related Bill.23',
       'Related Bill.24', 'Related Bill.25', 'Related Bill.26',
       'Related Bill.27', 'Related Bill.28', 'Related Bill.29',
       'Related Bill.30', 'Related Bill.31', 'Related Bill.32',
       'Related Bill.33', 'Related Bill.34', 'Related Bill.35',
       'Related Bill.36', 'Related Bill.37', 'Related Bill.38',
       'Related Bill.39', 'Related Bill.40', 'Related Bill.41',
       'Related Bill.42', 'Related Bill.43', 'Related Bill.44',
       'Related Bill.45', 'Related Bill.46', 'Related Bill.47',
       'Related Bill.48', 'Related Bill.49', 'Related Bill.50',
       'Related Bill.51', 'Related Bill.52', 'Related Bill.53',
       'Related Bill.54', 'Related Bill.55', 'Related Bill.56',
       'Related Bill.57', 'Related Bill.58', 'Related Bill.59',
       'Related Bill.60', 'Related Bill.61', 'Related Bill.62',
       'Related Bill.63', 'Related Bill.64', 'Related Bill.65',
       'Related Bill.66', 'Related Bill.67', 'Related Bill.68',
       'Related Bill.69', 'Related Bill.70', 'Related Bill.71',
       'Related Bill.72', 'Related Bill.73', 'Related Bill.74',
       'Related Bill.75', 'Related Bill.76', 'Related Bill.77',
       'Related Bill.78', 'Related Bill.79', 'Related Bill.80',
       'Related Bill.81', 'Related Bill.82', 'Related Bill.83',
       'Related Bill.84', 'Related Bill.85', 'Related Bill.86',
       'Related Bill.87', 'Related Bill.88', 'Related Bill.89',
       'Related Bill.90', 'Related Bill.91', 'Related Bill.92',
       'Related Bill.93', 'Related Bill.94', 'Related Bill.95',
       'Related Bill.96', 'Related Bill.97', 'Related Bill.98',
       'Related Bill.99', 'Related Bill.100', 'Related Bill.101',
       'Related Bill.102', 'Related Bill.103', 'Related Bill.104',
       'Related Bill.105', 'Related Bill.106', 'Related Bill.107',
       'Related Bill.108', 'Related Bill.109', 'Related Bill.110',
       'Related Bill.111', 'Related Bill.112', 'Related Bill.113',
       'Related Bill.114', 'Related Bill.115', 'Related Bill.116',
       'Related Bill.117', 'Related Bill.118', 'Related Bill.119',
       'Related Bill.120', 'Related Bill.121', 'Related Bill.122',
       'Related Bill.123', 'Related Bill.124', 'Related Bill.125',
       'Related Bill.126', 'Related Bill.127', 'Related Bill.128',
       'Related Bill.129', 'Related Bill.130', 'Related Bill.131',
       'Related Bill.132', 'Related Bill.133', 'Related Bill.134',
       'Related Bill.135', 'Related Bill.136', 'Related Bill.137',
       'Related Bill.138', 'Related Bill.139', 'Related Bill.140',
       'Related Bill.141', 'Related Bill.142', 'Related Bill.143',
       'Related Bill.144', 'Related Bill.145', 'Related Bill.146',
       'Related Bill.147', 'Related Bill.148', 'Related Bill.149',
       'Related Bill.150', 'Related Bill.151', 'Related Bill.152',
       'Related Bill.153', 'Related Bill.154', 'Related Bill.155',
       'Related Bill.156', 'Related Bill.157', 'Related Bill.158',
       'Related Bill.159', 'Related Bill.160', 'Related Bill.161',
       'Related Bill.162', 'Related Bill.163', 'Related Bill.164',
       'Related Bill.165', 'Related Bill.166', 'Related Bill.167',
       'Related Bill.168', 'Related Bill.169', 'Related Bill.170',
       'Related Bill.171', 'Related Bill.172', 'Related Bill.173',
       'Related Bill.174', 'Related Bill.175', 'Related Bill.176',
       'Related Bill.177', 'Related Bill.178', 'Related Bill.179',
       'Related Bill.180', 'Related Bill.181', 'Related Bill.182',
       'Related Bill.183', 'Related Bill.184', 'Related Bill.185',
       'Related Bill.186', 'Related Bill.187', 'Related Bill.188',
       'Related Bill.189', 'Related Bill.190', 'Related Bill.191',
       'Related Bill.192', 'Related Bill.193', 'Related Bill.194',
       'Related Bill.195', 'Related Bill.196', 'Related Bill.197',
       'Related Bill.198', 'Related Bill.199', 'Related Bill.200',
       'Related Bill.201', 'Related Bill.202', 'Related Bill.203',
       'Related Bill.204', 'Related Bill.205', 'Related Bill.206',
       'Related Bill.207', 'Related Bill.208', 'Related Bill.209',
       'Related Bill.210', 'Related Bill.211', 'Related Bill.212',
       'Related Bill.213', 'Latest Summary', 'Amends Bill',
       'Date Offered', 'Date Submitted', 'Date Proposed',
       'Amendment Text (Latest)', 'Amends Amendment'], dtype=object)
for item in df.columns.values:
    if 'Cosponsor' in item:
        # reassigning DF in each step of the loop
        # axis = 1 means columns; axis = 0 means rows
        df = df.drop(item, axis=1)
# We are down to about 400. let's do the rest

df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 148 entries, 0 to 280
Columns: 434 entries, Legislation Number to Amends Amendment
dtypes: float64(3), object(431)
memory usage: 503.0+ KB
# see there is no ellipses anymore on the column headers

df.head()
Legislation Number URL Congress Title Sponsor Party of Sponsor Date of Introduction Committees Latest Action Latest Action Date ... Related Bill.211 Related Bill.212 Related Bill.213 Latest Summary Amends Bill Date Offered Date Submitted Date Proposed Amendment Text (Latest) Amends Amendment
0 H.R. 1112 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Ensuring Military Readiness Act of 2023 Banks, Jim [Rep.-R-IN-3] Republican 2/21/23 House - Armed Services Referred to the House Committee on Armed Servi... 2/21/23 ... NaN NaN NaN <p><b>Ensuring Military Readiness Act of 2023... NaN NaN NaN NaN NaN NaN
1 S. 435 https://www.congress.gov/bill/118th-congress/s... 118th Congress (2023-2024) Ensuring Military Readiness Act of 2023 Rubio, Marco [Sen.-R-FL] Republican 2/15/23 Senate - Armed Services Read twice and referred to the Committee on Ar... 2/15/23 ... NaN NaN NaN <p><b>Ensuring Military Readiness Act of 2023... NaN NaN NaN NaN NaN NaN
2 H.Res. 886 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Supporting the goals and principles of Transge... Jayapal, Pramila [Rep.-D-WA-7] Democratic 11/21/23 House - Judiciary Referred to the House Committee on the Judiciary. 11/21/23 ... NaN NaN NaN <p>This resolution expresses support for the ... NaN NaN NaN NaN NaN NaN
3 S.Res. 464 https://www.congress.gov/bill/118th-congress/s... 118th Congress (2023-2024) A resolution supporting the goals and principl... Hirono, Mazie K. [Sen.-D-HI] Democratic 11/15/23 Senate - Judiciary Star Print ordered on resolution. 12/4/23 ... NaN NaN NaN <p>This resolution expresses support for the ... NaN NaN NaN NaN NaN NaN
4 H.Res. 269 https://www.congress.gov/bill/118th-congress/h... 118th Congress (2023-2024) Recognizing that it is the duty of the Federal... Jayapal, Pramila [Rep.-D-WA-7] Democratic 3/30/23 House - Judiciary, Education and the Workforce... Sponsor introductory remarks on measure. (CR H... 4/19/23 ... NaN NaN NaN <p>This resolution expresses support for impl... NaN NaN NaN NaN NaN NaN

5 rows × 434 columns

How would we get rid of the rest of the columns that are mostly NaN?

for item in df.columns.values:
    if 'Subject' in item:
        df = df.drop(item, axis=1)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 148 entries, 0 to 280
Columns: 232 entries, Legislation Number to Amends Amendment
dtypes: float64(3), object(229)
memory usage: 269.4+ KB
for item in df.columns.values:
    if 'Related' in item:
        df = df.drop(item, axis=1)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 148 entries, 0 to 280
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Legislation Number       148 non-null    object 
 1   URL                      148 non-null    object 
 2   Congress                 148 non-null    object 
 3   Title                    146 non-null    object 
 4   Sponsor                  148 non-null    object 
 5   Party of Sponsor         148 non-null    object 
 6   Date of Introduction     115 non-null    object 
 7   Committees               114 non-null    object 
 8   Latest Action            146 non-null    object 
 9   Latest Action Date       146 non-null    object 
 10  Latest Summary           61 non-null     object 
 11  Amends Bill              33 non-null     object 
 12  Date Offered             31 non-null     object 
 13  Date Submitted           2 non-null      object 
 14  Date Proposed            0 non-null      float64
 15  Amendment Text (Latest)  33 non-null     object 
 16  Amends Amendment         0 non-null      float64
dtypes: float64(2), object(15)
memory usage: 20.8+ KB

And now we have a much cleaner csv file. We can save it to our workspace with the following line:

df.to_csv('congress_clean.csv')